From de1414a654e66b81b5348dbc5259ecf2fb61655e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 14 Jan 2015 10:42:36 +0100 Subject: fs: export inode_to_bdi and use it in favor of mapping->backing_dev_info Now that we got rid of the bdi abuse on character devices we can always use sb->s_bdi to get at the backing_dev_info for a file, except for the block device special case. Export inode_to_bdi and replace uses of mapping->backing_dev_info with it to prepare for the removal of mapping->backing_dev_info. Signed-off-by: Christoph Hellwig Reviewed-by: Tejun Heo Reviewed-by: Jan Kara Signed-off-by: Jens Axboe --- fs/ocfs2/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ocfs2/file.c') diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 3950693dd0f6..abe7d98d6178 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2363,7 +2363,7 @@ relock: goto out_dio; } } else { - current->backing_dev_info = file->f_mapping->backing_dev_info; + current->backing_dev_info = inode_to_bdi(inode); written = generic_perform_write(file, from, *ppos); if (likely(written >= 0)) iocb->ki_pos = *ppos + written; -- cgit v1.2.3 From 696cdf730f40c97ebccacddd40c5382608dc8320 Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Tue, 10 Feb 2015 14:08:46 -0800 Subject: ocfs2: fix uninitialized variable access Variable "why" is not yet initialized at line 615, fix it. Signed-off-by: Junxiao Bi Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ocfs2/file.c') diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 3950693dd0f6..245db4f504da 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -569,7 +569,7 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start, handle_t *handle = NULL; struct ocfs2_alloc_context *data_ac = NULL; struct ocfs2_alloc_context *meta_ac = NULL; - enum ocfs2_alloc_restarted why; + enum ocfs2_alloc_restarted why = RESTART_NONE; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_extent_tree et; int did_quota = 0; -- cgit v1.2.3 From 026749a86ebff68cb2accdcd29872d36ac148920 Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Mon, 16 Feb 2015 15:59:50 -0800 Subject: ocfs2: prepare some interfaces used in append direct io Currently in case of append O_DIRECT write (block not allocated yet), ocfs2 will fall back to buffered I/O. This has some disadvantages. Firstly, it is not the behavior as expected. Secondly, it will consume huge page cache, e.g. in mass backup scenario. Thirdly, modern filesystems such as ext4 support this feature. In this patch set, the direct I/O write doesn't fallback to buffer I/O write any more because the allocate blocks are enabled in direct I/O now. This patch (of 9): Prepare some interfaces which will be used in append O_DIRECT write. Signed-off-by: Joseph Qi Cc: Weiwei Wang Cc: Mark Fasheh Cc: Joel Becker Cc: Xuejiufei Cc: Junxiao Bi Cc: alex chen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/file.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'fs/ocfs2/file.c') diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index e0f04d55fd05..2fce3c40ad27 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -295,7 +295,7 @@ out: return ret; } -static int ocfs2_set_inode_size(handle_t *handle, +int ocfs2_set_inode_size(handle_t *handle, struct inode *inode, struct buffer_head *fe_bh, u64 new_i_size) @@ -441,7 +441,7 @@ out: return status; } -static int ocfs2_truncate_file(struct inode *inode, +int ocfs2_truncate_file(struct inode *inode, struct buffer_head *di_bh, u64 new_i_size) { @@ -709,6 +709,13 @@ leave: return status; } +int ocfs2_extend_allocation(struct inode *inode, u32 logical_start, + u32 clusters_to_add, int mark_unwritten) +{ + return __ocfs2_extend_allocation(inode, logical_start, + clusters_to_add, mark_unwritten); +} + /* * While a write will already be ordering the data, a truncate will not. * Thus, we need to explicitly order the zeroed pages. -- cgit v1.2.3 From d943d59dd32d33cd8a44a2f9caf373ede11200da Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Mon, 16 Feb 2015 16:00:06 -0800 Subject: ocfs2: do not fallback to buffer I/O write if appending Now we can do direct io and do not fallback to buffered IO any more in case of append O_DIRECT write. Signed-off-by: Joseph Qi Cc: Weiwei Wang Cc: Joel Becker Cc: Junxiao Bi Cc: Mark Fasheh Cc: Xuejiufei Cc: alex chen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/file.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/ocfs2/file.c') diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 2fce3c40ad27..1055a2ece738 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2116,6 +2116,9 @@ static int ocfs2_prepare_inode_for_write(struct file *file, struct dentry *dentry = file->f_path.dentry; struct inode *inode = dentry->d_inode; loff_t saved_pos = 0, end; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + int full_coherency = !(osb->s_mount_opt & + OCFS2_MOUNT_COHERENCY_BUFFERED); /* * We start with a read level meta lock and only jump to an ex @@ -2204,7 +2207,7 @@ static int ocfs2_prepare_inode_for_write(struct file *file, * one node could wind up truncating another * nodes writes. */ - if (end > i_size_read(inode)) { + if (end > i_size_read(inode) && !full_coherency) { *direct_io = 0; break; } -- cgit v1.2.3 From 3a83b342c87e6d21290de8dc76ec20a67821261d Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Mon, 16 Feb 2015 16:00:09 -0800 Subject: ocfs2: complete the rest request through buffer io Complte the rest request thourgh buffer io after direct write performed. Signed-off-by: Joseph Qi Cc: Weiwei Wang Cc: Joel Becker Cc: Junxiao Bi Cc: Mark Fasheh Cc: Xuejiufei Cc: alex chen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/file.c | 43 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) (limited to 'fs/ocfs2/file.c') diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 1055a2ece738..784f2c72c992 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2253,6 +2253,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, u32 old_clusters; struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); + struct address_space *mapping = file->f_mapping; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); int full_coherency = !(osb->s_mount_opt & OCFS2_MOUNT_COHERENCY_BUFFERED); @@ -2367,11 +2368,51 @@ relock: iov_iter_truncate(from, count); if (direct_io) { + loff_t endbyte; + ssize_t written_buffered; written = generic_file_direct_write(iocb, from, *ppos); - if (written < 0) { + if (written < 0 || written == count) { ret = written; goto out_dio; } + + /* + * for completing the rest of the request. + */ + *ppos += written; + count -= written; + written_buffered = generic_perform_write(file, from, *ppos); + /* + * If generic_file_buffered_write() returned a synchronous error + * then we want to return the number of bytes which were + * direct-written, or the error code if that was zero. Note + * that this differs from normal direct-io semantics, which + * will return -EFOO even if some bytes were written. + */ + if (written_buffered < 0) { + ret = written_buffered; + goto out_dio; + } + + iocb->ki_pos = *ppos + written_buffered; + /* We need to ensure that the page cache pages are written to + * disk and invalidated to preserve the expected O_DIRECT + * semantics. + */ + endbyte = *ppos + written_buffered - 1; + ret = filemap_write_and_wait_range(file->f_mapping, *ppos, + endbyte); + if (ret == 0) { + written += written_buffered; + invalidate_mapping_pages(mapping, + *ppos >> PAGE_CACHE_SHIFT, + endbyte >> PAGE_CACHE_SHIFT); + } else { + /* + * We don't know how much we wrote, so just return + * the number of bytes which were direct-written + */ + } } else { current->backing_dev_info = inode_to_bdi(inode); written = generic_perform_write(file, from, *ppos); -- cgit v1.2.3 From 160cc266639d4213c15c103074561c1b44ffe691 Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Mon, 16 Feb 2015 16:00:15 -0800 Subject: ocfs2: set append dio as a ro compat feature Intruduce a bit OCFS2_FEATURE_RO_COMPAT_APPEND_DIO and check it in write flow. If the bit is not set, fall back to the old way. Signed-off-by: Joseph Qi Cc: Weiwei Wang Cc: Joel Becker Cc: Junxiao Bi Cc: Mark Fasheh Cc: Xuejiufei Cc: alex chen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/file.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'fs/ocfs2/file.c') diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 784f2c72c992..46e0d4e857c7 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2212,6 +2212,15 @@ static int ocfs2_prepare_inode_for_write(struct file *file, break; } + /* + * Fallback to old way if the feature bit is not set. + */ + if (end > i_size_read(inode) && + !ocfs2_supports_append_dio(osb)) { + *direct_io = 0; + break; + } + /* * We don't fill holes during direct io, so * check for them here. If any are found, the @@ -2220,7 +2229,13 @@ static int ocfs2_prepare_inode_for_write(struct file *file, */ ret = ocfs2_check_range_for_holes(inode, saved_pos, count); if (ret == 1) { - *direct_io = 0; + /* + * Fallback to old way if the feature bit is not set. + * Otherwise try dio first and then complete the rest + * request through buffer io. + */ + if (!ocfs2_supports_append_dio(osb)) + *direct_io = 0; ret = 0; } else if (ret < 0) mlog_errno(ret); -- cgit v1.2.3