From b0857d309faefaf5443752458e8af1a4b22b3e92 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 4 Jun 2013 14:23:41 -0400 Subject: ext4: defer clearing of PageWriteback after extent conversion Currently PageWriteback bit gets cleared from put_io_page() called from ext4_end_bio(). This is somewhat inconvenient as extent tree is not fully updated at that time (unwritten extents are not marked as written) so we cannot read the data back yet. This design was dictated by lock ordering as we cannot start a transaction while PageWriteback bit is set (we could easily deadlock with ext4_da_writepages()). But now that we use transaction reservation for extent conversion, locking issues are solved and we can move PageWriteback bit clearing after extent conversion is done. As a result we can remove wait for unwritten extent conversion from ext4_sync_file() because it already implicitely happens through wait_on_page_writeback(). We implement deferring of PageWriteback clearing by queueing completed bios to appropriate io_end and processing all the pages when io_end is going to be freed instead of at the moment ext4_io_end() is called. Reviewed-by: Zheng Liu Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/ext4/fsync.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'fs/ext4/fsync.c') diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index e0ba8a408def..dcc881b30849 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -132,10 +132,6 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) if (inode->i_sb->s_flags & MS_RDONLY) goto out; - ret = ext4_flush_unwritten_io(inode); - if (ret < 0) - goto out; - if (!journal) { ret = __sync_inode(inode, datasync); if (!ret && !hlist_empty(&inode->i_dentry)) -- cgit v1.2.3 From 37b10dd06334ebc89f551d405a0fe27e1a622458 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 4 Jun 2013 14:40:09 -0400 Subject: ext4: use generic_file_fsync() in ext4_file_fsync() in nojournal mode Just use the generic function instead of duplicating it. We only need to reshuffle the read-only check a bit (which is there to prevent writing to a filesystem which has been remounted read-only after error I assume). Reviewed-by: Zheng Liu Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/ext4/fsync.c | 47 +++++++++++------------------------------------ 1 file changed, 11 insertions(+), 36 deletions(-) (limited to 'fs/ext4/fsync.c') diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index dcc881b30849..6019bd449576 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -73,32 +73,6 @@ static int ext4_sync_parent(struct inode *inode) return ret; } -/** - * __sync_file - generic_file_fsync without the locking and filemap_write - * @inode: inode to sync - * @datasync: only sync essential metadata if true - * - * This is just generic_file_fsync without the locking. This is needed for - * nojournal mode to make sure this inodes data/metadata makes it to disk - * properly. The i_mutex should be held already. - */ -static int __sync_inode(struct inode *inode, int datasync) -{ - int err; - int ret; - - ret = sync_mapping_buffers(inode->i_mapping); - if (!(inode->i_state & I_DIRTY)) - return ret; - if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) - return ret; - - err = sync_inode_metadata(inode, 1); - if (ret == 0) - ret = err; - return ret; -} - /* * akpm: A new design for ext4_sync_file(). * @@ -116,7 +90,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) struct inode *inode = file->f_mapping->host; struct ext4_inode_info *ei = EXT4_I(inode); journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; - int ret, err; + int ret = 0, err; tid_t commit_tid; bool needs_barrier = false; @@ -124,21 +98,21 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) trace_ext4_sync_file_enter(file, datasync); - ret = filemap_write_and_wait_range(inode->i_mapping, start, end); - if (ret) - return ret; - mutex_lock(&inode->i_mutex); - if (inode->i_sb->s_flags & MS_RDONLY) - goto out; + goto out_trace; if (!journal) { - ret = __sync_inode(inode, datasync); + ret = generic_file_fsync(file, start, end, datasync); if (!ret && !hlist_empty(&inode->i_dentry)) ret = ext4_sync_parent(inode); - goto out; + goto out_trace; } + ret = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (ret) + return ret; + mutex_lock(&inode->i_mutex); + /* * data=writeback,ordered: * The caller's filemap_fdatawrite()/wait will sync the data. @@ -168,8 +142,9 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) if (!ret) ret = err; } - out: +out: mutex_unlock(&inode->i_mutex); +out_trace: trace_ext4_sync_file_exit(inode, ret); return ret; } -- cgit v1.2.3 From 92e6222dfb85db780ebd8caea6a3f9326c375bc0 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 4 Jun 2013 14:40:39 -0400 Subject: ext4: remove i_mutex from ext4_file_sync() After removal of ext4_flush_unwritten_io() call, ext4_file_sync() doesn't need i_mutex anymore. Forcing of transaction commits doesn't need i_mutex as there's nothing inode specific in that code apart from grabbing transaction ids from the inode. So remove the lock. Reviewed-by: Zheng Liu Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/ext4/fsync.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'fs/ext4/fsync.c') diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 6019bd449576..fc938ebbddec 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -99,20 +99,18 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) trace_ext4_sync_file_enter(file, datasync); if (inode->i_sb->s_flags & MS_RDONLY) - goto out_trace; + goto out; if (!journal) { ret = generic_file_fsync(file, start, end, datasync); if (!ret && !hlist_empty(&inode->i_dentry)) ret = ext4_sync_parent(inode); - goto out_trace; + goto out; } ret = filemap_write_and_wait_range(inode->i_mapping, start, end); if (ret) return ret; - mutex_lock(&inode->i_mutex); - /* * data=writeback,ordered: * The caller's filemap_fdatawrite()/wait will sync the data. @@ -143,8 +141,6 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) ret = err; } out: - mutex_unlock(&inode->i_mutex); -out_trace: trace_ext4_sync_file_exit(inode, ret); return ret; } -- cgit v1.2.3 From 4418e14112e3ca85e8492a4489a3552b0cc526a8 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Wed, 12 Jun 2013 22:38:04 -0400 Subject: ext4: Fix fsync error handling after filesystem abort If filesystem was aborted after inode's write back is complete but before its metadata was updated we may return success results in data loss. In order to handle fs abort correctly we have to check fs state once we discover that it is in MS_RDONLY state Test case: http://patchwork.ozlabs.org/patch/244297 Reviewed-by: Jan Kara Signed-off-by: Dmitry Monakhov Signed-off-by: "Theodore Ts'o" --- fs/ext4/fsync.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs/ext4/fsync.c') diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index fc938ebbddec..a8bc47f75fa0 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -98,8 +98,13 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) trace_ext4_sync_file_enter(file, datasync); - if (inode->i_sb->s_flags & MS_RDONLY) + if (inode->i_sb->s_flags & MS_RDONLY) { + /* Make sure that we read updated s_mount_flags value */ + smp_rmb(); + if (EXT4_SB(inode->i_sb)->s_mount_flags & EXT4_MF_FS_ABORTED) + ret = -EROFS; goto out; + } if (!journal) { ret = generic_file_fsync(file, start, end, datasync); -- cgit v1.2.3