From 5d1f40202bad12d4c70a2d40a420b30d23a72b1a Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 30 Jan 2013 14:17:31 -0500 Subject: Btrfs: fix missing i_size update If we have an ordered extent before the ordered extent we are currently completing that is after the current disk_i_size we will put our i_size update into that ordered extent so that we do not expose stale data. The problem is that if our disk i_size is updated past the previous ordered extent we won't update the i_size with the pending i_size update. So check the pending i_size update and if its above the current disk i_size we need to go ahead and try to update. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/ordered-data.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/ordered-data.c') diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index f10731297040..fc6840b53d9d 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -836,9 +836,16 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, * if the disk i_size is already at the inode->i_size, or * this ordered extent is inside the disk i_size, we're done */ - if (disk_i_size == i_size || offset <= disk_i_size) { + if (disk_i_size == i_size) + goto out; + + /* + * We still need to update disk_i_size if outstanding_isize is greater + * than disk_i_size. + */ + if (offset <= disk_i_size && + (!ordered || ordered->outstanding_isize <= disk_i_size)) goto out; - } /* * walk backward from this ordered extent to disk_i_size. -- cgit v1.2.3 From 59fe4f41976f6331b695ff049296d082cf621823 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 30 Jan 2013 14:31:31 -0500 Subject: Btrfs: fix possible stale data exposure We specifically do not update the disk i_size if there are ordered extents outstanding for any area between the current disk_i_size and our ordered extent so that we do not expose stale data. The problem is the check we have only checks if the ordered extent starts at or after the current disk_i_size, which doesn't take into account an ordered extent that starts before the current disk_i_size and ends past the disk_i_size. Fix this by checking if the extent ends past the disk_i_size. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/ordered-data.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/ordered-data.c') diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index fc6840b53d9d..e5ed56729607 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -877,7 +877,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, break; if (test->file_offset >= i_size) break; - if (test->file_offset >= disk_i_size) { + if (entry_end(test) > disk_i_size) { /* * we don't update disk_i_size now, so record this * undealt i_size. Or we will not know the real -- cgit v1.2.3 From 2ab28f322f9896782da904f5942f3873432addc8 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 12 Oct 2012 15:27:49 -0400 Subject: Btrfs: wait on ordered extents at the last possible moment Since we don't actually copy the extent information from the source tree in the fast case we don't need to wait for ordered io to be completed in order to fsync, we just need to wait for the io to be completed. So when we're logging our file just attach all of the ordered extents to the log, and then when the log syncs just wait for IO_DONE on the ordered extents and then write the super. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/ordered-data.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) (limited to 'fs/btrfs/ordered-data.c') diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index e5ed56729607..f14b17432117 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -196,6 +196,9 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, entry->file_offset = file_offset; entry->start = start; entry->len = len; + if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) && + !(type == BTRFS_ORDERED_NOCOW)) + entry->csum_bytes_left = disk_len; entry->disk_len = disk_len; entry->bytes_left = len; entry->inode = igrab(inode); @@ -213,6 +216,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, INIT_LIST_HEAD(&entry->root_extent_list); INIT_LIST_HEAD(&entry->work_list); init_completion(&entry->completion); + INIT_LIST_HEAD(&entry->log_list); trace_btrfs_ordered_extent_add(inode, entry); @@ -270,6 +274,10 @@ void btrfs_add_ordered_sum(struct inode *inode, tree = &BTRFS_I(inode)->ordered_tree; spin_lock_irq(&tree->lock); list_add_tail(&sum->list, &entry->list); + WARN_ON(entry->csum_bytes_left < sum->len); + entry->csum_bytes_left -= sum->len; + if (entry->csum_bytes_left == 0) + wake_up(&entry->wait); spin_unlock_irq(&tree->lock); } @@ -405,6 +413,66 @@ out: return ret == 0; } +/* Needs to either be called under a log transaction or the log_mutex */ +void btrfs_get_logged_extents(struct btrfs_root *log, struct inode *inode) +{ + struct btrfs_ordered_inode_tree *tree; + struct btrfs_ordered_extent *ordered; + struct rb_node *n; + int index = log->log_transid % 2; + + tree = &BTRFS_I(inode)->ordered_tree; + spin_lock_irq(&tree->lock); + for (n = rb_first(&tree->tree); n; n = rb_next(n)) { + ordered = rb_entry(n, struct btrfs_ordered_extent, rb_node); + spin_lock(&log->log_extents_lock[index]); + if (list_empty(&ordered->log_list)) { + list_add_tail(&ordered->log_list, &log->logged_list[index]); + atomic_inc(&ordered->refs); + } + spin_unlock(&log->log_extents_lock[index]); + } + spin_unlock_irq(&tree->lock); +} + +void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid) +{ + struct btrfs_ordered_extent *ordered; + int index = transid % 2; + + spin_lock_irq(&log->log_extents_lock[index]); + while (!list_empty(&log->logged_list[index])) { + ordered = list_first_entry(&log->logged_list[index], + struct btrfs_ordered_extent, + log_list); + list_del_init(&ordered->log_list); + spin_unlock_irq(&log->log_extents_lock[index]); + wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE, + &ordered->flags)); + btrfs_put_ordered_extent(ordered); + spin_lock_irq(&log->log_extents_lock[index]); + } + spin_unlock_irq(&log->log_extents_lock[index]); +} + +void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid) +{ + struct btrfs_ordered_extent *ordered; + int index = transid % 2; + + spin_lock_irq(&log->log_extents_lock[index]); + while (!list_empty(&log->logged_list[index])) { + ordered = list_first_entry(&log->logged_list[index], + struct btrfs_ordered_extent, + log_list); + list_del_init(&ordered->log_list); + spin_unlock_irq(&log->log_extents_lock[index]); + btrfs_put_ordered_extent(ordered); + spin_lock_irq(&log->log_extents_lock[index]); + } + spin_unlock_irq(&log->log_extents_lock[index]); +} + /* * used to drop a reference on an ordered extent. This will free * the extent if the last reference is dropped -- cgit v1.2.3 From 5b947f1ba959d4b19068f687ecd39900d1eea1aa Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Tue, 22 Jan 2013 10:52:04 +0000 Subject: Btrfs: don't traverse the ordered operation list repeatedly btrfs_run_ordered_operations() needn't traverse the ordered operation list repeatedly, it is because the transaction commiter will invoke it again when there is no other writer in this transaction, it can ensure that no one can add new objects into the ordered operation list. Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/ordered-data.c | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) (limited to 'fs/btrfs/ordered-data.c') diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index f14b17432117..9489fa96e3ed 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -626,14 +626,10 @@ int btrfs_run_ordered_operations(struct btrfs_root *root, int wait) mutex_lock(&root->fs_info->ordered_operations_mutex); spin_lock(&root->fs_info->ordered_extent_lock); -again: list_splice_init(&root->fs_info->ordered_operations, &splice); - while (!list_empty(&splice)) { - btrfs_inode = list_entry(splice.next, struct btrfs_inode, ordered_operations); - inode = &btrfs_inode->vfs_inode; list_del_init(&btrfs_inode->ordered_operations); @@ -642,22 +638,20 @@ again: * the inode may be getting freed (in sys_unlink path). */ inode = igrab(inode); - - if (!wait && inode) { - list_add_tail(&BTRFS_I(inode)->ordered_operations, - &root->fs_info->ordered_operations); - } - if (!inode) continue; + + if (!wait) + list_add_tail(&BTRFS_I(inode)->ordered_operations, + &root->fs_info->ordered_operations); spin_unlock(&root->fs_info->ordered_extent_lock); work = btrfs_alloc_delalloc_work(inode, wait, 1); if (!work) { + spin_lock(&root->fs_info->ordered_extent_lock); if (list_empty(&BTRFS_I(inode)->ordered_operations)) list_add_tail(&btrfs_inode->ordered_operations, &splice); - spin_lock(&root->fs_info->ordered_extent_lock); list_splice_tail(&splice, &root->fs_info->ordered_operations); spin_unlock(&root->fs_info->ordered_extent_lock); @@ -671,9 +665,6 @@ again: cond_resched(); spin_lock(&root->fs_info->ordered_extent_lock); } - if (wait && !list_empty(&root->fs_info->ordered_operations)) - goto again; - spin_unlock(&root->fs_info->ordered_extent_lock); out: list_for_each_entry_safe(work, next, &works, list) { -- cgit v1.2.3 From 569e0f358c0c37f6733702d4a5d2c412860f7169 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 13 Feb 2013 11:09:14 -0500 Subject: Btrfs: place ordered operations on a per transaction list Miao made the ordered operations stuff run async, which introduced a deadlock where we could get somebody (sync) racing in and committing the transaction while a commit was already happening. The new committer would try and flush ordered operations which would hang waiting for the commit to finish because it is done asynchronously and no longer inherits the callers trans handle. To fix this we need to make the ordered operations list a per transaction list. We can get new inodes added to the ordered operation list by truncating them and then having another process writing to them, so this makes it so that anybody trying to add an ordered operation _must_ start a transaction in order to add itself to the list, which will keep new inodes from getting added to the ordered operations list after we start committing. This should fix the deadlock and also keeps us from doing a lot more work than we need to during commit. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/ordered-data.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/ordered-data.c') diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 9489fa96e3ed..dc08d77b717e 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -612,10 +612,12 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput) * extra check to make sure the ordered operation list really is empty * before we return */ -int btrfs_run_ordered_operations(struct btrfs_root *root, int wait) +int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans, + struct btrfs_root *root, int wait) { struct btrfs_inode *btrfs_inode; struct inode *inode; + struct btrfs_transaction *cur_trans = trans->transaction; struct list_head splice; struct list_head works; struct btrfs_delalloc_work *work, *next; @@ -626,7 +628,7 @@ int btrfs_run_ordered_operations(struct btrfs_root *root, int wait) mutex_lock(&root->fs_info->ordered_operations_mutex); spin_lock(&root->fs_info->ordered_extent_lock); - list_splice_init(&root->fs_info->ordered_operations, &splice); + list_splice_init(&cur_trans->ordered_operations, &splice); while (!list_empty(&splice)) { btrfs_inode = list_entry(splice.next, struct btrfs_inode, ordered_operations); @@ -643,7 +645,7 @@ int btrfs_run_ordered_operations(struct btrfs_root *root, int wait) if (!wait) list_add_tail(&BTRFS_I(inode)->ordered_operations, - &root->fs_info->ordered_operations); + &cur_trans->ordered_operations); spin_unlock(&root->fs_info->ordered_extent_lock); work = btrfs_alloc_delalloc_work(inode, wait, 1); @@ -653,7 +655,7 @@ int btrfs_run_ordered_operations(struct btrfs_root *root, int wait) list_add_tail(&btrfs_inode->ordered_operations, &splice); list_splice_tail(&splice, - &root->fs_info->ordered_operations); + &cur_trans->ordered_operations); spin_unlock(&root->fs_info->ordered_extent_lock); ret = -ENOMEM; goto out; @@ -1033,6 +1035,7 @@ out: void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode) { + struct btrfs_transaction *cur_trans = trans->transaction; u64 last_mod; last_mod = max(BTRFS_I(inode)->generation, BTRFS_I(inode)->last_trans); @@ -1047,7 +1050,7 @@ void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, spin_lock(&root->fs_info->ordered_extent_lock); if (list_empty(&BTRFS_I(inode)->ordered_operations)) { list_add_tail(&BTRFS_I(inode)->ordered_operations, - &root->fs_info->ordered_operations); + &cur_trans->ordered_operations); } spin_unlock(&root->fs_info->ordered_extent_lock); } -- cgit v1.2.3