From 2ab28f322f9896782da904f5942f3873432addc8 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 12 Oct 2012 15:27:49 -0400 Subject: Btrfs: wait on ordered extents at the last possible moment Since we don't actually copy the extent information from the source tree in the fast case we don't need to wait for ordered io to be completed in order to fsync, we just need to wait for the io to be completed. So when we're logging our file just attach all of the ordered extents to the log, and then when the log syncs just wait for IO_DONE on the ordered extents and then write the super. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/tree-log.c | 132 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 128 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 9027bb1e7466..7de720d22b74 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2281,6 +2281,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, unsigned long log_transid = 0; mutex_lock(&root->log_mutex); + log_transid = root->log_transid; index1 = root->log_transid % 2; if (atomic_read(&root->log_commit[index1])) { wait_log_commit(trans, root, root->log_transid); @@ -2308,11 +2309,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, /* bail out if we need to do a full commit */ if (root->fs_info->last_trans_log_full_commit == trans->transid) { ret = -EAGAIN; + btrfs_free_logged_extents(log, log_transid); mutex_unlock(&root->log_mutex); goto out; } - log_transid = root->log_transid; if (log_transid % 2 == 0) mark = EXTENT_DIRTY; else @@ -2324,6 +2325,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark); if (ret) { btrfs_abort_transaction(trans, root, ret); + btrfs_free_logged_extents(log, log_transid); mutex_unlock(&root->log_mutex); goto out; } @@ -2363,6 +2365,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, } root->fs_info->last_trans_log_full_commit = trans->transid; btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); + btrfs_free_logged_extents(log, log_transid); mutex_unlock(&log_root_tree->log_mutex); ret = -EAGAIN; goto out; @@ -2373,6 +2376,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); wait_log_commit(trans, log_root_tree, log_root_tree->log_transid); + btrfs_free_logged_extents(log, log_transid); mutex_unlock(&log_root_tree->log_mutex); ret = 0; goto out; @@ -2392,6 +2396,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, */ if (root->fs_info->last_trans_log_full_commit == trans->transid) { btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); + btrfs_free_logged_extents(log, log_transid); mutex_unlock(&log_root_tree->log_mutex); ret = -EAGAIN; goto out_wake_log_root; @@ -2402,10 +2407,12 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, EXTENT_DIRTY | EXTENT_NEW); if (ret) { btrfs_abort_transaction(trans, root, ret); + btrfs_free_logged_extents(log, log_transid); mutex_unlock(&log_root_tree->log_mutex); goto out_wake_log_root; } btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); + btrfs_wait_logged_extents(log, log_transid); btrfs_set_super_log_root(root->fs_info->super_for_commit, log_root_tree->node->start); @@ -2475,6 +2482,14 @@ static void free_log_tree(struct btrfs_trans_handle *trans, EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS); } + /* + * We may have short-circuited the log tree with the full commit logic + * and left ordered extents on our list, so clear these out to keep us + * from leaking inodes and memory. + */ + btrfs_free_logged_extents(log, 0); + btrfs_free_logged_extents(log, 1); + free_extent_buffer(log->node); kfree(log); } @@ -3271,14 +3286,18 @@ static int log_one_extent(struct btrfs_trans_handle *trans, struct btrfs_root *log = root->log_root; struct btrfs_file_extent_item *fi; struct extent_buffer *leaf; + struct btrfs_ordered_extent *ordered; struct list_head ordered_sums; struct btrfs_map_token token; struct btrfs_key key; - u64 csum_offset = em->mod_start - em->start; - u64 csum_len = em->mod_len; + u64 mod_start = em->mod_start; + u64 mod_len = em->mod_len; + u64 csum_offset; + u64 csum_len; u64 extent_offset = em->start - em->orig_start; u64 block_len; int ret; + int index = log->log_transid % 2; bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; INIT_LIST_HEAD(&ordered_sums); @@ -3362,6 +3381,92 @@ static int log_one_extent(struct btrfs_trans_handle *trans, csum_len = block_len; } + /* + * First check and see if our csums are on our outstanding ordered + * extents. + */ +again: + spin_lock_irq(&log->log_extents_lock[index]); + list_for_each_entry(ordered, &log->logged_list[index], log_list) { + struct btrfs_ordered_sum *sum; + + if (!mod_len) + break; + + if (ordered->inode != inode) + continue; + + if (ordered->file_offset + ordered->len <= mod_start || + mod_start + mod_len <= ordered->file_offset) + continue; + + /* + * We are going to copy all the csums on this ordered extent, so + * go ahead and adjust mod_start and mod_len in case this + * ordered extent has already been logged. + */ + if (ordered->file_offset > mod_start) { + if (ordered->file_offset + ordered->len >= + mod_start + mod_len) + mod_len = ordered->file_offset - mod_start; + /* + * If we have this case + * + * |--------- logged extent ---------| + * |----- ordered extent ----| + * + * Just don't mess with mod_start and mod_len, we'll + * just end up logging more csums than we need and it + * will be ok. + */ + } else { + if (ordered->file_offset + ordered->len < + mod_start + mod_len) { + mod_len = (mod_start + mod_len) - + (ordered->file_offset + ordered->len); + mod_start = ordered->file_offset + + ordered->len; + } else { + mod_len = 0; + } + } + + /* + * To keep us from looping for the above case of an ordered + * extent that falls inside of the logged extent. + */ + if (test_and_set_bit(BTRFS_ORDERED_LOGGED_CSUM, + &ordered->flags)) + continue; + atomic_inc(&ordered->refs); + spin_unlock_irq(&log->log_extents_lock[index]); + /* + * we've dropped the lock, we must either break or + * start over after this. + */ + + wait_event(ordered->wait, ordered->csum_bytes_left == 0); + + list_for_each_entry(sum, &ordered->list, list) { + ret = btrfs_csum_file_blocks(trans, log, sum); + if (ret) { + btrfs_put_ordered_extent(ordered); + goto unlocked; + } + } + btrfs_put_ordered_extent(ordered); + goto again; + + } + spin_unlock_irq(&log->log_extents_lock[index]); +unlocked: + + if (!mod_len || ret) + return ret; + + csum_offset = mod_start - em->start; + csum_len = mod_len; + /* block start is already adjusted for the file extent offset. */ ret = btrfs_lookup_csums_range(log->fs_info->csum_root, em->block_start + csum_offset, @@ -3393,6 +3498,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree; u64 test_gen; int ret = 0; + int num = 0; INIT_LIST_HEAD(&extents); @@ -3401,16 +3507,31 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, list_for_each_entry_safe(em, n, &tree->modified_extents, list) { list_del_init(&em->list); + + /* + * Just an arbitrary number, this can be really CPU intensive + * once we start getting a lot of extents, and really once we + * have a bunch of extents we just want to commit since it will + * be faster. + */ + if (++num > 32768) { + list_del_init(&tree->modified_extents); + ret = -EFBIG; + goto process; + } + if (em->generation <= test_gen) continue; /* Need a ref to keep it from getting evicted from cache */ atomic_inc(&em->refs); set_bit(EXTENT_FLAG_LOGGING, &em->flags); list_add_tail(&em->list, &extents); + num++; } list_sort(NULL, &extents, extent_cmp); +process: while (!list_empty(&extents)) { em = list_entry(extents.next, struct extent_map, list); @@ -3513,6 +3634,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, mutex_lock(&BTRFS_I(inode)->log_mutex); + btrfs_get_logged_extents(log, inode); + /* * a brute force approach to making sure we get the most uptodate * copies of everything. @@ -3656,6 +3779,8 @@ log_extents: BTRFS_I(inode)->logged_trans = trans->transid; BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans; out_unlock: + if (err) + btrfs_free_logged_extents(log, log->log_transid); mutex_unlock(&BTRFS_I(inode)->log_mutex); btrfs_free_path(path); @@ -3822,7 +3947,6 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, end_trans: dput(old_parent); if (ret < 0) { - WARN_ON(ret != -ENOSPC); root->fs_info->last_trans_log_full_commit = trans->transid; ret = 1; } -- cgit v1.2.3 From dcfac4156fa102c1bab0e4e31df37e47278292f6 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 27 Dec 2012 09:01:20 +0000 Subject: Btrfs: kill unused argument of btrfs_pin_extent_for_log_replay Argument 'trans' is not used any more. Signed-off-by: Liu Bo Signed-off-by: Josef Bacik --- fs/btrfs/tree-log.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 7de720d22b74..deb7f66356bd 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -278,8 +278,7 @@ static int process_one_buffer(struct btrfs_root *log, struct walk_control *wc, u64 gen) { if (wc->pin) - btrfs_pin_extent_for_log_replay(wc->trans, - log->fs_info->extent_root, + btrfs_pin_extent_for_log_replay(log->fs_info->extent_root, eb->start, eb->len); if (btrfs_buffer_uptodate(eb, gen, 0)) { -- cgit v1.2.3 From de78b51a2852bddccd6535e9e12de65f92787a1e Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Thu, 31 Jan 2013 18:21:12 +0000 Subject: btrfs: remove cache only arguments from defrag path The entry point at the defrag ioctl always sets "cache only" to 0; the codepaths haven't run for a long time as far as I can tell. Chris says they're dead code, so remove them. Signed-off-by: Eric Sandeen Signed-off-by: Josef Bacik --- fs/btrfs/tree-log.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index deb7f66356bd..1a79087c4575 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2738,7 +2738,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, path->keep_locks = 1; ret = btrfs_search_forward(root, &min_key, &max_key, - path, 0, trans->transid); + path, trans->transid); /* * we didn't find anything from this transaction, see if there @@ -3680,7 +3680,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, while (1) { ins_nr = 0; ret = btrfs_search_forward(root, &min_key, &max_key, - path, 0, trans->transid); + path, trans->transid); if (ret != 0) break; again: -- cgit v1.2.3 From fda2832febb1928da0625b2c5d15559b29d7e740 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 26 Feb 2013 08:10:22 +0000 Subject: btrfs: cleanup for open-coded alignment Though most of the btrfs codes are using ALIGN macro for page alignment, there are still some codes using open-coded alignment like the following: ------ u64 mask = ((u64)root->stripesize - 1); u64 ret = (val + mask) & ~mask; ------ Or even hidden one: ------ num_bytes = (end - start + blocksize) & ~(blocksize - 1); ------ Sometimes these open-coded alignment is not so easy to understand for newbie like me. This commit changes the open-coded alignment to the ALIGN macro for a better readability. Also there is a previous patch from David Sterba with similar changes, but the patch is for 3.2 kernel and seems not merged. http://www.spinics.net/lists/linux-btrfs/msg12747.html Cc: David Sterba Signed-off-by: Qu Wenruo Signed-off-by: Josef Bacik --- fs/btrfs/tree-log.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 1a79087c4575..e8b7a68e1b37 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -484,7 +484,6 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, struct btrfs_key *key) { int found_type; - u64 mask = root->sectorsize - 1; u64 extent_end; u64 start = key->offset; u64 saved_nbytes; @@ -501,7 +500,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, extent_end = start + btrfs_file_extent_num_bytes(eb, item); else if (found_type == BTRFS_FILE_EXTENT_INLINE) { size = btrfs_file_extent_inline_len(eb, item); - extent_end = (start + size + mask) & ~mask; + extent_end = ALIGN(start + size, root->sectorsize); } else { ret = 0; goto out; -- cgit v1.2.3 From 3321719ed67440bba1b0c5ae19c30d640263ccc8 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Wed, 27 Feb 2013 13:28:24 +0000 Subject: Btrfs: fix memory leak of log roots When we abort a transaction while fsyncing, we'll skip freeing log roots part of committing a transaction, which leads to memory leak. This adds a 'free log roots' in putting super when no more users hold references on log roots, so it's safe and clean. Signed-off-by: Liu Bo Signed-off-by: Josef Bacik --- fs/btrfs/tree-log.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index e8b7a68e1b37..8e85e0e4333d 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2466,8 +2466,10 @@ static void free_log_tree(struct btrfs_trans_handle *trans, .process_func = process_one_buffer }; - ret = walk_log_tree(trans, log, &wc); - BUG_ON(ret); + if (trans) { + ret = walk_log_tree(trans, log, &wc); + BUG_ON(ret); + } while (1) { ret = find_first_extent_bit(&log->dirty_log_pages, -- cgit v1.2.3 From 124fe663f93162d17b7e391705cac122101e93d8 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 1 Mar 2013 11:47:21 -0500 Subject: Btrfs: delete inline extents when we find them during logging Apparently when we do inline extents we allow the data to overlap the last chunk of the btrfs_file_extent_item, which means that we can possibly have a btrfs_file_extent_item that isn't actually as large as a btrfs_file_extent_item. This messes with us when we try to overwrite the extent when logging new extents since we expect for it to be the right size. To fix this just delete the item and try to do the insert again which will give us the proper sized btrfs_file_extent_item. This fixes a panic where map_private_extent_buffer would blow up because we're trying to write past the end of the leaf. Thanks, Cc: stable@vger.kernel.org Signed-off-by: Josef Bacik --- fs/btrfs/tree-log.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 8e85e0e4333d..c7ef569eb22a 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3300,6 +3300,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans, int index = log->log_transid % 2; bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; +insert: INIT_LIST_HEAD(&ordered_sums); btrfs_init_map_token(&token); key.objectid = btrfs_ino(inode); @@ -3315,6 +3316,23 @@ static int log_one_extent(struct btrfs_trans_handle *trans, leaf = path->nodes[0]; fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); + + /* + * If we are overwriting an inline extent with a real one then we need + * to just delete the inline extent as it may not be large enough to + * have the entire file_extent_item. + */ + if (ret && btrfs_token_file_extent_type(leaf, fi, &token) == + BTRFS_FILE_EXTENT_INLINE) { + ret = btrfs_del_item(trans, log, path); + btrfs_release_path(path); + if (ret) { + path->really_keep_locks = 0; + return ret; + } + goto insert; + } + btrfs_set_token_file_extent_generation(leaf, fi, em->generation, &token); if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { -- cgit v1.2.3 From 9bf7a4890518186238d2579be16ecc5190a707c0 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 1 Mar 2013 13:35:47 -0500 Subject: Btrfs: use set_nlink if our i_nlink is 0 We need to inc the nlink of deleted entries when running replay so we can do the unlink on the fs_root and get everything cleaned up and then have the orphan cleanup do the right thing. The problem is inc_nlink complains about this, even thought it still does the right thing. So use set_nlink() if our i_nlink is 0 to keep users from seeing the warnings during log replay. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/tree-log.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index c7ef569eb22a..451fad96ecd1 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1382,7 +1382,10 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans, btrfs_release_path(path); if (ret == 0) { - btrfs_inc_nlink(inode); + if (!inode->i_nlink) + set_nlink(inode, 1); + else + btrfs_inc_nlink(inode); ret = btrfs_update_inode(trans, root, inode); } else if (ret == -EEXIST) { ret = 0; -- cgit v1.2.3