From c8cc6341653721b54760480b0d0d9b5f09b46741 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 1 Jul 2013 16:18:19 -0400 Subject: Btrfs: stop using GFP_ATOMIC for the tree mod log allocations Previously we held the tree mod lock when adding stuff because we use it to check and see if we truly do want to track tree modifications. This is admirable, but GFP_ATOMIC in a critical area that is going to get hit pretty hard and often is not nice. So instead do our basic checks to see if we don't need to track modifications, and if those pass then do our allocation, and then when we go to insert the new modification check if we still care, and if we don't just free up our mod and return. Otherwise we're good to go and we can carry on. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 161 +++++++++++++++++++------------------------------------ 1 file changed, 56 insertions(+), 105 deletions(-) (limited to 'fs/btrfs/ctree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index ed504607d8ec..0d5c686f2b98 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -484,8 +484,27 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm) struct rb_node **new; struct rb_node *parent = NULL; struct tree_mod_elem *cur; + int ret = 0; + + BUG_ON(!tm); + + tree_mod_log_write_lock(fs_info); + if (list_empty(&fs_info->tree_mod_seq_list)) { + tree_mod_log_write_unlock(fs_info); + /* + * Ok we no longer care about logging modifications, free up tm + * and return 0. Any callers shouldn't be using tm after + * calling tree_mod_log_insert, but if they do we can just + * change this to return a special error code to let the callers + * do their own thing. + */ + kfree(tm); + return 0; + } - BUG_ON(!tm || !tm->seq); + spin_lock(&fs_info->tree_mod_seq_lock); + tm->seq = btrfs_inc_tree_mod_seq_minor(fs_info); + spin_unlock(&fs_info->tree_mod_seq_lock); tm_root = &fs_info->tree_mod_log; new = &tm_root->rb_node; @@ -501,14 +520,17 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm) else if (cur->seq > tm->seq) new = &((*new)->rb_right); else { + ret = -EEXIST; kfree(tm); - return -EEXIST; + goto out; } } rb_link_node(&tm->node, parent, new); rb_insert_color(&tm->node, tm_root); - return 0; +out: + tree_mod_log_write_unlock(fs_info); + return ret; } /* @@ -524,57 +546,19 @@ static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info, return 1; if (eb && btrfs_header_level(eb) == 0) return 1; - - tree_mod_log_write_lock(fs_info); - if (list_empty(&fs_info->tree_mod_seq_list)) { - /* - * someone emptied the list while we were waiting for the lock. - * we must not add to the list when no blocker exists. - */ - tree_mod_log_write_unlock(fs_info); - return 1; - } - return 0; } -/* - * This allocates memory and gets a tree modification sequence number. - * - * Returns <0 on error. - * Returns >0 (the added sequence number) on success. - */ -static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags, - struct tree_mod_elem **tm_ret) -{ - struct tree_mod_elem *tm; - - /* - * once we switch from spin locks to something different, we should - * honor the flags parameter here. - */ - tm = *tm_ret = kzalloc(sizeof(*tm), GFP_ATOMIC); - if (!tm) - return -ENOMEM; - - spin_lock(&fs_info->tree_mod_seq_lock); - tm->seq = btrfs_inc_tree_mod_seq_minor(fs_info); - spin_unlock(&fs_info->tree_mod_seq_lock); - - return tm->seq; -} - static inline int __tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, int slot, enum mod_log_op op, gfp_t flags) { - int ret; struct tree_mod_elem *tm; - ret = tree_mod_alloc(fs_info, flags, &tm); - if (ret < 0) - return ret; + tm = kzalloc(sizeof(*tm), flags); + if (!tm) + return -ENOMEM; tm->index = eb->start >> PAGE_CACHE_SHIFT; if (op != MOD_LOG_KEY_ADD) { @@ -589,34 +573,14 @@ __tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, } static noinline int -tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info, - struct extent_buffer *eb, int slot, - enum mod_log_op op, gfp_t flags) +tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb, int slot, + enum mod_log_op op, gfp_t flags) { - int ret; - if (tree_mod_dont_log(fs_info, eb)) return 0; - ret = __tree_mod_log_insert_key(fs_info, eb, slot, op, flags); - - tree_mod_log_write_unlock(fs_info); - return ret; -} - -static noinline int -tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, - int slot, enum mod_log_op op) -{ - return tree_mod_log_insert_key_mask(fs_info, eb, slot, op, GFP_NOFS); -} - -static noinline int -tree_mod_log_insert_key_locked(struct btrfs_fs_info *fs_info, - struct extent_buffer *eb, int slot, - enum mod_log_op op) -{ - return __tree_mod_log_insert_key(fs_info, eb, slot, op, GFP_NOFS); + return __tree_mod_log_insert_key(fs_info, eb, slot, op, flags); } static noinline int @@ -637,14 +601,14 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, * buffer, i.e. dst_slot < src_slot. */ for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) { - ret = tree_mod_log_insert_key_locked(fs_info, eb, i + dst_slot, - MOD_LOG_KEY_REMOVE_WHILE_MOVING); + ret = __tree_mod_log_insert_key(fs_info, eb, i + dst_slot, + MOD_LOG_KEY_REMOVE_WHILE_MOVING, GFP_NOFS); BUG_ON(ret < 0); } - ret = tree_mod_alloc(fs_info, flags, &tm); - if (ret < 0) - goto out; + tm = kzalloc(sizeof(*tm), flags); + if (!tm) + return -ENOMEM; tm->index = eb->start >> PAGE_CACHE_SHIFT; tm->slot = src_slot; @@ -652,10 +616,7 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, tm->move.nr_items = nr_items; tm->op = MOD_LOG_MOVE_KEYS; - ret = __tree_mod_log_insert(fs_info, tm); -out: - tree_mod_log_write_unlock(fs_info); - return ret; + return __tree_mod_log_insert(fs_info, tm); } static inline void @@ -670,8 +631,8 @@ __tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb) nritems = btrfs_header_nritems(eb); for (i = nritems - 1; i >= 0; i--) { - ret = tree_mod_log_insert_key_locked(fs_info, eb, i, - MOD_LOG_KEY_REMOVE_WHILE_FREEING); + ret = __tree_mod_log_insert_key(fs_info, eb, i, + MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS); BUG_ON(ret < 0); } } @@ -683,7 +644,6 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, int log_removal) { struct tree_mod_elem *tm; - int ret; if (tree_mod_dont_log(fs_info, NULL)) return 0; @@ -691,9 +651,9 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, if (log_removal) __tree_mod_log_free_eb(fs_info, old_root); - ret = tree_mod_alloc(fs_info, flags, &tm); - if (ret < 0) - goto out; + tm = kzalloc(sizeof(*tm), flags); + if (!tm) + return -ENOMEM; tm->index = new_root->start >> PAGE_CACHE_SHIFT; tm->old_root.logical = old_root->start; @@ -701,10 +661,7 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, tm->generation = btrfs_header_generation(old_root); tm->op = MOD_LOG_ROOT_REPLACE; - ret = __tree_mod_log_insert(fs_info, tm); -out: - tree_mod_log_write_unlock(fs_info); - return ret; + return __tree_mod_log_insert(fs_info, tm); } static struct tree_mod_elem * @@ -784,23 +741,20 @@ tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, if (tree_mod_dont_log(fs_info, NULL)) return; - if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0) { - tree_mod_log_write_unlock(fs_info); + if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0) return; - } for (i = 0; i < nr_items; i++) { - ret = tree_mod_log_insert_key_locked(fs_info, src, + ret = __tree_mod_log_insert_key(fs_info, src, i + src_offset, - MOD_LOG_KEY_REMOVE); + MOD_LOG_KEY_REMOVE, GFP_NOFS); BUG_ON(ret < 0); - ret = tree_mod_log_insert_key_locked(fs_info, dst, + ret = __tree_mod_log_insert_key(fs_info, dst, i + dst_offset, - MOD_LOG_KEY_ADD); + MOD_LOG_KEY_ADD, + GFP_NOFS); BUG_ON(ret < 0); } - - tree_mod_log_write_unlock(fs_info); } static inline void @@ -819,9 +773,9 @@ tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info, { int ret; - ret = tree_mod_log_insert_key_mask(fs_info, eb, slot, - MOD_LOG_KEY_REPLACE, - atomic ? GFP_ATOMIC : GFP_NOFS); + ret = __tree_mod_log_insert_key(fs_info, eb, slot, + MOD_LOG_KEY_REPLACE, + atomic ? GFP_ATOMIC : GFP_NOFS); BUG_ON(ret < 0); } @@ -830,10 +784,7 @@ tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb) { if (tree_mod_dont_log(fs_info, eb)) return; - __tree_mod_log_free_eb(fs_info, eb); - - tree_mod_log_write_unlock(fs_info); } static noinline void @@ -1083,7 +1034,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, WARN_ON(trans->transid != btrfs_header_generation(parent)); tree_mod_log_insert_key(root->fs_info, parent, parent_slot, - MOD_LOG_KEY_REPLACE); + MOD_LOG_KEY_REPLACE, GFP_NOFS); btrfs_set_node_blockptr(parent, parent_slot, cow->start); btrfs_set_node_ptr_generation(parent, parent_slot, @@ -3208,7 +3159,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans, } if (level) { ret = tree_mod_log_insert_key(root->fs_info, lower, slot, - MOD_LOG_KEY_ADD); + MOD_LOG_KEY_ADD, GFP_NOFS); BUG_ON(ret < 0); } btrfs_set_node_key(lower, key, slot); @@ -4642,7 +4593,7 @@ static void del_ptr(struct btrfs_root *root, struct btrfs_path *path, (nritems - slot - 1)); } else if (level) { ret = tree_mod_log_insert_key(root->fs_info, parent, slot, - MOD_LOG_KEY_REMOVE); + MOD_LOG_KEY_REMOVE, GFP_NOFS); BUG_ON(ret < 0); } -- cgit v1.2.3 From db7f3436c1c186f8271018751fcb338cf3706e8d Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 7 Aug 2013 14:54:37 -0400 Subject: Btrfs: deal with enomem in the rewind path We can get ENOMEM trying to allocate dummy bufs for the rewind operation of the tree mod log. Instead of BUG_ON()'ing in this case pass up ENOMEM. I looked back through the callers and I'm pretty sure I got everybody who did BUG_ON(ret) in this path. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/ctree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 0d5c686f2b98..1dd8a71f567d 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1211,7 +1211,11 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, BUG_ON(tm->slot != 0); eb_rewin = alloc_dummy_extent_buffer(eb->start, fs_info->tree_root->nodesize); - BUG_ON(!eb_rewin); + if (!eb_rewin) { + btrfs_tree_read_unlock(eb); + free_extent_buffer(eb); + return NULL; + } btrfs_set_header_bytenr(eb_rewin, eb->start); btrfs_set_header_backref_rev(eb_rewin, btrfs_header_backref_rev(eb)); @@ -1219,7 +1223,11 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, btrfs_set_header_level(eb_rewin, btrfs_header_level(eb)); } else { eb_rewin = btrfs_clone_extent_buffer(eb); - BUG_ON(!eb_rewin); + if (!eb_rewin) { + btrfs_tree_read_unlock(eb); + free_extent_buffer(eb); + return NULL; + } } btrfs_tree_read_unlock(eb); @@ -2772,6 +2780,10 @@ again: BTRFS_READ_LOCK); } b = tree_mod_log_rewind(root->fs_info, b, time_seq); + if (!b) { + ret = -ENOMEM; + goto done; + } p->locks[level] = BTRFS_READ_LOCK; p->nodes[level] = b; } else { -- cgit v1.2.3 From 9ec726775188906192f78ab9187640afd81ab996 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 7 Aug 2013 16:57:23 -0400 Subject: Btrfs: stop using GFP_ATOMIC when allocating rewind ebs There is no reason we can't just set the path to blocking and then do normal GFP_NOFS allocations for these extent buffers. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/ctree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 1dd8a71f567d..5f7a97556583 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1191,8 +1191,8 @@ __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, * is freed (its refcount is decremented). */ static struct extent_buffer * -tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, - u64 time_seq) +tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path, + struct extent_buffer *eb, u64 time_seq) { struct extent_buffer *eb_rewin; struct tree_mod_elem *tm; @@ -1207,12 +1207,15 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, if (!tm) return eb; + btrfs_set_path_blocking(path); + btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); + if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) { BUG_ON(tm->slot != 0); eb_rewin = alloc_dummy_extent_buffer(eb->start, fs_info->tree_root->nodesize); if (!eb_rewin) { - btrfs_tree_read_unlock(eb); + btrfs_tree_read_unlock_blocking(eb); free_extent_buffer(eb); return NULL; } @@ -1224,13 +1227,14 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, } else { eb_rewin = btrfs_clone_extent_buffer(eb); if (!eb_rewin) { - btrfs_tree_read_unlock(eb); + btrfs_tree_read_unlock_blocking(eb); free_extent_buffer(eb); return NULL; } } - btrfs_tree_read_unlock(eb); + btrfs_clear_path_blocking(path, NULL, BTRFS_READ_LOCK); + btrfs_tree_read_unlock_blocking(eb); free_extent_buffer(eb); extent_buffer_get(eb_rewin); @@ -1294,8 +1298,9 @@ get_old_root(struct btrfs_root *root, u64 time_seq) free_extent_buffer(eb_root); eb = alloc_dummy_extent_buffer(logical, root->nodesize); } else { + btrfs_set_lock_blocking_rw(eb_root, BTRFS_READ_LOCK); eb = btrfs_clone_extent_buffer(eb_root); - btrfs_tree_read_unlock(eb_root); + btrfs_tree_read_unlock_blocking(eb_root); free_extent_buffer(eb_root); } @@ -2779,7 +2784,7 @@ again: btrfs_clear_path_blocking(p, b, BTRFS_READ_LOCK); } - b = tree_mod_log_rewind(root->fs_info, b, time_seq); + b = tree_mod_log_rewind(root->fs_info, p, b, time_seq); if (!b) { ret = -ENOMEM; goto done; -- cgit v1.2.3 From ba5e8f2e2d3074bf151dd222dae9bb400e621b82 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 16 Aug 2013 16:52:55 -0400 Subject: Btrfs: fix send issues related to inode number reuse If you are sending a snapshot and specifying a parent snapshot we will walk the trees and figure out where they differ and send the differences only. The way we check for differences are if the leaves aren't the same and if the keys are not the same within the leaves. So if neither leaf is the same (ie the leaf has been cow'ed from the parent snapshot) we walk each item in the send root and check it against the parent root. If the items match exactly then we don't do anything. This doesn't quite work for inode refs, since they will just have the name and the parent objectid. If you move the file from a directory and then remove that directory and re-create a directory with the same inode number as the old directory and then move that file back into that directory we will assume that nothing changed and you will get errors when you try to receive. In order to fix this we need to do extra checking to see if the inode ref really is the same or not. So do this by passing down BTRFS_COMPARE_TREE_SAME if the items match. Then if the key type is an inode ref we can do some extra checking, otherwise we just keep processing. The extra checking is to look up the generation of the directory in the parent volume and compare it to the generation of the send volume. If they match then they are the same directory and we are good to go. If they don't we have to add them to the changed refs list. This means we have to track the generation of the ref we're trying to lookup when we iterate all the refs for a particular inode. So in the case of looking for new refs we have to get the generation from the parent volume, and in the case of looking for deleted refs we have to get the generation from the send volume to compare with. There was also the issue of using a ulist to keep track of the directories we needed to check. Because we can get a deleted ref and a new ref for the same inode number the ulist won't work since it indexes based on the value. So instead just dup any directory ref we find and add it to a local list, and then process that list as normal and do away with using a ulist for this altogether. Before we would fail all of the tests in the far-progs that related to moving directories (test group 32). With this patch we now pass these tests, and all of the tests in the far-progs send testing suite. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'fs/btrfs/ctree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 5f7a97556583..0708ebed2df7 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -5297,19 +5297,20 @@ int btrfs_compare_trees(struct btrfs_root *left_root, goto out; advance_right = ADVANCE; } else { + enum btrfs_compare_tree_result cmp; + WARN_ON(!extent_buffer_uptodate(left_path->nodes[0])); ret = tree_compare_item(left_root, left_path, right_path, tmp_buf); - if (ret) { - WARN_ON(!extent_buffer_uptodate(left_path->nodes[0])); - ret = changed_cb(left_root, right_root, - left_path, right_path, - &left_key, - BTRFS_COMPARE_TREE_CHANGED, - ctx); - if (ret < 0) - goto out; - } + if (ret) + cmp = BTRFS_COMPARE_TREE_CHANGED; + else + cmp = BTRFS_COMPARE_TREE_SAME; + ret = changed_cb(left_root, right_root, + left_path, right_path, + &left_key, cmp, ctx); + if (ret < 0) + goto out; advance_left = ADVANCE; advance_right = ADVANCE; } -- cgit v1.2.3 From 35a3621beb3e2face3e7954eaee20a8fa0043fac Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Wed, 14 Aug 2013 18:12:25 +0200 Subject: Btrfs: get rid of sparse warnings make C=2 fs/btrfs/ CF=-D__CHECK_ENDIAN__ I tried to filter out the warnings for which patches have already been sent to the mailing list, pending for inclusion in btrfs-next. All these changes should be obviously safe. Signed-off-by: Stefan Behrens Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/ctree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 0708ebed2df7..09b3870c2729 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1067,7 +1067,7 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info, int looped = 0; if (!time_seq) - return 0; + return NULL; /* * the very last operation that's logged for a root is the replacement @@ -1078,7 +1078,7 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info, tm = tree_mod_log_search_oldest(fs_info, root_logical, time_seq); if (!looped && !tm) - return 0; + return NULL; /* * if there are no tree operation for the oldest root, we simply * return it. this should only happen if that (old) root is at @@ -4782,7 +4782,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, * This may release the path, and so you may lose any locks held at the * time you call it. */ -int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) +static int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) { struct btrfs_key key; struct btrfs_disk_key found_key; -- cgit v1.2.3 From c1c9ff7c94e83fae89a742df74db51156869bad5 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 20 Aug 2013 13:20:07 +0200 Subject: Btrfs: Remove superfluous casts from u64 to unsigned long long u64 is "unsigned long long" on all architectures now, so there's no need to cast it when formatting it using the "ll" length modifier. Signed-off-by: Geert Uytterhoeven Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/ctree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 09b3870c2729..1d94242ec002 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1383,14 +1383,12 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, if (trans->transaction != root->fs_info->running_transaction) WARN(1, KERN_CRIT "trans %llu running %llu\n", - (unsigned long long)trans->transid, - (unsigned long long) + trans->transid, root->fs_info->running_transaction->transid); if (trans->transid != root->fs_info->generation) WARN(1, KERN_CRIT "trans %llu running %llu\n", - (unsigned long long)trans->transid, - (unsigned long long)root->fs_info->generation); + trans->transid, root->fs_info->generation); if (!should_cow_block(trans, root, buf)) { *cow_ret = buf; -- cgit v1.2.3 From fba6aa75654394fccf2530041e9451414c28084f Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 20 Aug 2013 13:20:14 +0200 Subject: Btrfs: Make btrfs_header_fsid() return unsigned long Internally, btrfs_header_fsid() calculates an unsigned long, but casts it to a pointer, while all callers cast it to unsigned long again. Signed-off-by: Geert Uytterhoeven Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'fs/btrfs/ctree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 1d94242ec002..8e3efe3bad24 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -274,8 +274,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, else btrfs_set_header_owner(cow, new_root_objectid); - write_extent_buffer(cow, root->fs_info->fsid, - (unsigned long)btrfs_header_fsid(cow), + write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(cow), BTRFS_FSID_SIZE); WARN_ON(btrfs_header_generation(buf) > trans->transid); @@ -997,8 +996,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, else btrfs_set_header_owner(cow, root->root_key.objectid); - write_extent_buffer(cow, root->fs_info->fsid, - (unsigned long)btrfs_header_fsid(cow), + write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(cow), BTRFS_FSID_SIZE); ret = update_ref_for_cow(trans, root, buf, cow, &last_ref); @@ -3109,8 +3107,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV); btrfs_set_header_owner(c, root->root_key.objectid); - write_extent_buffer(c, root->fs_info->fsid, - (unsigned long)btrfs_header_fsid(c), + write_extent_buffer(c, root->fs_info->fsid, btrfs_header_fsid(c), BTRFS_FSID_SIZE); write_extent_buffer(c, root->fs_info->chunk_tree_uuid, @@ -3250,8 +3247,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans, btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV); btrfs_set_header_owner(split, root->root_key.objectid); write_extent_buffer(split, root->fs_info->fsid, - (unsigned long)btrfs_header_fsid(split), - BTRFS_FSID_SIZE); + btrfs_header_fsid(split), BTRFS_FSID_SIZE); write_extent_buffer(split, root->fs_info->chunk_tree_uuid, (unsigned long)btrfs_header_chunk_tree_uuid(split), BTRFS_UUID_SIZE); @@ -4006,8 +4002,7 @@ again: btrfs_set_header_owner(right, root->root_key.objectid); btrfs_set_header_level(right, 0); write_extent_buffer(right, root->fs_info->fsid, - (unsigned long)btrfs_header_fsid(right), - BTRFS_FSID_SIZE); + btrfs_header_fsid(right), BTRFS_FSID_SIZE); write_extent_buffer(right, root->fs_info->chunk_tree_uuid, (unsigned long)btrfs_header_chunk_tree_uuid(right), -- cgit v1.2.3 From b308bc2f05a86e728bd035e21a4974acd05f4d1e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 20 Aug 2013 13:20:15 +0200 Subject: Btrfs: Make btrfs_header_chunk_tree_uuid() return unsigned long Internally, btrfs_header_chunk_tree_uuid() calculates an unsigned long, but casts it to a pointer, while all callers cast it to unsigned long again. Signed-off-by: Geert Uytterhoeven Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/ctree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 8e3efe3bad24..5fa521bec07b 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -3111,8 +3111,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, BTRFS_FSID_SIZE); write_extent_buffer(c, root->fs_info->chunk_tree_uuid, - (unsigned long)btrfs_header_chunk_tree_uuid(c), - BTRFS_UUID_SIZE); + btrfs_header_chunk_tree_uuid(c), BTRFS_UUID_SIZE); btrfs_set_node_key(c, &lower_key, 0); btrfs_set_node_blockptr(c, 0, lower->start); @@ -3249,7 +3248,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans, write_extent_buffer(split, root->fs_info->fsid, btrfs_header_fsid(split), BTRFS_FSID_SIZE); write_extent_buffer(split, root->fs_info->chunk_tree_uuid, - (unsigned long)btrfs_header_chunk_tree_uuid(split), + btrfs_header_chunk_tree_uuid(split), BTRFS_UUID_SIZE); tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid); @@ -4005,7 +4004,7 @@ again: btrfs_header_fsid(right), BTRFS_FSID_SIZE); write_extent_buffer(right, root->fs_info->chunk_tree_uuid, - (unsigned long)btrfs_header_chunk_tree_uuid(right), + btrfs_header_chunk_tree_uuid(right), BTRFS_UUID_SIZE); if (split == 0) { -- cgit v1.2.3 From d7396f07358a7c6e22c238d36d1d85f9d652a414 Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Fri, 30 Aug 2013 15:46:43 +0100 Subject: Btrfs: optimize key searches in btrfs_search_slot When the binary search returns 0 (exact match), the target key will necessarily be at slot 0 of all nodes below the current one, so in this case the binary search is not needed because it will always return 0, and we waste time doing it, holding node locks for longer than necessary, etc. Below follow histograms with the times spent on the current approach of doing a binary search when the previous binary search returned 0, and times for the new approach, which directly picks the first item/child node in the leaf/node. Current approach: Count: 6682 Range: 35.000 - 8370.000; Mean: 85.837; Median: 75.000; Stddev: 106.429 Percentiles: 90th: 124.000; 95th: 145.000; 99th: 206.000 35.000 - 61.080: 1235 ################ 61.080 - 106.053: 4207 ##################################################### 106.053 - 183.606: 1122 ############## 183.606 - 317.341: 111 # 317.341 - 547.959: 6 | 547.959 - 8370.000: 1 | Approach proposed by this patch: Count: 6682 Range: 6.000 - 135.000; Mean: 16.690; Median: 16.000; Stddev: 7.160 Percentiles: 90th: 23.000; 95th: 27.000; 99th: 40.000 6.000 - 8.418: 58 # 8.418 - 11.670: 1149 ######################### 11.670 - 16.046: 2418 ##################################################### 16.046 - 21.934: 2098 ############################################## 21.934 - 29.854: 744 ################ 29.854 - 40.511: 154 ### 40.511 - 54.848: 41 # 54.848 - 74.136: 5 | 74.136 - 100.087: 9 | 100.087 - 135.000: 6 | These samples were captured during a run of the btrfs tests 001, 002 and 004 in the xfstests, with a leaf/node size of 4Kb. Signed-off-by: Filipe David Borba Manana Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 42 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/ctree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 5fa521bec07b..64346721173f 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2426,6 +2426,40 @@ done: return ret; } +static void key_search_validate(struct extent_buffer *b, + struct btrfs_key *key, + int level) +{ +#ifdef CONFIG_BTRFS_ASSERT + struct btrfs_disk_key disk_key; + + btrfs_cpu_key_to_disk(&disk_key, key); + + if (level == 0) + ASSERT(!memcmp_extent_buffer(b, &disk_key, + offsetof(struct btrfs_leaf, items[0].key), + sizeof(disk_key))); + else + ASSERT(!memcmp_extent_buffer(b, &disk_key, + offsetof(struct btrfs_node, ptrs[0].key), + sizeof(disk_key))); +#endif +} + +static int key_search(struct extent_buffer *b, struct btrfs_key *key, + int level, int *prev_cmp, int *slot) +{ + if (*prev_cmp != 0) { + *prev_cmp = bin_search(b, key, level, slot); + return *prev_cmp; + } + + key_search_validate(b, key, level); + *slot = 0; + + return 0; +} + /* * look for key in the tree. path is filled in with nodes along the way * if key is found, we return zero and you can find the item in the leaf @@ -2454,6 +2488,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root int write_lock_level = 0; u8 lowest_level = 0; int min_write_lock_level; + int prev_cmp; lowest_level = p->lowest_level; WARN_ON(lowest_level && ins_len > 0); @@ -2484,6 +2519,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root min_write_lock_level = write_lock_level; again: + prev_cmp = -1; /* * we try very hard to do read locks on the root */ @@ -2584,7 +2620,7 @@ cow_done: if (!cow) btrfs_unlock_up_safe(p, level + 1); - ret = bin_search(b, key, level, &slot); + ret = key_search(b, key, level, &prev_cmp, &slot); if (level != 0) { int dec = 0; @@ -2719,6 +2755,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key, int level; int lowest_unlock = 1; u8 lowest_level = 0; + int prev_cmp; lowest_level = p->lowest_level; WARN_ON(p->nodes[0] != NULL); @@ -2729,6 +2766,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key, } again: + prev_cmp = -1; b = get_old_root(root, time_seq); level = btrfs_header_level(b); p->locks[level] = BTRFS_READ_LOCK; @@ -2746,7 +2784,7 @@ again: */ btrfs_unlock_up_safe(p, level + 1); - ret = bin_search(b, key, level, &slot); + ret = key_search(b, key, level, &prev_cmp, &slot); if (level != 0) { int dec = 0; -- cgit v1.2.3 From 83d4cfd4da57b6ff16296875a962de2158799de6 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 30 Aug 2013 15:09:51 -0400 Subject: Btrfs: fixup error handling in btrfs_reloc_cow If we failed to actually allocate the correct size of the extent to relocate we will end up in an infinite loop because we won't return an error, we'll just move on to the next extent. So fix this up by returning an error, and then fix all the callers to return an error up the stack rather than BUG_ON()'ing. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/ctree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 64346721173f..61b5bcd57b7e 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1005,8 +1005,11 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, return ret; } - if (root->ref_cows) - btrfs_reloc_cow_block(trans, root, buf, cow); + if (root->ref_cows) { + ret = btrfs_reloc_cow_block(trans, root, buf, cow); + if (ret) + return ret; + } if (buf == root->node) { WARN_ON(parent && parent != buf); -- cgit v1.2.3