From e94acd86d48d61a5d919d807ed1efa0d8c1cd5ae Mon Sep 17 00:00:00 2001 From: Valentina Giusti Date: Mon, 4 Nov 2013 22:34:28 +0100 Subject: btrfs: replace path->slots[0] with otherwise unused variable 'slot' Signed-off-by: Valentina Giusti Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/backref.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 3775947429b2..826b98c211ae 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1683,8 +1683,8 @@ static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root, btrfs_release_path(path); leaf = path->nodes[0]; - item_size = btrfs_item_size_nr(leaf, path->slots[0]); - ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); + item_size = btrfs_item_size_nr(leaf, slot); + ptr = btrfs_item_ptr_offset(leaf, slot); cur_offset = 0; while (cur_offset < item_size) { -- cgit v1.2.3 From e33d5c3d6d61518c7f115af6d11d3dffa230d31f Mon Sep 17 00:00:00 2001 From: Kelley Nielsen Date: Mon, 4 Nov 2013 19:33:33 -0800 Subject: btrfs: bootstrap generic btrfs_find_item interface There are many btrfs functions that manually search the tree for an item. They all reimplement the same mechanism and differ in the conditions that they use to find the item. __inode_info() is one such example. Zach Brown proposed creating a new interface to take the place of these functions. This patch is the first step to creating the interface. A new function, btrfs_find_item, has been added to ctree.c and prototyped in ctree.h. It is identical to __inode_info, except that the order of the parameters has been rearranged to more closely match those of similar functions elsewhere in the code (now, root and path come first, then the objectid, offset and type, and the key to be filled in last). __inode_info's callers have been set to call this new function instead, and __inode_info itself has been removed. 
Signed-off-by: Kelley Nielsen Suggested-by: Zach Brown Reviewed-by: Josh Triplett Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/backref.c | 40 ++++------------------------------------ 1 file changed, 4 insertions(+), 36 deletions(-) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 826b98c211ae..6a3f7f50ad37 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1107,38 +1107,6 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans, return 0; } - -static int __inode_info(u64 inum, u64 ioff, u8 key_type, - struct btrfs_root *fs_root, struct btrfs_path *path, - struct btrfs_key *found_key) -{ - int ret; - struct btrfs_key key; - struct extent_buffer *eb; - - key.type = key_type; - key.objectid = inum; - key.offset = ioff; - - ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0); - if (ret < 0) - return ret; - - eb = path->nodes[0]; - if (ret && path->slots[0] >= btrfs_header_nritems(eb)) { - ret = btrfs_next_leaf(fs_root, path); - if (ret) - return ret; - eb = path->nodes[0]; - } - - btrfs_item_key_to_cpu(eb, found_key, path->slots[0]); - if (found_key->type != key.type || found_key->objectid != key.objectid) - return 1; - - return 0; -} - /* * this makes the path point to (inum INODE_ITEM ioff) */ @@ -1146,16 +1114,16 @@ int inode_item_info(u64 inum, u64 ioff, struct btrfs_root *fs_root, struct btrfs_path *path) { struct btrfs_key key; - return __inode_info(inum, ioff, BTRFS_INODE_ITEM_KEY, fs_root, path, - &key); + return btrfs_find_item(fs_root, path, inum, ioff, + BTRFS_INODE_ITEM_KEY, &key); } static int inode_ref_info(u64 inum, u64 ioff, struct btrfs_root *fs_root, struct btrfs_path *path, struct btrfs_key *found_key) { - return __inode_info(inum, ioff, BTRFS_INODE_REF_KEY, fs_root, path, - found_key); + return btrfs_find_item(fs_root, path, inum, ioff, + BTRFS_INODE_REF_KEY, found_key); } int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid, -- cgit v1.2.3 From 
3fe81ce206f3805e0eb5d886aabbf91064655144 Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Sun, 15 Dec 2013 12:43:58 +0000 Subject: Btrfs: fix deadlock when iterating inode refs and running delayed inodes While running btrfs/004 from xfstests, after 503 iterations, dmesg reported a deadlock between tasks iterating inode refs and tasks running delayed inodes (during a transaction commit). It turns out that iterating inode refs implies doing one tree search and release all nodes in the path except the leaf node, and then passing that leaf node to btrfs_ref_to_path(), which in turn does another tree search without releasing the lock on the leaf node it received as parameter. This is a problem when other task wants to write to the btree as well and ends up updating the leaf that is read locked - the writer task locks the parent of the leaf and then blocks waiting for the leaf's lock to be released - at the same time, the task executing btrfs_ref_to_path() does a second tree search, without releasing the lock on the first leaf, and wants to access a leaf (the same or another one) that is a child of the same parent, resulting in a deadlock. The trace reported by lockdep follows. [84314.936373] INFO: task fsstress:11930 blocked for more than 120 seconds. [84314.936381] Tainted: G W O 3.12.0-fdm-btrfs-next-16+ #70 [84314.936383] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [84314.936386] fsstress D ffff8806e1bf8000 0 11930 11926 0x00000000 [84314.936393] ffff8804d6d89b78 0000000000000046 ffff8804d6d89b18 ffffffff810bd8bd [84314.936399] ffff8806e1bf8000 ffff8804d6d89fd8 ffff8804d6d89fd8 ffff8804d6d89fd8 [84314.936405] ffff880806308000 ffff8806e1bf8000 ffff8804d6d89c08 ffff8804deb8f190 [84314.936410] Call Trace: [84314.936421] [] ? trace_hardirqs_on+0xd/0x10 [84314.936428] [] schedule+0x29/0x70 [84314.936451] [] btrfs_tree_lock+0x75/0x270 [btrfs] [84314.936457] [] ? 
__init_waitqueue_head+0x60/0x60 [84314.936470] [] btrfs_search_slot+0x7f1/0x930 [btrfs] [84314.936489] [] ? __btrfs_run_delayed_items+0x13a/0x1e0 [btrfs] [84314.936504] [] btrfs_lookup_inode+0x2f/0xa0 [btrfs] [84314.936510] [] ? trace_hardirqs_on_caller+0x1f/0x1e0 [84314.936528] [] __btrfs_update_delayed_inode+0x4c/0x1d0 [btrfs] [84314.936543] [] ? __btrfs_run_delayed_items+0x13a/0x1e0 [btrfs] [84314.936558] [] ? __btrfs_run_delayed_items+0x13a/0x1e0 [btrfs] [84314.936573] [] __btrfs_run_delayed_items+0x192/0x1e0 [btrfs] [84314.936589] [] btrfs_run_delayed_items+0x13/0x20 [btrfs] [84314.936604] [] btrfs_flush_all_pending_stuffs+0x24/0x80 [btrfs] [84314.936620] [] btrfs_commit_transaction+0x223/0xa20 [btrfs] [84314.936630] [] btrfs_sync_fs+0x6e/0x110 [btrfs] [84314.936635] [] ? __sync_filesystem+0x60/0x60 [84314.936639] [] ? __sync_filesystem+0x60/0x60 [84314.936643] [] sync_fs_one_sb+0x20/0x30 [84314.936648] [] iterate_supers+0xf1/0x100 [84314.936652] [] sys_sync+0x55/0x90 [84314.936658] [] system_call_fastpath+0x16/0x1b [84314.936660] INFO: lockdep is turned off. [84314.936663] INFO: task btrfs:11955 blocked for more than 120 seconds. [84314.936666] Tainted: G W O 3.12.0-fdm-btrfs-next-16+ #70 [84314.936668] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [84314.936670] btrfs D ffff880541729a88 0 11955 11608 0x00000000 [84314.936674] ffff880541729a38 0000000000000046 ffff8805417299d8 ffffffff810bd8bd [84314.936680] ffff88075430c8a0 ffff880541729fd8 ffff880541729fd8 ffff880541729fd8 [84314.936685] ffffffff81c104e0 ffff88075430c8a0 ffff8804de8b00b8 ffff8804de8b0000 [84314.936690] Call Trace: [84314.936695] [] ? trace_hardirqs_on+0xd/0x10 [84314.936700] [] schedule+0x29/0x70 [84314.936717] [] btrfs_tree_read_lock+0xd5/0x140 [btrfs] [84314.936721] [] ? __init_waitqueue_head+0x60/0x60 [84314.936733] [] btrfs_search_slot+0x7c1/0x930 [btrfs] [84314.936746] [] btrfs_find_item+0x55/0x160 [btrfs] [84314.936763] [] ? 
free_extent_buffer+0x49/0xc0 [btrfs] [84314.936780] [] btrfs_ref_to_path+0xba/0x1e0 [btrfs] [84314.936797] [] ? release_extent_buffer+0xb9/0xe0 [btrfs] [84314.936813] [] ? free_extent_buffer+0x49/0xc0 [btrfs] [84314.936830] [] inode_to_path+0x60/0xd0 [btrfs] [84314.936846] [] paths_from_inode+0x115/0x3c0 [btrfs] [84314.936851] [] ? kmem_cache_alloc_trace+0x114/0x200 [84314.936868] [] btrfs_ioctl+0xf14/0x2030 [btrfs] [84314.936873] [] ? _raw_spin_unlock+0x2b/0x50 [84314.936877] [] ? handle_mm_fault+0x34f/0xb00 [84314.936882] [] ? up_read+0x23/0x40 [84314.936886] [] ? __do_page_fault+0x20c/0x5a0 [84314.936892] [] do_vfs_ioctl+0x96/0x570 [84314.936896] [] ? error_sti+0x5/0x6 [84314.936901] [] ? trace_hardirqs_off_caller+0x28/0xd0 [84314.936906] [] ? retint_swapgs+0xe/0x13 [84314.936910] [] SyS_ioctl+0x91/0xb0 [84314.936915] [] ? trace_hardirqs_on_thunk+0x3a/0x3f [84314.936920] [] system_call_fastpath+0x16/0x1b [84314.936922] INFO: lockdep is turned off. [84434.866873] INFO: task btrfs-transacti:11921 blocked for more than 120 seconds. [84434.866881] Tainted: G W O 3.12.0-fdm-btrfs-next-16+ #70 [84434.866883] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [84434.866886] btrfs-transacti D ffff880755b6a478 0 11921 2 0x00000000 [84434.866893] ffff8800735b9ce8 0000000000000046 ffff8800735b9c88 ffffffff810bd8bd [84434.866899] ffff8805a1b848a0 ffff8800735b9fd8 ffff8800735b9fd8 ffff8800735b9fd8 [84434.866904] ffffffff81c104e0 ffff8805a1b848a0 ffff880755b6a478 ffff8804cece78f0 [84434.866910] Call Trace: [84434.866920] [] ? trace_hardirqs_on+0xd/0x10 [84434.866927] [] schedule+0x29/0x70 [84434.866948] [] wait_current_trans.isra.33+0xbf/0x120 [btrfs] [84434.866954] [] ? __init_waitqueue_head+0x60/0x60 [84434.866970] [] start_transaction+0x388/0x5a0 [btrfs] [84434.866985] [] ? transaction_kthread+0xb5/0x280 [btrfs] [84434.866999] [] btrfs_attach_transaction+0x17/0x20 [btrfs] [84434.867012] [] transaction_kthread+0x19e/0x280 [btrfs] [84434.867026] [] ? 
open_ctree+0x2260/0x2260 [btrfs] [84434.867030] [] kthread+0xed/0x100 [84434.867035] [] ? flush_kthread_worker+0x190/0x190 [84434.867040] [] ret_from_fork+0x7c/0xb0 [84434.867044] [] ? flush_kthread_worker+0x190/0x190 Signed-off-by: Filipe David Borba Manana Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/backref.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 6a3f7f50ad37..835b6c9a26a8 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1569,7 +1569,6 @@ static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root, struct btrfs_key found_key; while (!ret) { - path->leave_spinning = 1; ret = inode_ref_info(inum, parent ? parent+1 : 0, fs_root, path, &found_key); if (ret < 0) @@ -1582,9 +1581,12 @@ static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root, parent = found_key.offset; slot = path->slots[0]; - eb = path->nodes[0]; - /* make sure we can use eb after releasing the path */ - atomic_inc(&eb->refs); + eb = btrfs_clone_extent_buffer(path->nodes[0]); + if (!eb) { + ret = -ENOMEM; + break; + } + extent_buffer_get(eb); btrfs_tree_read_lock(eb); btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); btrfs_release_path(path); @@ -1642,9 +1644,12 @@ static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root, ++found; slot = path->slots[0]; - eb = path->nodes[0]; - /* make sure we can use eb after releasing the path */ - atomic_inc(&eb->refs); + eb = btrfs_clone_extent_buffer(path->nodes[0]); + if (!eb) { + ret = -ENOMEM; + break; + } + extent_buffer_get(eb); btrfs_tree_read_lock(eb); btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); -- cgit v1.2.3 From d7df2c796d7eedd72a334dc89c65e1fec8171431 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 23 Jan 2014 09:21:38 -0500 Subject: Btrfs: attach delayed ref updates to delayed ref heads Currently we have two rb-trees, one for delayed ref heads and one 
for all of the delayed refs, including the delayed ref heads. When we process the delayed refs we have to hold onto the delayed ref lock for all of the selecting and merging and such, which results in quite a bit of lock contention. This was solved by having a waitqueue and only one flusher at a time, however this hurts if we get a lot of delayed refs queued up. So instead just have an rb tree for the delayed ref heads, and then attach the delayed ref updates to an rb tree that is per delayed ref head. Then we only need to take the delayed ref lock when adding new delayed refs and when selecting a delayed ref head to process, all the rest of the time we deal with a per delayed ref head lock which will be much less contentious. The locking rules for this get a little more complicated since we have to lock up to 3 things to properly process delayed refs, but I will address that problem later. For now this passes all of xfstests and my overnight stress tests. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/backref.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 835b6c9a26a8..34a8952de8dd 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -538,14 +538,13 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, if (extent_op && extent_op->update_key) btrfs_disk_key_to_cpu(&op_key, &extent_op->key); - while ((n = rb_prev(n))) { + spin_lock(&head->lock); + n = rb_first(&head->ref_root); + while (n) { struct btrfs_delayed_ref_node *node; node = rb_entry(n, struct btrfs_delayed_ref_node, rb_node); - if (node->bytenr != head->node.bytenr) - break; - WARN_ON(node->is_head); - + n = rb_next(n); if (node->seq > seq) continue; @@ -612,10 +611,10 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, WARN_ON(1); } if (ret) - return ret; + break; } - - return 0; + 
spin_unlock(&head->lock); + return ret; } /* @@ -882,15 +881,15 @@ again: btrfs_put_delayed_ref(&head->node); goto again; } + spin_unlock(&delayed_refs->lock); ret = __add_delayed_refs(head, time_seq, &prefs_delayed); mutex_unlock(&head->mutex); - if (ret) { - spin_unlock(&delayed_refs->lock); + if (ret) goto out; - } + } else { + spin_unlock(&delayed_refs->lock); } - spin_unlock(&delayed_refs->lock); } if (path->slots[0]) { -- cgit v1.2.3 From 580f0a678ebeba85d30b6a7f22ce32c472263c72 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 23 Jan 2014 16:03:45 -0500 Subject: Btrfs: fix extent_from_logical to deal with skinny metadata I don't think this is an issue and I've not seen it in practice but extent_from_logical will fail to find a skinny extent because it uses btrfs_previous_item and gives it the normal extent item type. This is just not a place to use btrfs_previous_item since we care about either normal extents or skinny extents, so open code btrfs_previous_item to properly check. This would only affect metadata and the only place this is used for metadata is scrub and I'm pretty sure it's just for printing stuff out, not actually doing any work so hopefully it was never a problem other than a cosmetic one. 
Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/backref.c | 41 +++++++++++++++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 8 deletions(-) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 34a8952de8dd..dcf2448c16f1 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1302,20 +1302,45 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0); if (ret < 0) return ret; - ret = btrfs_previous_item(fs_info->extent_root, path, - 0, BTRFS_EXTENT_ITEM_KEY); - if (ret < 0) - return ret; - btrfs_item_key_to_cpu(path->nodes[0], found_key, path->slots[0]); + while (1) { + u32 nritems; + if (path->slots[0] == 0) { + btrfs_set_path_blocking(path); + ret = btrfs_prev_leaf(fs_info->extent_root, path); + if (ret != 0) { + if (ret > 0) { + pr_debug("logical %llu is not within " + "any extent\n", logical); + ret = -ENOENT; + } + return ret; + } + } else { + path->slots[0]--; + } + nritems = btrfs_header_nritems(path->nodes[0]); + if (nritems == 0) { + pr_debug("logical %llu is not within any extent\n", + logical); + return -ENOENT; + } + if (path->slots[0] == nritems) + path->slots[0]--; + + btrfs_item_key_to_cpu(path->nodes[0], found_key, + path->slots[0]); + if (found_key->type == BTRFS_EXTENT_ITEM_KEY || + found_key->type == BTRFS_METADATA_ITEM_KEY) + break; + } + if (found_key->type == BTRFS_METADATA_ITEM_KEY) size = fs_info->extent_root->leafsize; else if (found_key->type == BTRFS_EXTENT_ITEM_KEY) size = found_key->offset; - if ((found_key->type != BTRFS_EXTENT_ITEM_KEY && - found_key->type != BTRFS_METADATA_ITEM_KEY) || - found_key->objectid > logical || + if (found_key->objectid > logical || found_key->objectid + size <= logical) { pr_debug("logical %llu is not within any extent\n", logical); return -ENOENT; -- cgit v1.2.3 From 7ef81ac86c8a44ab9f4e6e04e1f4c9ea53615b8a Mon Sep 17 00:00:00 2001 From: 
Josef Bacik Date: Fri, 24 Jan 2014 14:05:42 -0500 Subject: Btrfs: only process as many file extents as there are refs The backref walking code will search down to the key it is looking for and then proceed to walk _all_ of the extents on the file until it hits the end. This is suboptimal with large files, we only need to look for as many extents as we have references for that inode. I have a testcase that creates a randomly written 4 gig file and before this patch it took 6min 30sec to do the initial send, with this patch it takes 2min 30sec to do the initial send. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/backref.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index dcf2448c16f1..15384968a84a 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -209,18 +209,19 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id, } static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, - struct ulist *parents, int level, - struct btrfs_key *key_for_search, u64 time_seq, - u64 wanted_disk_byte, - const u64 *extent_item_pos) + struct ulist *parents, struct __prelim_ref *ref, + int level, u64 time_seq, const u64 *extent_item_pos) { int ret = 0; int slot; struct extent_buffer *eb; struct btrfs_key key; + struct btrfs_key *key_for_search = &ref->key_for_search; struct btrfs_file_extent_item *fi; struct extent_inode_elem *eie = NULL, *old = NULL; u64 disk_byte; + u64 wanted_disk_byte = ref->wanted_disk_byte; + u64 count = 0; if (level != 0) { eb = path->nodes[level]; @@ -238,7 +239,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) ret = btrfs_next_old_leaf(root, path, time_seq); - while (!ret) { + while (!ret && count < ref->count) { eb = path->nodes[0]; slot = path->slots[0]; @@ -254,6 +255,7 @@ static int 
add_all_parents(struct btrfs_root *root, struct btrfs_path *path, if (disk_byte == wanted_disk_byte) { eie = NULL; old = NULL; + count++; if (extent_item_pos) { ret = check_extent_in_eb(&key, eb, fi, *extent_item_pos, @@ -334,9 +336,8 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, eb = path->nodes[level]; } - ret = add_all_parents(root, path, parents, level, &ref->key_for_search, - time_seq, ref->wanted_disk_byte, - extent_item_pos); + ret = add_all_parents(root, path, parents, ref, level, time_seq, + extent_item_pos); out: path->lowest_level = 0; btrfs_release_path(path); -- cgit v1.2.3 From 538f72cdf03cad1c21c551ea542c8ce7d9fa2d81 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Thu, 23 Jan 2014 13:47:48 +0800 Subject: Btrfs: fix protection between walking backrefs and root deletion There is a race condition between resolving indirect ref and root deletion, and we should guarantee that root can not be destroyed to avoid accessing broken tree here. Here we fix it by holding @subvol_srcu, and we will release it as soon as we have held root node lock. 
Signed-off-by: Wang Shilong Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/backref.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 15384968a84a..10ae5700ab1e 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -301,23 +301,34 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, int ret = 0; int root_level; int level = ref->level; + int index; root_key.objectid = ref->root_id; root_key.type = BTRFS_ROOT_ITEM_KEY; root_key.offset = (u64)-1; + + index = srcu_read_lock(&fs_info->subvol_srcu); + root = btrfs_read_fs_root_no_name(fs_info, &root_key); if (IS_ERR(root)) { + srcu_read_unlock(&fs_info->subvol_srcu, index); ret = PTR_ERR(root); goto out; } root_level = btrfs_old_root_level(root, time_seq); - if (root_level + 1 == level) + if (root_level + 1 == level) { + srcu_read_unlock(&fs_info->subvol_srcu, index); goto out; + } path->lowest_level = level; ret = btrfs_search_old_slot(root, &ref->key_for_search, path, time_seq); + + /* root node has been locked, we can release @subvol_srcu safely here */ + srcu_read_unlock(&fs_info->subvol_srcu, index); + pr_debug("search slot in root %llu (level %d, ref count %d) returned " "%d for key (%llu %u %llu)\n", ref->root_id, level, ref->count, ret, -- cgit v1.2.3 From 95def2ede1a9dd12b164932eaf5fefb67aefc41c Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Thu, 23 Jan 2014 13:47:49 +0800 Subject: Btrfs: fix to catch all errors when resolving indirect ref We can only tolerate ENOENT here, for other errors, we should return directly. 
Signed-off-by: Wang Shilong Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/backref.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 10ae5700ab1e..55ffcf44b909 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -388,10 +388,16 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, continue; err = __resolve_indirect_ref(fs_info, path, time_seq, ref, parents, extent_item_pos); - if (err == -ENOMEM) - goto out; - if (err) + /* + * we can only tolerate ENOENT,otherwise,we should catch error + * and return directly. + */ + if (err == -ENOENT) { continue; + } else if (err) { + ret = err; + goto out; + } /* we put the first parent into the ref at hand */ ULIST_ITER_INIT(&uiter); -- cgit v1.2.3 From bca1a290033d20981e11f81ae4207e4d0fa5b1e6 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Sun, 26 Jan 2014 22:32:18 +0800 Subject: Btrfs: add a reschedule point in btrfs_find_all_roots() I can easily trigger the following warnings when enabling quota in my virtual machine(running Opensuse), Steps are firstly creating a subvolume full of fragment extents, and then create many snapshots (500 in my test case). [ 2362.808459] BUG: soft lockup - CPU#0 stuck for 22s! 
[btrfs-qgroup-re:1970] [ 2362.809023] task: e4af8450 ti: e371c000 task.ti: e371c000 [ 2362.809026] EIP: 0060:[] EFLAGS: 00000246 CPU: 0 [ 2362.809049] EIP is at __merge_refs+0x5e/0x100 [btrfs] [ 2362.809051] EAX: 00000000 EBX: cfadbcf0 ECX: 00000000 EDX: cfadbcb0 [ 2362.809052] ESI: dd8d3370 EDI: e371dde0 EBP: e371dd6c ESP: e371dd5c [ 2362.809054] DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 [ 2362.809055] CR0: 80050033 CR2: ac454d50 CR3: 009a9000 CR4: 001407d0 [ 2362.809099] Stack: [ 2362.809100] 00000001 e371dde0 dfcc6890 f29f8000 e371de28 fa39016d 00000011 00000001 [ 2362.809105] 99bfc000 00000000 93928000 00000000 00000001 00000050 e371dda8 00000001 [ 2362.809109] f3a31000 f3413000 00000001 e371ddb8 000040a8 00000202 00000000 00000023 [ 2362.809113] Call Trace: [ 2362.809136] [] find_parent_nodes+0x34d/0x1280 [btrfs] [ 2362.809156] [] btrfs_find_all_roots+0xb2/0x110 [btrfs] [ 2362.809174] [] btrfs_qgroup_rescan_worker+0x358/0x7a0 [btrfs] [ 2362.809180] [] ? lock_timer_base.isra.39+0x1e/0x40 [ 2362.809199] [] worker_loop+0xff/0x470 [btrfs] [ 2362.809204] [] ? __wake_up_locked+0x1a/0x20 [ 2362.809221] [] ? btrfs_queue_worker+0x2b0/0x2b0 [btrfs] [ 2362.809225] [] kthread+0x9c/0xb0 [ 2362.809229] [] ret_from_kernel_thread+0x1b/0x30 [ 2362.809233] [] ? kthread_create_on_node+0x110/0x110 By adding a reschedule point at the end of btrfs_find_all_roots(), i no longer hit these warnings. 
Cc: Josef Bacik Signed-off-by: Wang Shilong Reviewed-by: David Sterba Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/backref.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 55ffcf44b909..7966acd5dc7f 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1118,6 +1118,7 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans, if (!node) break; bytenr = node->val; + cond_resched(); } ulist_free(tmp); -- cgit v1.2.3 From f05c474688762f186b16a26366755b6ef0bfed0c Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Tue, 28 Jan 2014 19:13:38 +0800 Subject: Btrfs: fix memory leaks on walking backrefs failure When walking backrefs, we may iterate every inode's extent and add/merge them into ulist, and the caller will free memory from ulist. However, if we fail to allocate inode's extents element memory or ulist_add() fail to allocate memory, we won't add allocated memory into ulist, and the caller won't free some allocated memory thus memory leaks happen. 
Signed-off-by: Wang Shilong Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/backref.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 7966acd5dc7f..aded3ef3d3d4 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -66,6 +66,16 @@ static int check_extent_in_eb(struct btrfs_key *key, struct extent_buffer *eb, return 0; } +static void free_inode_elem_list(struct extent_inode_elem *eie) +{ + struct extent_inode_elem *eie_next; + + for (; eie; eie = eie_next) { + eie_next = eie->next; + kfree(eie); + } +} + static int find_extent_in_eb(struct extent_buffer *eb, u64 wanted_disk_byte, u64 extent_item_pos, struct extent_inode_elem **eie) @@ -275,6 +285,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, old = old->next; old->next = eie; } + eie = NULL; } next: ret = btrfs_next_old_item(root, path, time_seq); @@ -282,6 +293,8 @@ next: if (ret > 0) ret = 0; + else if (ret < 0) + free_inode_elem_list(eie); return ret; } @@ -845,6 +858,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans, struct list_head prefs_delayed; struct list_head prefs; struct __prelim_ref *ref; + struct extent_inode_elem *eie = NULL; INIT_LIST_HEAD(&prefs); INIT_LIST_HEAD(&prefs_delayed); @@ -958,7 +972,6 @@ again: goto out; } if (ref->count && ref->parent) { - struct extent_inode_elem *eie = NULL; if (extent_item_pos && !ref->inode_list) { u32 bsz; struct extent_buffer *eb; @@ -993,6 +1006,7 @@ again: eie = eie->next; eie->next = ref->inode_list; } + eie = NULL; } list_del(&ref->list); kmem_cache_free(btrfs_prelim_ref_cache, ref); @@ -1011,7 +1025,8 @@ out: list_del(&ref->list); kmem_cache_free(btrfs_prelim_ref_cache, ref); } - + if (ret < 0) + free_inode_elem_list(eie); return ret; } @@ -1019,7 +1034,6 @@ static void free_leaf_list(struct ulist *blocks) { struct ulist_node *node = NULL; struct 
extent_inode_elem *eie; - struct extent_inode_elem *eie_next; struct ulist_iterator uiter; ULIST_ITER_INIT(&uiter); @@ -1027,10 +1041,7 @@ static void free_leaf_list(struct ulist *blocks) if (!node->aux) continue; eie = (struct extent_inode_elem *)(uintptr_t)node->aux; - for (; eie; eie = eie_next) { - eie_next = eie->next; - kfree(eie); - } + free_inode_elem_list(eie); node->aux = 0; } -- cgit v1.2.3