From 1e5063d093b5f024ae35bf835ca07463de2c1a1f Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 12 Jul 2011 10:46:06 -0700 Subject: btrfs: Don't BUG_ON alloc_path errors in replay_one_buffer() The two ->process_func call sites in tree-log.c which were ignoring a return code have also been updated to gracefully exit as well. Signed-off-by: Mark Fasheh --- fs/btrfs/tree-log.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 4ce8a9f41d1e..f3cacc079102 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1617,7 +1617,8 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, return 0; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; nritems = btrfs_header_nritems(eb); for (i = 0; i < nritems; i++) { @@ -1723,7 +1724,9 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, return -ENOMEM; if (*level == 1) { - wc->process_func(root, next, wc, ptr_gen); + ret = wc->process_func(root, next, wc, ptr_gen); + if (ret) + return ret; path->slots[*level]++; if (wc->free) { @@ -1788,8 +1791,11 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, parent = path->nodes[*level + 1]; root_owner = btrfs_header_owner(parent); - wc->process_func(root, path->nodes[*level], wc, + ret = wc->process_func(root, path->nodes[*level], wc, btrfs_header_generation(path->nodes[*level])); + if (ret) + return ret; + if (wc->free) { struct extent_buffer *next; -- cgit v1.2.3 From bd681513fa6f2ff29aa391f01e413a2d1c59fd77 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sat, 16 Jul 2011 15:23:14 -0400 Subject: Btrfs: switch the btrfs tree locks to reader/writer The btrfs metadata btree is the source of significant lock contention, especially in the root node. This commit changes our locking to use a reader/writer lock. The lock is built on top of rw spinlocks, and it extends the lock tracking to remember if we have a read lock or a write lock when we go to blocking. Atomics count the number of blocking readers or writers at any given time. It removes all of the adaptive spinning from the old code and uses only the spinning/blocking hints inside of btrfs to decide when it should continue spinning. In read heavy workloads this is dramatically faster. In write heavy workloads we're still faster because of less contention on the root node lock. We suffer slightly in dbench because we schedule more often during write locks, but all other benchmarks so far are improved. Signed-off-by: Chris Mason --- fs/btrfs/tree-log.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 4ce8a9f41d1e..ac278dd83175 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1730,8 +1730,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, btrfs_read_buffer(next, ptr_gen); btrfs_tree_lock(next); - clean_tree_block(trans, root, next); btrfs_set_lock_blocking(next); + clean_tree_block(trans, root, next); btrfs_wait_tree_block_writeback(next); btrfs_tree_unlock(next); @@ -1796,8 +1796,8 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, next = path->nodes[*level]; btrfs_tree_lock(next); - clean_tree_block(trans, root, next); btrfs_set_lock_blocking(next); + clean_tree_block(trans, root, next); btrfs_wait_tree_block_writeback(next); btrfs_tree_unlock(next); @@ -1864,8 +1864,8 @@ static int walk_log_tree(struct btrfs_trans_handle *trans, next = path->nodes[orig_level]; btrfs_tree_lock(next); - clean_tree_block(trans, log, next); btrfs_set_lock_blocking(next); + clean_tree_block(trans, log, next); btrfs_wait_tree_block_writeback(next); btrfs_tree_unlock(next); -- cgit v1.2.3 From 34f3e4f23ca3d259fe078f62a128d97ca83508ef Mon Sep 17 00:00:00 2001 From: liubo Date: Sat, 6 Aug 2011 08:35:23 +0000 Subject: Btrfs: fix an oops of log replay When btrfs recovers from a crash, it may hit the oops below: ------------[ cut here ]------------ kernel BUG at fs/btrfs/inode.c:4580! [...] RIP: 0010:[] [] btrfs_add_link+0x161/0x1c0 [btrfs] [...] Call Trace: [] ? btrfs_inode_ref_index+0x31/0x80 [btrfs] [] add_inode_ref+0x319/0x3f0 [btrfs] [] replay_one_buffer+0x2c7/0x390 [btrfs] [] walk_down_log_tree+0x32a/0x480 [btrfs] [] walk_log_tree+0xf5/0x240 [btrfs] [] btrfs_recover_log_trees+0x250/0x350 [btrfs] [] ? btrfs_recover_log_trees+0x350/0x350 [btrfs] [] open_ctree+0x1442/0x17d0 [btrfs] [...] This comes from that while replaying an inode ref item, we forget to check those old conflicting DIR_ITEM and DIR_INDEX items in fs/file tree, then we will come to conflict corners which lead to BUG_ON(). Signed-off-by: Liu Bo Tested-by: Andy Lutomirski Signed-off-by: Chris Mason --- fs/btrfs/tree-log.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index babee65f8eda..786639fca067 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -799,14 +799,15 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, struct extent_buffer *eb, int slot, struct btrfs_key *key) { - struct inode *dir; - int ret; struct btrfs_inode_ref *ref; + struct btrfs_dir_item *di; + struct inode *dir; struct inode *inode; - char *name; - int namelen; unsigned long ref_ptr; unsigned long ref_end; + char *name; + int namelen; + int ret; int search_done = 0; /* @@ -909,6 +910,25 @@ again: } btrfs_release_path(path); + /* look for a conflicting sequence number */ + di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir), + btrfs_inode_ref_index(eb, ref), + name, namelen, 0); + if (di && !IS_ERR(di)) { + ret = drop_one_dir_item(trans, root, path, dir, di); + BUG_ON(ret); + } + btrfs_release_path(path); + + /* look for a conflicing name */ + di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(dir), + name, namelen, 0); + if (di && !IS_ERR(di)) { + ret = drop_one_dir_item(trans, root, path, dir, di); + BUG_ON(ret); + } + btrfs_release_path(path); + insert: /* insert our name */ ret = btrfs_add_link(trans, dir, inode, name, namelen, 0, -- cgit v1.2.3