From 925baeddc5b0764a53f2214a1253251bab0e0324 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jun 2008 16:01:30 -0400 Subject: Btrfs: Start btree concurrency work. The allocation trees and the chunk trees are serialized via their own dedicated mutexes. This means allocation location is still not very fine grained. The main FS btree is protected by locks on each block in the btree. Locks are taken top / down, and as processing finishes on a given level of the tree, the lock is released after locking the lower level. The end result of a search is now a path where only the lowest level is locked. Releasing or freeing the path drops any locks held. Signed-off-by: Chris Mason --- fs/btrfs/locking.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 fs/btrfs/locking.c (limited to 'fs/btrfs/locking.c') diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c new file mode 100644 index 000000000000..80813a307b4b --- /dev/null +++ b/fs/btrfs/locking.c @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2008 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ */ +#include +#include +#include +#include +#include +#include +#include "ctree.h" +#include "extent_io.h" +#include "locking.h" + +int btrfs_tree_lock(struct extent_buffer *eb) +{ + lock_page(eb->first_page); + return 0; +} + +int btrfs_try_tree_lock(struct extent_buffer *eb) +{ + return TestSetPageLocked(eb->first_page); +} + +int btrfs_tree_unlock(struct extent_buffer *eb) +{ + WARN_ON(!PageLocked(eb->first_page)); + unlock_page(eb->first_page); + return 0; +} + +int btrfs_tree_locked(struct extent_buffer *eb) +{ + return PageLocked(eb->first_page); +} + -- cgit v1.2.3 From f9efa9c784aa3b801feb367f72c6867d26fb348e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jun 2008 16:14:04 -0400 Subject: Btrfs: Reduce contention on the root node This calls unlock_up sooner in btrfs_search_slot in order to decrease the amount of work done with the higher level tree locks held. Also, it changes btrfs_tree_lock to spin for a bit against the page lock before scheduling. This makes a big difference in context switch rate under highly contended workloads. Longer term, a better locking structure is needed than the page lock. 
Signed-off-by: Chris Mason --- fs/btrfs/locking.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'fs/btrfs/locking.c') diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 80813a307b4b..058a506a0dd8 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -27,6 +27,16 @@ int btrfs_tree_lock(struct extent_buffer *eb) { + int i; + + if (!TestSetPageLocked(eb->first_page)) + return 0; + for (i = 0; i < 512; i++) { + cpu_relax(); + if (!TestSetPageLocked(eb->first_page)) + return 0; + } + cpu_relax(); lock_page(eb->first_page); return 0; } -- cgit v1.2.3 From a61e6f29dc7c9d56a776a518eed92bbc61848263 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 22 Jul 2008 11:18:08 -0400 Subject: Btrfs: Use a mutex in the extent buffer for tree block locking This replaces the use of the page cache lock bit for locking, which wasn't suitable for block size < page size and couldn't be used recursively. The mutexes alone don't fix either problem, but they are the first step. Signed-off-by: Chris Mason --- fs/btrfs/locking.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/locking.c') diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 058a506a0dd8..01a9ac2be3f7 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -29,32 +29,31 @@ int btrfs_tree_lock(struct extent_buffer *eb) { int i; - if (!TestSetPageLocked(eb->first_page)) + if (mutex_trylock(&eb->mutex)) return 0; for (i = 0; i < 512; i++) { cpu_relax(); - if (!TestSetPageLocked(eb->first_page)) + if (mutex_trylock(&eb->mutex)) return 0; } cpu_relax(); - lock_page(eb->first_page); + mutex_lock(&eb->mutex); return 0; } int btrfs_try_tree_lock(struct extent_buffer *eb) { - return TestSetPageLocked(eb->first_page); + return mutex_trylock(&eb->mutex); } int btrfs_tree_unlock(struct extent_buffer *eb) { - WARN_ON(!PageLocked(eb->first_page)); - unlock_page(eb->first_page); + mutex_unlock(&eb->mutex); return 0; } int btrfs_tree_locked(struct extent_buffer 
*eb) { - return PageLocked(eb->first_page); + return mutex_is_locked(&eb->mutex); } -- cgit v1.2.3 From 6dddcbeb28b34620ad033f1e8d9f6960bafdd7d2 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 22 Jul 2008 11:18:09 -0400 Subject: Btrfs: Use mutex_lock_nested for tree locking Lockdep has the notion of locking subclasses so that you can identify locks you expect to be taken after other locks of the same class. This changes the per-extent buffer btree locking routines to use a subclass based on the level in the tree. Unfortunately, lockdep can only handle 8 total subclasses, and the btrfs max level is also 8. So when lockdep is on, use a lower max level. Signed-off-by: Chris Mason --- fs/btrfs/locking.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/locking.c') diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 01a9ac2be3f7..27a02376ab14 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -37,7 +37,7 @@ int btrfs_tree_lock(struct extent_buffer *eb) return 0; } cpu_relax(); - mutex_lock(&eb->mutex); + mutex_lock_nested(&eb->mutex, BTRFS_MAX_LEVEL - btrfs_header_level(eb)); return 0; } -- cgit v1.2.3 From 4881ee5a2e995c6a8999b56de70aa3834369d8ee Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 24 Jul 2008 09:51:08 -0400 Subject: Btrfs: Fix some build problems on 2.6.18 based enterprise kernels Signed-off-by: Chris Mason --- fs/btrfs/locking.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/locking.c') diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 27a02376ab14..d617c29787fa 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include "ctree.h" #include "extent_io.h" #include "locking.h" -- cgit v1.2.3 From bcc63abbf3e9bf948a1b0129b3e6120ec7d7f698 Mon Sep 17 00:00:00 2001 From: Yan Date: Wed, 30 Jul 2008 16:29:20 -0400 Subject: Btrfs: implement memory reclaim for leaf reference cache The memory reclaiming issue 
happens when snapshot exists. In that case, some cache entries may not be used during old snapshot dropping, so they will remain in the cache until umount. The patch adds a field to struct btrfs_leaf_ref to record create time. Besides, the patch makes all dead roots of a given snapshot linked together in order of create time. After an old snapshot was completely dropped, we check the dead root list and remove all cache entries created before the oldest dead root in the list. Signed-off-by: Chris Mason --- fs/btrfs/locking.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/btrfs/locking.c') diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index d617c29787fa..d43e14c7471a 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -56,4 +56,3 @@ int btrfs_tree_locked(struct extent_buffer *eb) { return mutex_is_locked(&eb->mutex); } - -- cgit v1.2.3 From 65b51a009e29e64c0951f21ea17fdc66bbb0fbd7 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 1 Aug 2008 15:11:20 -0400 Subject: btrfs_search_slot: reduce lock contention by cowing in two stages A btree block cow has two parts, the first is to allocate a destination block and the second is to copy the old block over. The first part needs locks in the extent allocation tree, and may need to do IO. This changeset splits that into a separate function that can be called without any tree locks held. btrfs_search_slot is changed to drop its path and start over if it has to COW a contended block. This often means that many writers will pre-alloc a new destination for the same contended block, but they cache their prealloc for later use on lower levels in the tree. 
Signed-off-by: Chris Mason --- fs/btrfs/locking.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'fs/btrfs/locking.c') diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index d43e14c7471a..0cc314c10d66 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -56,3 +56,19 @@ int btrfs_tree_locked(struct extent_buffer *eb) { return mutex_is_locked(&eb->mutex); } + +int btrfs_path_lock_waiting(struct btrfs_path *path, int level) +{ + int i; + struct extent_buffer *eb; + for (i = level; i <= level + 1 && i < BTRFS_MAX_LEVEL; i++) { + eb = path->nodes[i]; + if (!eb) + break; + smp_mb(); + if (!list_empty(&eb->mutex.wait_list)) + return 1; + } + return 0; +} + -- cgit v1.2.3 From d352ac68148b69937d39ca5d48bcc4478e118dbf Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 29 Sep 2008 15:18:18 -0400 Subject: Btrfs: add and improve comments This improves the comments at the top of many functions. It didn't dive into the guts of functions because I was trying to avoid merging problems with the new allocator and back reference work. extent-tree.c and volumes.c were both skipped, and there is definitely more work to do in cleaning and commenting the code. Signed-off-by: Chris Mason --- fs/btrfs/locking.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'fs/btrfs/locking.c') diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 0cc314c10d66..e30aa6e2958f 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -25,6 +25,15 @@ #include "extent_io.h" #include "locking.h" +/* + * locks the per buffer mutex in an extent buffer. This uses adaptive locks + * and the spin is not tuned very extensively. The spinning does make a big + * difference in almost every workload, but spinning for the right amount of + * time needs some help. + * + * In general, we want to spin as long as the lock holder is doing btree searches, + * and we should give up if they are in more expensive code. 
+ */ int btrfs_tree_lock(struct extent_buffer *eb) { int i; @@ -57,6 +66,10 @@ int btrfs_tree_locked(struct extent_buffer *eb) return mutex_is_locked(&eb->mutex); } +/* + * btrfs_search_slot uses this to decide if it should drop its locks + * before doing something expensive like allocating free blocks for cow. + */ int btrfs_path_lock_waiting(struct btrfs_path *path, int level) { int i; -- cgit v1.2.3 From d397712bcc6a759a560fd247e6053ecae091f958 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 5 Jan 2009 21:25:51 -0500 Subject: Btrfs: Fix checkpatch.pl warnings There were many, most are fixed now. struct-funcs.c generates some warnings but these are bogus. Signed-off-by: Chris Mason --- fs/btrfs/locking.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/locking.c') diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index e30aa6e2958f..39bae7761db6 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -31,9 +31,10 @@ * difference in almost every workload, but spinning for the right amount of * time needs some help. * - * In general, we want to spin as long as the lock holder is doing btree searches, - * and we should give up if they are in more expensive code. + * In general, we want to spin as long as the lock holder is doing btree + * searches, and we should give up if they are in more expensive code. */ + int btrfs_tree_lock(struct extent_buffer *eb) { int i; -- cgit v1.2.3