From b2aaaa3b8c2153ef17f0e22287acbf8ee31a7c82 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 5 Jul 2013 17:05:38 -0400 Subject: Btrfs: set lockdep class before locking new extent buffer We've been seeing spurious complaints out of lockdep because the lock class name changes. This is happening because when we drop a snapshot we will lock a block before we've read it in, which sets the lockdep class to whatever the default is. Then once we read the thing in we reset the lockdep class to what it is supposed to be, which blows lockdeps' mind. This patch should fix the problem, it appears to be the only place where we do this sort of thing. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1204c8ef6f32..7e172c6dba68 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -7173,6 +7173,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, next = btrfs_find_create_tree_block(root, bytenr, blocksize); if (!next) return -ENOMEM; + btrfs_set_buffer_lockdep_class(root->root_key.objectid, next, + level - 1); reada = 1; } btrfs_tree_lock(next); -- cgit v1.2.3 From 74be9510876a66ad9826613ac8a526d26f9e7f01 Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Fri, 5 Jul 2013 23:12:06 +0100 Subject: Btrfs: optimize btrfs_lookup_extent_info() If we're looking for a metadata item in the tree and the search fails with return value of 1, and the slot doesn't point to the first item in the leaf, check if the previous item in the leaf corresponds to an extent item for the same object id - if it does, then don't do another tree search to get it. This optimization is already done by btrfs-progs. V2: updated commit message. Signed-off-by: Filipe David Borba Manana Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7e172c6dba68..3e7e05e6156f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -771,10 +771,23 @@ again: goto out_free; if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) { - key.type = BTRFS_EXTENT_ITEM_KEY; - key.offset = root->leafsize; - btrfs_release_path(path); - goto again; + metadata = 0; + if (path->slots[0]) { + path->slots[0]--; + btrfs_item_key_to_cpu(path->nodes[0], &key, + path->slots[0]); + if (key.objectid == bytenr && + key.type == BTRFS_EXTENT_ITEM_KEY && + key.offset == root->leafsize) + ret = 0; + } + if (ret) { + key.objectid = bytenr; + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = root->leafsize; + btrfs_release_path(path); + goto again; + } } if (ret == 0) { -- cgit v1.2.3 From ee3441b49092000402748f5345ee0a3d4c8ac04e Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Tue, 9 Jul 2013 16:37:21 -0400 Subject: btrfs: fall back to global reservation when removing subvolumes I recently did some ENOSPC testing that involved filling the disk while create and removing snapshots in a loop. During the test cycle, I ran into an ENOSPC when trying to remove a snapshot, leaving the fs stuck in ENOSPC even after a umount/mount cycle. This patch allow subvolume removal to fall back onto the global block reservation in order to succeed when it would have failed otherwise. Signed-off-by: Jeff Mahoney Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3e7e05e6156f..7bea4d2f85dc 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4742,10 +4742,12 @@ void btrfs_orphan_release_metadata(struct inode *inode) int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, struct btrfs_block_rsv *rsv, int items, - u64 *qgroup_reserved) + u64 *qgroup_reserved, + bool use_global_rsv) { u64 num_bytes; int ret; + struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; if (root->fs_info->quota_enabled) { /* One for parent inode, two for dir entries */ @@ -4764,6 +4766,10 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, BTRFS_BLOCK_GROUP_METADATA); ret = btrfs_block_rsv_add(root, rsv, num_bytes, BTRFS_RESERVE_FLUSH_ALL); + + if (ret == -ENOSPC && use_global_rsv) + ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes); + if (ret) { if (*qgroup_reserved) btrfs_qgroup_free(root, *qgroup_reserved); -- cgit v1.2.3 From 52ee28d249571616c80df7562bd4412ac253899a Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 11 Jul 2013 17:51:15 +0800 Subject: Btrfs: make free space caching faster with many non-inline extent references So to cache free space, we iterate every extent item to gather free space info. When we have say 10,000 non-inline extent refs(such as BTRFS_EXTENT_DATA_REF), it takes quite a long time, and since inline extent refs and non-inline ones have same objectid in their keys, we can just re-search the tree with the next address to skip non-inline references. (This is found by dedup feature because dedup extents can end up with many non-inline extent refs.) Signed-off-by: Liu Bo Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7bea4d2f85dc..99aa9b77e948 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -420,6 +420,7 @@ again: /* need to make sure the commit_root doesn't disappear */ down_read(&fs_info->extent_commit_sem); +next: ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); if (ret < 0) goto err; @@ -459,6 +460,16 @@ again: continue; } + if (key.objectid < last) { + key.objectid = last; + key.offset = 0; + key.type = BTRFS_EXTENT_ITEM_KEY; + + caching_ctl->progress = last; + btrfs_release_path(path); + goto next; + } + if (key.objectid < block_group->key.objectid) { path->slots[0]++; continue; -- cgit v1.2.3 From 1095cc0d924e1c61fb34e0bfcec049d1fbcca77c Mon Sep 17 00:00:00 2001 From: chandan Date: Tue, 16 Jul 2013 12:28:56 +0530 Subject: btrfs_read_block_groups: Use enums to index btrfs_space_info->block_groups. The current code uses integer literals to index btrfs_space_info->block_groups[] array. Instead use corresponding enums from 'enum btrfs_raid_types'. Signed-off-by: chandan Reviewed-by: David Sterba Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 99aa9b77e948..a770a6318433 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -8441,9 +8441,13 @@ int btrfs_read_block_groups(struct btrfs_root *root) * avoid allocating from un-mirrored block group if there are * mirrored block groups. */ - list_for_each_entry(cache, &space_info->block_groups[3], list) + list_for_each_entry(cache, + &space_info->block_groups[BTRFS_RAID_RAID0], + list) set_block_group_ro(cache, 1); - list_for_each_entry(cache, &space_info->block_groups[4], list) + list_for_each_entry(cache, + &space_info->block_groups[BTRFS_RAID_SINGLE], + list) set_block_group_ro(cache, 1); } -- cgit v1.2.3 From 599c75ec3f7f3b606e8a0a684c00f12190712de8 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Tue, 16 Jul 2013 19:03:36 +0800 Subject: Btrfs/tracepoint: update delayed ref tracepoints This shows exactly how btrfs processes the delayed refs onto disks, which is very helpful on understanding delayed ref mechanism and debugging related bugs. Signed-off-by: Liu Bo Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a770a6318433..ca02d3c1724c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2035,6 +2035,8 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans, ins.type = BTRFS_EXTENT_ITEM_KEY; ref = btrfs_delayed_node_to_data_ref(node); + trace_run_delayed_data_ref(node, ref, node->action); + if (node->type == BTRFS_SHARED_DATA_REF_KEY) parent = ref->parent; else @@ -2178,6 +2180,8 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, SKINNY_METADATA); ref = btrfs_delayed_node_to_tree_ref(node); + trace_run_delayed_tree_ref(node, ref, node->action); + if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) parent = ref->parent; else @@ -2236,6 +2240,8 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, */ BUG_ON(extent_op); head = btrfs_delayed_node_to_head(node); + trace_run_delayed_ref_head(node, head, node->action); + if (insert_reserved) { btrfs_pin_extent(root, node->bytenr, node->num_bytes, 1); -- cgit v1.2.3 From b37b39cd6b8af7a7c39fc7340d71f3db492af9e2 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 23 Jul 2013 16:57:15 -0400 Subject: Btrfs: cleanup reloc roots properly on error I was hitting the BUG_ON() at the end of merge_reloc_roots() because we were aborting the transaction at some point previously and then getting an error when we tried to drop the reloc root. I fixed btrfs_drop_snapshot to re-add us to the dead roots list if we failed, but this isn't the right thing to do for reloc roots since it uses root->root_list for it's own stuff in order to know what needs to be cleaned up. So fix btrfs_drop_snapshot to only do the re-add if we aren't dropping for reloc, and handle errors from merge_reloc_root() by dropping the reloc root we are processing since it won't be on the list of roots to cleanup. With this patch my reproducer no longer panics. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index ca02d3c1724c..e868c35f760c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -7696,7 +7696,7 @@ out: * don't have it in the radix (like when we recover after a power fail * or unmount) so we don't leak memory. */ - if (root_dropped == false) + if (!for_reloc && root_dropped == false) btrfs_add_dead_root(root); if (err) btrfs_std_error(root->fs_info, err); -- cgit v1.2.3 From 36cce922875563a1e2a4b6a53fbe1147f652a51e Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 5 Aug 2013 11:15:21 -0400 Subject: Btrfs: handle errors when doing slow caching Alex Lyakas reported a bug where wait_block_group_cache_progress() would wait forever if a drive failed. This is because we just bail out if there is an error while trying to cache a block group, we don't update anybody who may be waiting. So this introduces a new enum for the cache state in case of error and makes everybody bail out if we have an error. Alex tested and verified this patch fixed his problem. This fixes bz 59431. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e868c35f760c..a073f3ece43a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -113,7 +113,8 @@ static noinline int block_group_cache_done(struct btrfs_block_group_cache *cache) { smp_mb(); - return cache->cached == BTRFS_CACHE_FINISHED; + return cache->cached == BTRFS_CACHE_FINISHED || + cache->cached == BTRFS_CACHE_ERROR; } static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) @@ -389,7 +390,7 @@ static noinline void caching_thread(struct btrfs_work *work) u64 total_found = 0; u64 last = 0; u32 nritems; - int ret = 0; + int ret = -ENOMEM; caching_ctl = container_of(work, struct btrfs_caching_control, work); block_group = caching_ctl->block_group; @@ -517,6 +518,12 @@ err: mutex_unlock(&caching_ctl->mutex); out: + if (ret) { + spin_lock(&block_group->lock); + block_group->caching_ctl = NULL; + block_group->cached = BTRFS_CACHE_ERROR; + spin_unlock(&block_group->lock); + } wake_up(&caching_ctl->wait); put_caching_control(caching_ctl); @@ -6035,8 +6042,11 @@ static u64 stripe_align(struct btrfs_root *root, * for our min num_bytes. Another option is to have it go ahead * and look in the rbtree for a free extent of a given size, but this * is a good start. + * + * Callers of this must check if cache->cached == BTRFS_CACHE_ERROR before using + * any of the information in this block group. */ -static noinline int +static noinline void wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, u64 num_bytes) { @@ -6044,28 +6054,29 @@ wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, caching_ctl = get_caching_control(cache); if (!caching_ctl) - return 0; + return; wait_event(caching_ctl->wait, block_group_cache_done(cache) || (cache->free_space_ctl->free_space >= num_bytes)); put_caching_control(caching_ctl); - return 0; } static noinline int wait_block_group_cache_done(struct btrfs_block_group_cache *cache) { struct btrfs_caching_control *caching_ctl; + int ret = 0; caching_ctl = get_caching_control(cache); if (!caching_ctl) - return 0; + return (cache->cached == BTRFS_CACHE_ERROR) ? -EIO : 0; wait_event(caching_ctl->wait, block_group_cache_done(cache)); - + if (cache->cached == BTRFS_CACHE_ERROR) + ret = -EIO; put_caching_control(caching_ctl); - return 0; + return ret; } int __get_raid_index(u64 flags) @@ -6248,6 +6259,8 @@ have_block_group: ret = 0; } + if (unlikely(block_group->cached == BTRFS_CACHE_ERROR)) + goto loop; if (unlikely(block_group->ro)) goto loop; @@ -8230,7 +8243,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) * We haven't cached this block group, which means we could * possibly have excluded extents on this block group. */ - if (block_group->cached == BTRFS_CACHE_NO) + if (block_group->cached == BTRFS_CACHE_NO || + block_group->cached == BTRFS_CACHE_ERROR) free_excluded_extents(info->extent_root, block_group); btrfs_remove_free_space_cache(block_group); -- cgit v1.2.3 From 09fb99a696412ae0fceeafc06c987903416503b9 Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Mon, 5 Aug 2013 16:25:12 +0100 Subject: Btrfs: return ENOSPC when target space is full In extent-tree.c:do_chunk_alloc(), early on we returned 0 (success) when the target space was full and when chunk allocation is needed. However, later on in that same function we return ENOSPC if btrfs_alloc_chunk() fails (and chunk allocation was needed) and set the space's full flag. This was inconsistent, as -ENOSPC should be returned if the space is full and a chunk allocation needs to performed. If the space is full but no chunk allocation is needed, just return 0 (success). Signed-off-by: Filipe David Borba Manana Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a073f3ece43a..277d2c26b034 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3836,8 +3836,12 @@ again: if (force < space_info->force_alloc) force = space_info->force_alloc; if (space_info->full) { + if (should_alloc_chunk(extent_root, space_info, force)) + ret = -ENOSPC; + else + ret = 0; spin_unlock(&space_info->lock); - return 0; + return ret; } if (!should_alloc_chunk(extent_root, space_info, force)) { -- cgit v1.2.3 From 00361589d2eebd90fca022148c763e40d3e90871 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 14 Aug 2013 14:02:47 -0400 Subject: Btrfs: avoid starting a transaction in the write path I noticed while looking at a deadlock that we are always starting a transaction in cow_file_range(). This isn't really needed since we only need a transaction if we are doing an inline extent, or if the allocator needs to allocate a chunk. So push down all the transaction start stuff to be closer to where we actually need a transaction in all of these cases. This will hopefully reduce our write latency when we are committing often. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 277d2c26b034..f1c1694d34b7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -6121,8 +6121,7 @@ enum btrfs_loop_type { * ins->offset == number of blocks * Any available blocks before search_start are skipped. */ -static noinline int find_free_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *orig_root, +static noinline int find_free_extent(struct btrfs_root *orig_root, u64 num_bytes, u64 empty_size, u64 hint_byte, struct btrfs_key *ins, u64 flags) @@ -6345,10 +6344,10 @@ refill_cluster: block_group->full_stripe_len); /* allocate a cluster in this block group */ - ret = btrfs_find_space_cluster(trans, root, - block_group, last_ptr, - search_start, num_bytes, - aligned_cluster); + ret = btrfs_find_space_cluster(root, block_group, + last_ptr, search_start, + num_bytes, + aligned_cluster); if (ret == 0) { /* * now pull our allocation out of this @@ -6479,17 +6478,28 @@ loop: index = 0; loop++; if (loop == LOOP_ALLOC_CHUNK) { + struct btrfs_trans_handle *trans; + + trans = btrfs_join_transaction(root); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out; + } + ret = do_chunk_alloc(trans, root, flags, CHUNK_ALLOC_FORCE); /* * Do not bail out on ENOSPC since we * can do more things. */ - if (ret < 0 && ret != -ENOSPC) { + if (ret < 0 && ret != -ENOSPC) btrfs_abort_transaction(trans, root, ret); + else + ret = 0; + btrfs_end_transaction(trans, root); + if (ret) goto out; - } } if (loop == LOOP_NO_EMPTY_SIZE) { @@ -6553,8 +6563,7 @@ again: up_read(&info->groups_sem); } -int btrfs_reserve_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes, u64 min_alloc_size, u64 empty_size, u64 hint_byte, struct btrfs_key *ins, int is_data) @@ -6566,8 +6575,8 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans, flags = btrfs_get_alloc_profile(root, is_data); again: WARN_ON(num_bytes < root->sectorsize); - ret = find_free_extent(trans, root, num_bytes, empty_size, - hint_byte, ins, flags); + ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins, + flags); if (ret == -ENOSPC) { if (!final_tried) { @@ -6955,7 +6964,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, if (IS_ERR(block_rsv)) return ERR_CAST(block_rsv); - ret = btrfs_reserve_extent(trans, root, blocksize, blocksize, + ret = btrfs_reserve_extent(root, blocksize, blocksize, empty_size, hint, &ins, 0); if (ret) { unuse_block_rsv(root->fs_info, block_rsv, blocksize); -- cgit v1.2.3 From f7a81ea4cc6bdb51d8267d2f3ff485f0b4070074 Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Thu, 15 Aug 2013 17:11:19 +0200 Subject: Btrfs: create UUID tree if required This tree is not created by mkfs.btrfs. Therefore when a filesystem is mounted writable and the UUID tree does not exist, this tree is created if required. The tree is also added to the fs_info structure and initialized, but this commit does not yet read or write UUID tree elements. Signed-off-by: Stefan Behrens Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f1c1694d34b7..8cb1d41ba501 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4361,6 +4361,9 @@ static struct btrfs_block_rsv *get_block_rsv( if (root == root->fs_info->csum_root && trans->adding_csums) block_rsv = trans->block_rsv; + if (root == root->fs_info->uuid_root) + block_rsv = trans->block_rsv; + if (!block_rsv) block_rsv = root->block_rsv; -- cgit v1.2.3 From c1c9ff7c94e83fae89a742df74db51156869bad5 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 20 Aug 2013 13:20:07 +0200 Subject: Btrfs: Remove superfluous casts from u64 to unsigned long long u64 is "unsigned long long" on all architectures now, so there's no need to cast it when formatting it using the "ll" length modifier. Signed-off-by: Geert Uytterhoeven Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 47 +++++++++++++++++------------------------------ 1 file changed, 17 insertions(+), 30 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 8cb1d41ba501..69083330ee16 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5718,7 +5718,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, if (ret) { btrfs_err(info, "umm, got %d back from search, was looking for %llu", - ret, (unsigned long long)bytenr); + ret, bytenr); if (ret > 0) btrfs_print_leaf(extent_root, path->nodes[0]); @@ -5734,11 +5734,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, WARN_ON(1); btrfs_err(info, "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu", - (unsigned long long)bytenr, - (unsigned long long)parent, - (unsigned long long)root_objectid, - (unsigned long long)owner_objectid, - (unsigned long long)owner_offset); + bytenr, parent, root_objectid, owner_objectid, + owner_offset); } else { btrfs_abort_transaction(trans, extent_root, ret); goto out; @@ -5767,7 +5764,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, -1, 1); if (ret) { btrfs_err(info, "umm, got %d back from search, was looking for %llu", - ret, (unsigned long long)bytenr); + ret, bytenr); btrfs_print_leaf(extent_root, path->nodes[0]); } if (ret < 0) { @@ -6529,19 +6526,15 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes, spin_lock(&info->lock); printk(KERN_INFO "space_info %llu has %llu free, is %sfull\n", - (unsigned long long)info->flags, - (unsigned long long)(info->total_bytes - info->bytes_used - - info->bytes_pinned - info->bytes_reserved - - info->bytes_readonly), + info->flags, + info->total_bytes - info->bytes_used - info->bytes_pinned - + info->bytes_reserved - info->bytes_readonly, (info->full) ? "" : "not "); printk(KERN_INFO "space_info total=%llu, used=%llu, pinned=%llu, " "reserved=%llu, may_use=%llu, readonly=%llu\n", - (unsigned long long)info->total_bytes, - (unsigned long long)info->bytes_used, - (unsigned long long)info->bytes_pinned, - (unsigned long long)info->bytes_reserved, - (unsigned long long)info->bytes_may_use, - (unsigned long long)info->bytes_readonly); + info->total_bytes, info->bytes_used, info->bytes_pinned, + info->bytes_reserved, info->bytes_may_use, + info->bytes_readonly); spin_unlock(&info->lock); if (!dump_block_groups) @@ -6552,12 +6545,9 @@ again: list_for_each_entry(cache, &info->block_groups[index], list) { spin_lock(&cache->lock); printk(KERN_INFO "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s\n", - (unsigned long long)cache->key.objectid, - (unsigned long long)cache->key.offset, - (unsigned long long)btrfs_block_group_used(&cache->item), - (unsigned long long)cache->pinned, - (unsigned long long)cache->reserved, - cache->ro ? "[readonly]" : ""); + cache->key.objectid, cache->key.offset, + btrfs_block_group_used(&cache->item), cache->pinned, + cache->reserved, cache->ro ? "[readonly]" : ""); btrfs_dump_free_space(cache, bytes); spin_unlock(&cache->lock); } @@ -6594,8 +6584,7 @@ again: sinfo = __find_space_info(root->fs_info, flags); btrfs_err(root->fs_info, "allocation failed flags %llu, wanted %llu", - (unsigned long long)flags, - (unsigned long long)num_bytes); + flags, num_bytes); if (sinfo) dump_space_info(sinfo, num_bytes, 1); } @@ -6615,7 +6604,7 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root, cache = btrfs_lookup_block_group(root->fs_info, start); if (!cache) { btrfs_err(root->fs_info, "Unable to find block group for %llu", - (unsigned long long)start); + start); return -ENOSPC; } @@ -6711,8 +6700,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, ret = update_block_group(root, ins->objectid, ins->offset, 1); if (ret) { /* -ENOENT, logic error */ btrfs_err(fs_info, "update block group failed for %llu %llu", - (unsigned long long)ins->objectid, - (unsigned long long)ins->offset); + ins->objectid, ins->offset); BUG(); } return ret; @@ -6784,8 +6772,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, ret = update_block_group(root, ins->objectid, root->leafsize, 1); if (ret) { /* -ENOENT, logic error */ btrfs_err(fs_info, "update block group failed for %llu %llu", - (unsigned long long)ins->objectid, - (unsigned long long)ins->offset); + ins->objectid, ins->offset); BUG(); } return ret; -- cgit v1.2.3 From b8d0c69b9469ffd33df30fee3e990f2d4aa68a09 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 22 Aug 2013 17:03:29 -0400 Subject: Btrfs: remove ourselves from the cluster list under lock A user was reporting weird warnings from btrfs_put_delayed_ref() and I noticed that we were doing this list_del_init() on our head ref outside of delayed_refs->lock. This is a problem if we have people still on the list, we could end up modifying old pointers and such. Fix this by removing us from the list before we do our run_delayed_ref on our head ref. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 69083330ee16..cfb3cf711b34 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2440,6 +2440,8 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, default: WARN_ON(1); } + } else { + list_del_init(&locked_ref->cluster); } spin_unlock(&delayed_refs->lock); @@ -2462,7 +2464,6 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, * list before we release it. */ if (btrfs_delayed_ref_is_head(ref)) { - list_del_init(&locked_ref->cluster); btrfs_delayed_ref_unlock(locked_ref); locked_ref = NULL; } -- cgit v1.2.3 From a482039889b85c45fc9616e65d560db7a35d4f54 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Mon, 9 Sep 2013 13:19:42 +0800 Subject: Btrfs: allocate the free space by the existed max extent size when ENOSPC By the current code, if the requested size is very large, and all the extents in the free space cache are small, we will waste lots of the cpu time to cut the requested size in half and search the cache again and again until it gets down to the size the allocator can return. In fact, we can know the max extent size in the cache after the first search, so we needn't cut the size in half repeatedly, and just use the max extent size directly. This way can save lots of cpu time and make the performance grow up when there are only fragments in the free space cache. According to my test, if there are only 4KB free space extents in the fs, and the total size of those extents are 256MB, we can reduce the execute time of the following test from 5.4s to 1.4s. dd if=/dev/zero of= bs=1MB count=1 oflag=sync Changelog v2 -> v3: - fix the problem that we skip the block group with the space which is less than we need. Changelog v1 -> v2: - address the problem that we return a wrong start position when searching the free space in a bitmap. Signed-off-by: Miao Xie Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index cfb3cf711b34..2f03181b777f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -6117,10 +6117,13 @@ enum btrfs_loop_type { /* * walks the btree of allocated extents and find a hole of a given size. * The key ins is changed to record the hole: - * ins->objectid == block start + * ins->objectid == start position * ins->flags = BTRFS_EXTENT_ITEM_KEY - * ins->offset == number of blocks + * ins->offset == the size of the hole. * Any available blocks before search_start are skipped. + * + * If there is no suitable free space, we will record the max size of + * the free space extent currently. */ static noinline int find_free_extent(struct btrfs_root *orig_root, u64 num_bytes, u64 empty_size, @@ -6133,6 +6136,7 @@ static noinline int find_free_extent(struct btrfs_root *orig_root, struct btrfs_block_group_cache *block_group = NULL; struct btrfs_block_group_cache *used_block_group; u64 search_start = 0; + u64 max_extent_size = 0; int empty_cluster = 2 * 1024 * 1024; struct btrfs_space_info *space_info; int loop = 0; @@ -6292,7 +6296,10 @@ have_block_group: btrfs_get_block_group(used_block_group); offset = btrfs_alloc_from_cluster(used_block_group, - last_ptr, num_bytes, used_block_group->key.objectid); + last_ptr, + num_bytes, + used_block_group->key.objectid, + &max_extent_size); if (offset) { /* we have a block, we're done */ spin_unlock(&last_ptr->refill_lock); @@ -6355,8 +6362,10 @@ refill_cluster: * cluster */ offset = btrfs_alloc_from_cluster(block_group, - last_ptr, num_bytes, - search_start); + last_ptr, + num_bytes, + search_start, + &max_extent_size); if (offset) { /* we found one, proceed */ spin_unlock(&last_ptr->refill_lock); @@ -6391,13 +6400,18 @@ unclustered_alloc: if (cached && block_group->free_space_ctl->free_space < num_bytes + empty_cluster + empty_size) { + if (block_group->free_space_ctl->free_space > + max_extent_size) + max_extent_size = + block_group->free_space_ctl->free_space; spin_unlock(&block_group->free_space_ctl->tree_lock); goto loop; } spin_unlock(&block_group->free_space_ctl->tree_lock); offset = btrfs_find_space_for_alloc(block_group, search_start, - num_bytes, empty_size); + num_bytes, empty_size, + &max_extent_size); /* * If we didn't find a chunk, and we haven't failed on this * block group before, and this block group is in the middle of @@ -6515,7 +6529,8 @@ loop: ret = 0; } out: - + if (ret == -ENOSPC) + ins->offset = max_extent_size; return ret; } @@ -6573,8 +6588,8 @@ again: flags); if (ret == -ENOSPC) { - if (!final_tried) { - num_bytes = num_bytes >> 1; + if (!final_tried && ins->offset) { + num_bytes = min(num_bytes >> 1, ins->offset); num_bytes = round_down(num_bytes, root->sectorsize); num_bytes = max(num_bytes, min_alloc_size); if (num_bytes == min_alloc_size) -- cgit v1.2.3 From 14575aef4212d2cc02274bf1f5456f3e644f03f8 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 17 Sep 2013 10:48:00 -0400 Subject: Revert "Btrfs: rework the overcommit logic to be based on the total size" This reverts commit 70afa3998c9baed4186df38988246de1abdab56d. It is causing performance issues and wasn't actually correct. There were problems with the way we flushed delalloc and that was the real cause of the early enospc. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2f03181b777f..07f7f6942128 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3925,7 +3925,6 @@ static int can_overcommit(struct btrfs_root *root, u64 space_size; u64 avail; u64 used; - u64 to_add; used = space_info->bytes_used + space_info->bytes_reserved + space_info->bytes_pinned + space_info->bytes_readonly; @@ -3959,25 +3958,17 @@ static int can_overcommit(struct btrfs_root *root, BTRFS_BLOCK_GROUP_RAID10)) avail >>= 1; - to_add = space_info->total_bytes; - /* * If we aren't flushing all things, let us overcommit up to * 1/2th of the space. If we can flush, don't let us overcommit * too much, let it overcommit up to 1/8 of the space. */ if (flush == BTRFS_RESERVE_FLUSH_ALL) - to_add >>= 3; + avail >>= 3; else - to_add >>= 1; - - /* - * Limit the overcommit to the amount of free space we could possibly - * allocate for chunks. - */ - to_add = min(avail, to_add); + avail >>= 1; - if (used + bytes < space_info->total_bytes + to_add) + if (used + bytes < space_info->total_bytes + avail) return 1; return 0; } -- cgit v1.2.3 From f0de181c9b48a397c5a2fbe63dcdd2a26a872695 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 17 Sep 2013 10:55:51 -0400 Subject: Btrfs: kill delay_iput arg to the wait_ordered functions This is a left over of how we used to wait for ordered extents, which was to grab the inode and then run filemap flush on it. However if we have an ordered extent then we already are holding a ref on the inode, and we just use btrfs_start_ordered_extent anyway, so there is no reason to have an extra ref on the inode to start work on the ordered extent. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 07f7f6942128..88c6db611de3 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3991,7 +3991,7 @@ static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root, */ btrfs_start_all_delalloc_inodes(root->fs_info, 0); if (!current->journal_info) - btrfs_wait_all_ordered_extents(root->fs_info, 0); + btrfs_wait_all_ordered_extents(root->fs_info); } } @@ -4021,7 +4021,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, if (delalloc_bytes == 0) { if (trans) return; - btrfs_wait_all_ordered_extents(root->fs_info, 0); + btrfs_wait_all_ordered_extents(root->fs_info); return; } @@ -4049,7 +4049,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, loops++; if (wait_ordered && !trans) { - btrfs_wait_all_ordered_extents(root->fs_info, 0); + btrfs_wait_all_ordered_extents(root->fs_info); } else { time_left = schedule_timeout_killable(1); if (time_left) -- cgit v1.2.3 From 363e4d354e51558629d13c5ae456f98c4b209576 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 17 Sep 2013 11:02:25 -0400 Subject: Btrfs: remove space_info->reservation_progress This isn't used for anything anymore, just remove it. Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 88c6db611de3..d58bef130a41 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4456,7 +4456,6 @@ static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info, space_info->bytes_may_use -= num_bytes; trace_btrfs_space_reservation(fs_info, "space_info", space_info->flags, num_bytes, 0); - space_info->reservation_progress++; spin_unlock(&space_info->lock); } } @@ -4657,7 +4656,6 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info) sinfo->bytes_may_use -= num_bytes; trace_btrfs_space_reservation(fs_info, "space_info", sinfo->flags, num_bytes, 0); - sinfo->reservation_progress++; block_rsv->reserved = block_rsv->size; block_rsv->full = 1; } @@ -5437,7 +5435,6 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, space_info->bytes_readonly += num_bytes; cache->reserved -= num_bytes; space_info->bytes_reserved -= num_bytes; - space_info->reservation_progress++; } spin_unlock(&cache->lock); spin_unlock(&space_info->lock); -- cgit v1.2.3