diff options
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r-- | fs/btrfs/extent-tree.c | 429 |
1 files changed, 276 insertions, 153 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1204c8ef6f32..9c01509dd8ab 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -25,7 +25,6 @@ #include <linux/slab.h> #include <linux/ratelimit.h> #include <linux/percpu_counter.h> -#include "compat.h" #include "hash.h" #include "ctree.h" #include "disk-io.h" @@ -113,7 +112,8 @@ static noinline int block_group_cache_done(struct btrfs_block_group_cache *cache) { smp_mb(); - return cache->cached == BTRFS_CACHE_FINISHED; + return cache->cached == BTRFS_CACHE_FINISHED || + cache->cached == BTRFS_CACHE_ERROR; } static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) @@ -389,7 +389,7 @@ static noinline void caching_thread(struct btrfs_work *work) u64 total_found = 0; u64 last = 0; u32 nritems; - int ret = 0; + int ret = -ENOMEM; caching_ctl = container_of(work, struct btrfs_caching_control, work); block_group = caching_ctl->block_group; @@ -420,6 +420,7 @@ again: /* need to make sure the commit_root doesn't disappear */ down_read(&fs_info->extent_commit_sem); +next: ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); if (ret < 0) goto err; @@ -459,6 +460,16 @@ again: continue; } + if (key.objectid < last) { + key.objectid = last; + key.offset = 0; + key.type = BTRFS_EXTENT_ITEM_KEY; + + caching_ctl->progress = last; + btrfs_release_path(path); + goto next; + } + if (key.objectid < block_group->key.objectid) { path->slots[0]++; continue; @@ -506,6 +517,12 @@ err: mutex_unlock(&caching_ctl->mutex); out: + if (ret) { + spin_lock(&block_group->lock); + block_group->caching_ctl = NULL; + block_group->cached = BTRFS_CACHE_ERROR; + spin_unlock(&block_group->lock); + } wake_up(&caching_ctl->wait); put_caching_control(caching_ctl); @@ -750,20 +767,19 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, if (!path) return -ENOMEM; - if (metadata) { - key.objectid = bytenr; - key.type = BTRFS_METADATA_ITEM_KEY; - key.offset = offset; - } else { - key.objectid = bytenr; - key.type = BTRFS_EXTENT_ITEM_KEY; - key.offset = offset; - } - if (!trans) { path->skip_locking = 1; path->search_commit_root = 1; } + +search_again: + key.objectid = bytenr; + key.offset = offset; + if (metadata) + key.type = BTRFS_METADATA_ITEM_KEY; + else + key.type = BTRFS_EXTENT_ITEM_KEY; + again: ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 0, 0); @@ -771,10 +787,22 @@ again: goto out_free; if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) { - key.type = BTRFS_EXTENT_ITEM_KEY; - key.offset = root->leafsize; - btrfs_release_path(path); - goto again; + if (path->slots[0]) { + path->slots[0]--; + btrfs_item_key_to_cpu(path->nodes[0], &key, + path->slots[0]); + if (key.objectid == bytenr && + key.type == BTRFS_EXTENT_ITEM_KEY && + key.offset == root->leafsize) + ret = 0; + } + if (ret) { + key.objectid = bytenr; + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = root->leafsize; + btrfs_release_path(path); + goto again; + } } if (ret == 0) { @@ -825,7 +853,7 @@ again: mutex_lock(&head->mutex); mutex_unlock(&head->mutex); btrfs_put_delayed_ref(&head->node); - goto again; + goto search_again; } if (head->extent_op && head->extent_op->update_flags) extent_flags |= head->extent_op->flags_to_set; @@ -1520,9 +1548,8 @@ again: if (ret && !insert) { err = -ENOENT; goto out; - } else if (ret) { + } else if (WARN_ON(ret)) { err = -EIO; - WARN_ON(1); goto out; } @@ -1948,7 +1975,6 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_extent_item *item; u64 refs; int ret; - int err = 0; path = btrfs_alloc_path(); if (!path) @@ -1961,14 +1987,9 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, path, bytenr, num_bytes, parent, root_objectid, owner, offset, refs_to_add, extent_op); - if (ret == 0) + if (ret != -EAGAIN) goto out; - if (ret != -EAGAIN) { - err = ret; - goto out; - } - leaf = path->nodes[0]; item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); refs = btrfs_extent_refs(leaf, item); @@ -1990,7 +2011,7 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, btrfs_abort_transaction(trans, root, ret); out: btrfs_free_path(path); - return err; + return ret; } static int run_delayed_data_ref(struct btrfs_trans_handle *trans, @@ -2011,6 +2032,8 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans, ins.type = BTRFS_EXTENT_ITEM_KEY; ref = btrfs_delayed_node_to_data_ref(node); + trace_run_delayed_data_ref(node, ref, node->action); + if (node->type == BTRFS_SHARED_DATA_REF_KEY) parent = ref->parent; else @@ -2104,15 +2127,28 @@ again: } if (ret > 0) { if (metadata) { - btrfs_release_path(path); - metadata = 0; + if (path->slots[0] > 0) { + path->slots[0]--; + btrfs_item_key_to_cpu(path->nodes[0], &key, + path->slots[0]); + if (key.objectid == node->bytenr && + key.type == BTRFS_EXTENT_ITEM_KEY && + key.offset == node->num_bytes) + ret = 0; + } + if (ret > 0) { + btrfs_release_path(path); + metadata = 0; - key.offset = node->num_bytes; - key.type = BTRFS_EXTENT_ITEM_KEY; - goto again; + key.objectid = node->bytenr; + key.offset = node->num_bytes; + key.type = BTRFS_EXTENT_ITEM_KEY; + goto again; + } + } else { + err = -EIO; + goto out; } - err = -EIO; - goto out; } leaf = path->nodes[0]; @@ -2154,6 +2190,8 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, SKINNY_METADATA); ref = btrfs_delayed_node_to_tree_ref(node); + trace_run_delayed_tree_ref(node, ref, node->action); + if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) parent = ref->parent; else @@ -2199,8 +2237,12 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, { int ret = 0; - if (trans->aborted) + if (trans->aborted) { + if (insert_reserved) + btrfs_pin_extent(root, node->bytenr, + node->num_bytes, 1); return 0; + } if (btrfs_delayed_ref_is_head(node)) { struct btrfs_delayed_ref_head *head; @@ -2212,6 +2254,8 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, */ BUG_ON(extent_op); head = btrfs_delayed_node_to_head(node); + trace_run_delayed_ref_head(node, head, node->action); + if (insert_reserved) { btrfs_pin_extent(root, node->bytenr, node->num_bytes, 1); @@ -2374,6 +2418,14 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, btrfs_free_delayed_extent_op(extent_op); if (ret) { + /* + * Need to reset must_insert_reserved if + * there was an error so the abort stuff + * can cleanup the reserved space + * properly. + */ + if (must_insert_reserved) + locked_ref->must_insert_reserved = 1; btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret); spin_lock(&delayed_refs->lock); btrfs_delayed_ref_unlock(locked_ref); @@ -2403,6 +2455,8 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, default: WARN_ON(1); } + } else { + list_del_init(&locked_ref->cluster); } spin_unlock(&delayed_refs->lock); @@ -2425,7 +2479,6 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, * list before we release it. */ if (btrfs_delayed_ref_is_head(ref)) { - list_del_init(&locked_ref->cluster); btrfs_delayed_ref_unlock(locked_ref); locked_ref = NULL; } @@ -3159,8 +3212,7 @@ again: if (ret) goto out_put; - ret = btrfs_truncate_free_space_cache(root, trans, path, - inode); + ret = btrfs_truncate_free_space_cache(root, trans, inode); if (ret) goto out_put; } @@ -3280,10 +3332,9 @@ again: last = cache->key.objectid + cache->key.offset; err = write_one_cache_group(trans, root, path, cache); + btrfs_put_block_group(cache); if (err) /* File system offline */ goto out; - - btrfs_put_block_group(cache); } while (1) { @@ -3567,10 +3618,9 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes) /* make sure bytes are sectorsize aligned */ bytes = ALIGN(bytes, root->sectorsize); - if (root == root->fs_info->tree_root || - BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) { - alloc_chunk = 0; + if (btrfs_is_free_space_inode(inode)) { committed = 1; + ASSERT(current->journal_info); } data_sinfo = fs_info->data_sinfo; @@ -3598,6 +3648,16 @@ again: spin_unlock(&data_sinfo->lock); alloc: alloc_target = btrfs_get_alloc_profile(root, 1); + /* + * It is ugly that we don't call nolock join + * transaction for the free space inode case here. + * But it is safe because we only do the data space + * reservation for the free space cache in the + * transaction context, the common join transaction + * just increase the counter of the current transaction + * handler, doesn't try to acquire the trans_lock of + * the fs. + */ trans = btrfs_join_transaction(root); if (IS_ERR(trans)) return PTR_ERR(trans); @@ -3643,6 +3703,9 @@ commit_trans: goto again; } + trace_btrfs_space_reservation(root->fs_info, + "space_info:enospc", + data_sinfo->flags, bytes, 1); return -ENOSPC; } data_sinfo->bytes_may_use += bytes; @@ -3799,8 +3862,12 @@ again: if (force < space_info->force_alloc) force = space_info->force_alloc; if (space_info->full) { + if (should_alloc_chunk(extent_root, space_info, force)) + ret = -ENOSPC; + else + ret = 0; spin_unlock(&space_info->lock); - return 0; + return ret; } if (!should_alloc_chunk(extent_root, space_info, force)) { @@ -3883,7 +3950,6 @@ static int can_overcommit(struct btrfs_root *root, u64 space_size; u64 avail; u64 used; - u64 to_add; used = space_info->bytes_used + space_info->bytes_reserved + space_info->bytes_pinned + space_info->bytes_readonly; @@ -3917,25 +3983,17 @@ static int can_overcommit(struct btrfs_root *root, BTRFS_BLOCK_GROUP_RAID10)) avail >>= 1; - to_add = space_info->total_bytes; - /* * If we aren't flushing all things, let us overcommit up to * 1/2th of the space. If we can flush, don't let us overcommit * too much, let it overcommit up to 1/8 of the space. */ if (flush == BTRFS_RESERVE_FLUSH_ALL) - to_add >>= 3; + avail >>= 3; else - to_add >>= 1; - - /* - * Limit the overcommit to the amount of free space we could possibly - * allocate for chunks. - */ - to_add = min(avail, to_add); + avail >>= 1; - if (used + bytes < space_info->total_bytes + to_add) + if (used + bytes < space_info->total_bytes + avail) return 1; return 0; } @@ -3956,12 +4014,26 @@ static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root, * the filesystem is readonly(all dirty pages are written to * the disk). */ - btrfs_start_all_delalloc_inodes(root->fs_info, 0); + btrfs_start_delalloc_roots(root->fs_info, 0); if (!current->journal_info) - btrfs_wait_all_ordered_extents(root->fs_info, 0); + btrfs_wait_ordered_roots(root->fs_info, -1); } } +static inline int calc_reclaim_items_nr(struct btrfs_root *root, u64 to_reclaim) +{ + u64 bytes; + int nr; + + bytes = btrfs_calc_trans_metadata_size(root, 1); + nr = (int)div64_u64(to_reclaim, bytes); + if (!nr) + nr = 1; + return nr; +} + +#define EXTENT_SIZE_PER_ITEM (256 * 1024) + /* * shrink metadata reservation for delalloc */ @@ -3974,24 +4046,30 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, u64 delalloc_bytes; u64 max_reclaim; long time_left; - unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; - int loops = 0; + unsigned long nr_pages; + int loops; + int items; enum btrfs_reserve_flush_enum flush; + /* Calc the number of the pages we need flush for space reservation */ + items = calc_reclaim_items_nr(root, to_reclaim); + to_reclaim = items * EXTENT_SIZE_PER_ITEM; + trans = (struct btrfs_trans_handle *)current->journal_info; block_rsv = &root->fs_info->delalloc_block_rsv; space_info = block_rsv->space_info; - smp_mb(); delalloc_bytes = percpu_counter_sum_positive( &root->fs_info->delalloc_bytes); if (delalloc_bytes == 0) { if (trans) return; - btrfs_wait_all_ordered_extents(root->fs_info, 0); + if (wait_ordered) + btrfs_wait_ordered_roots(root->fs_info, items); return; } + loops = 0; while (delalloc_bytes && loops < 3) { max_reclaim = min(delalloc_bytes, to_reclaim); nr_pages = max_reclaim >> PAGE_CACHE_SHIFT; @@ -4000,9 +4078,19 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, * We need to wait for the async pages to actually start before * we do anything. */ - wait_event(root->fs_info->async_submit_wait, - !atomic_read(&root->fs_info->async_delalloc_pages)); + max_reclaim = atomic_read(&root->fs_info->async_delalloc_pages); + if (!max_reclaim) + goto skip_async; + + if (max_reclaim <= nr_pages) + max_reclaim = 0; + else + max_reclaim -= nr_pages; + wait_event(root->fs_info->async_submit_wait, + atomic_read(&root->fs_info->async_delalloc_pages) <= + (int)max_reclaim); +skip_async: if (!trans) flush = BTRFS_RESERVE_FLUSH_ALL; else @@ -4016,13 +4104,12 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, loops++; if (wait_ordered && !trans) { - btrfs_wait_all_ordered_extents(root->fs_info, 0); + btrfs_wait_ordered_roots(root->fs_info, items); } else { time_left = schedule_timeout_killable(1); if (time_left) break; } - smp_mb(); delalloc_bytes = percpu_counter_sum_positive( &root->fs_info->delalloc_bytes); } @@ -4107,16 +4194,11 @@ static int flush_space(struct btrfs_root *root, switch (state) { case FLUSH_DELAYED_ITEMS_NR: case FLUSH_DELAYED_ITEMS: - if (state == FLUSH_DELAYED_ITEMS_NR) { - u64 bytes = btrfs_calc_trans_metadata_size(root, 1); - - nr = (int)div64_u64(num_bytes, bytes); - if (!nr) - nr = 1; - nr *= 2; - } else { + if (state == FLUSH_DELAYED_ITEMS_NR) + nr = calc_reclaim_items_nr(root, num_bytes) * 2; + else nr = -1; - } + trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { ret = PTR_ERR(trans); @@ -4299,6 +4381,10 @@ out: !block_rsv_use_bytes(global_rsv, orig_bytes)) ret = 0; } + if (ret == -ENOSPC) + trace_btrfs_space_reservation(root->fs_info, + "space_info:enospc", + space_info->flags, orig_bytes, 1); if (flushing) { spin_lock(&space_info->lock); space_info->flush = 0; @@ -4320,6 +4406,9 @@ static struct btrfs_block_rsv *get_block_rsv( if (root == root->fs_info->csum_root && trans->adding_csums) block_rsv = trans->block_rsv; + if (root == root->fs_info->uuid_root) + block_rsv = trans->block_rsv; + if (!block_rsv) block_rsv = root->block_rsv; @@ -4420,7 +4509,6 @@ static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info, space_info->bytes_may_use -= num_bytes; trace_btrfs_space_reservation(fs_info, "space_info", space_info->flags, num_bytes, 0); - space_info->reservation_progress++; spin_unlock(&space_info->lock); } } @@ -4621,7 +4709,6 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info) sinfo->bytes_may_use -= num_bytes; trace_btrfs_space_reservation(fs_info, "space_info", sinfo->flags, num_bytes, 0); - sinfo->reservation_progress++; block_rsv->reserved = block_rsv->size; block_rsv->full = 1; } @@ -4729,10 +4816,12 @@ void btrfs_orphan_release_metadata(struct inode *inode) int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, struct btrfs_block_rsv *rsv, int items, - u64 *qgroup_reserved) + u64 *qgroup_reserved, + bool use_global_rsv) { u64 num_bytes; int ret; + struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; if (root->fs_info->quota_enabled) { /* One for parent inode, two for dir entries */ @@ -4751,6 +4840,10 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, BTRFS_BLOCK_GROUP_METADATA); ret = btrfs_block_rsv_add(root, rsv, num_bytes, BTRFS_RESERVE_FLUSH_ALL); + + if (ret == -ENOSPC && use_global_rsv) + ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes); + if (ret) { if (*qgroup_reserved) btrfs_qgroup_free(root, *qgroup_reserved); @@ -4946,7 +5039,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) mutex_unlock(&BTRFS_I(inode)->delalloc_mutex); if (to_reserve) - trace_btrfs_space_reservation(root->fs_info,"delalloc", + trace_btrfs_space_reservation(root->fs_info, "delalloc", btrfs_ino(inode), to_reserve, 1); block_rsv_add_bytes(block_rsv, to_reserve, 1); @@ -5224,6 +5317,8 @@ static int pin_down_extent(struct btrfs_root *root, set_extent_dirty(root->fs_info->pinned_extents, bytenr, bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL); + if (reserved) + trace_btrfs_reserved_extent_free(root, bytenr, num_bytes); return 0; } @@ -5395,7 +5490,6 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, space_info->bytes_readonly += num_bytes; cache->reserved -= num_bytes; space_info->bytes_reserved -= num_bytes; - space_info->reservation_progress++; } spin_unlock(&cache->lock); spin_unlock(&space_info->lock); @@ -5668,7 +5762,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, if (ret) { btrfs_err(info, "umm, got %d back from search, was looking for %llu", - ret, (unsigned long long)bytenr); + ret, bytenr); if (ret > 0) btrfs_print_leaf(extent_root, path->nodes[0]); @@ -5679,16 +5773,12 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, } extent_slot = path->slots[0]; } - } else if (ret == -ENOENT) { + } else if (WARN_ON(ret == -ENOENT)) { btrfs_print_leaf(extent_root, path->nodes[0]); - WARN_ON(1); btrfs_err(info, "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu", - (unsigned long long)bytenr, - (unsigned long long)parent, - (unsigned long long)root_objectid, - (unsigned long long)owner_objectid, - (unsigned long long)owner_offset); + bytenr, parent, root_objectid, owner_objectid, + owner_offset); } else { btrfs_abort_transaction(trans, extent_root, ret); goto out; @@ -5717,7 +5807,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, -1, 1); if (ret) { btrfs_err(info, "umm, got %d back from search, was looking for %llu", - ret, (unsigned long long)bytenr); + ret, bytenr); btrfs_print_leaf(extent_root, path->nodes[0]); } if (ret < 0) { @@ -5931,6 +6021,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, btrfs_add_free_space(cache, buf->start, buf->len); btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE); + trace_btrfs_reserved_extent_free(root, buf->start, buf->len); pin = 0; } out: @@ -5999,8 +6090,11 @@ static u64 stripe_align(struct btrfs_root *root, * for our min num_bytes. Another option is to have it go ahead * and look in the rbtree for a free extent of a given size, but this * is a good start. + * + * Callers of this must check if cache->cached == BTRFS_CACHE_ERROR before using + * any of the information in this block group. */ -static noinline int +static noinline void wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, u64 num_bytes) { @@ -6008,28 +6102,29 @@ wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, caching_ctl = get_caching_control(cache); if (!caching_ctl) - return 0; + return; wait_event(caching_ctl->wait, block_group_cache_done(cache) || (cache->free_space_ctl->free_space >= num_bytes)); put_caching_control(caching_ctl); - return 0; } static noinline int wait_block_group_cache_done(struct btrfs_block_group_cache *cache) { struct btrfs_caching_control *caching_ctl; + int ret = 0; caching_ctl = get_caching_control(cache); if (!caching_ctl) - return 0; + return (cache->cached == BTRFS_CACHE_ERROR) ? -EIO : 0; wait_event(caching_ctl->wait, block_group_cache_done(cache)); - + if (cache->cached == BTRFS_CACHE_ERROR) + ret = -EIO; put_caching_control(caching_ctl); - return 0; + return ret; } int __get_raid_index(u64 flags) @@ -6065,13 +6160,15 @@ enum btrfs_loop_type { /* * walks the btree of allocated extents and find a hole of a given size. * The key ins is changed to record the hole: - * ins->objectid == block start + * ins->objectid == start position * ins->flags = BTRFS_EXTENT_ITEM_KEY - * ins->offset == number of blocks + * ins->offset == the size of the hole. * Any available blocks before search_start are skipped. + * + * If there is no suitable free space, we will record the max size of + * the free space extent currently. */ -static noinline int find_free_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *orig_root, +static noinline int find_free_extent(struct btrfs_root *orig_root, u64 num_bytes, u64 empty_size, u64 hint_byte, struct btrfs_key *ins, u64 flags) @@ -6082,6 +6179,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_block_group_cache *block_group = NULL; struct btrfs_block_group_cache *used_block_group; u64 search_start = 0; + u64 max_extent_size = 0; int empty_cluster = 2 * 1024 * 1024; struct btrfs_space_info *space_info; int loop = 0; @@ -6212,6 +6310,8 @@ have_block_group: ret = 0; } + if (unlikely(block_group->cached == BTRFS_CACHE_ERROR)) + goto loop; if (unlikely(block_group->ro)) goto loop; @@ -6239,7 +6339,10 @@ have_block_group: btrfs_get_block_group(used_block_group); offset = btrfs_alloc_from_cluster(used_block_group, - last_ptr, num_bytes, used_block_group->key.objectid); + last_ptr, + num_bytes, + used_block_group->key.objectid, + &max_extent_size); if (offset) { /* we have a block, we're done */ spin_unlock(&last_ptr->refill_lock); @@ -6292,18 +6395,20 @@ refill_cluster: block_group->full_stripe_len); /* allocate a cluster in this block group */ - ret = btrfs_find_space_cluster(trans, root, - block_group, last_ptr, - search_start, num_bytes, - aligned_cluster); + ret = btrfs_find_space_cluster(root, block_group, + last_ptr, search_start, + num_bytes, + aligned_cluster); if (ret == 0) { /* * now pull our allocation out of this * cluster */ offset = btrfs_alloc_from_cluster(block_group, - last_ptr, num_bytes, - search_start); + last_ptr, + num_bytes, + search_start, + &max_extent_size); if (offset) { /* we found one, proceed */ spin_unlock(&last_ptr->refill_lock); @@ -6338,13 +6443,18 @@ unclustered_alloc: if (cached && block_group->free_space_ctl->free_space < num_bytes + empty_cluster + empty_size) { + if (block_group->free_space_ctl->free_space > + max_extent_size) + max_extent_size = + block_group->free_space_ctl->free_space; spin_unlock(&block_group->free_space_ctl->tree_lock); goto loop; } spin_unlock(&block_group->free_space_ctl->tree_lock); offset = btrfs_find_space_for_alloc(block_group, search_start, - num_bytes, empty_size); + num_bytes, empty_size, + &max_extent_size); /* * If we didn't find a chunk, and we haven't failed on this * block group before, and this block group is in the middle of @@ -6426,17 +6536,28 @@ loop: index = 0; loop++; if (loop == LOOP_ALLOC_CHUNK) { + struct btrfs_trans_handle *trans; + + trans = btrfs_join_transaction(root); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out; + } + ret = do_chunk_alloc(trans, root, flags, CHUNK_ALLOC_FORCE); /* * Do not bail out on ENOSPC since we * can do more things. */ - if (ret < 0 && ret != -ENOSPC) { + if (ret < 0 && ret != -ENOSPC) btrfs_abort_transaction(trans, root, ret); + else + ret = 0; + btrfs_end_transaction(trans, root); + if (ret) goto out; - } } if (loop == LOOP_NO_EMPTY_SIZE) { @@ -6451,7 +6572,8 @@ loop: ret = 0; } out: - + if (ret == -ENOSPC) + ins->offset = max_extent_size; return ret; } @@ -6463,19 +6585,15 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes, spin_lock(&info->lock); printk(KERN_INFO "space_info %llu has %llu free, is %sfull\n", - (unsigned long long)info->flags, - (unsigned long long)(info->total_bytes - info->bytes_used - - info->bytes_pinned - info->bytes_reserved - - info->bytes_readonly), + info->flags, + info->total_bytes - info->bytes_used - info->bytes_pinned - + info->bytes_reserved - info->bytes_readonly, (info->full) ? "" : "not "); printk(KERN_INFO "space_info total=%llu, used=%llu, pinned=%llu, " "reserved=%llu, may_use=%llu, readonly=%llu\n", - (unsigned long long)info->total_bytes, - (unsigned long long)info->bytes_used, - (unsigned long long)info->bytes_pinned, - (unsigned long long)info->bytes_reserved, - (unsigned long long)info->bytes_may_use, - (unsigned long long)info->bytes_readonly); + info->total_bytes, info->bytes_used, info->bytes_pinned, + info->bytes_reserved, info->bytes_may_use, + info->bytes_readonly); spin_unlock(&info->lock); if (!dump_block_groups) @@ -6486,12 +6604,9 @@ again: list_for_each_entry(cache, &info->block_groups[index], list) { spin_lock(&cache->lock); printk(KERN_INFO "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s\n", - (unsigned long long)cache->key.objectid, - (unsigned long long)cache->key.offset, - (unsigned long long)btrfs_block_group_used(&cache->item), - (unsigned long long)cache->pinned, - (unsigned long long)cache->reserved, - cache->ro ? "[readonly]" : ""); + cache->key.objectid, cache->key.offset, + btrfs_block_group_used(&cache->item), cache->pinned, + cache->reserved, cache->ro ? "[readonly]" : ""); btrfs_dump_free_space(cache, bytes); spin_unlock(&cache->lock); } @@ -6500,8 +6615,7 @@ again: up_read(&info->groups_sem); } -int btrfs_reserve_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes, u64 min_alloc_size, u64 empty_size, u64 hint_byte, struct btrfs_key *ins, int is_data) @@ -6513,12 +6627,12 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans, flags = btrfs_get_alloc_profile(root, is_data); again: WARN_ON(num_bytes < root->sectorsize); - ret = find_free_extent(trans, root, num_bytes, empty_size, - hint_byte, ins, flags); + ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins, + flags); if (ret == -ENOSPC) { - if (!final_tried) { - num_bytes = num_bytes >> 1; + if (!final_tried && ins->offset) { + num_bytes = min(num_bytes >> 1, ins->offset); num_bytes = round_down(num_bytes, root->sectorsize); num_bytes = max(num_bytes, min_alloc_size); if (num_bytes == min_alloc_size) @@ -6529,15 +6643,12 @@ again: sinfo = __find_space_info(root->fs_info, flags); btrfs_err(root->fs_info, "allocation failed flags %llu, wanted %llu", - (unsigned long long)flags, - (unsigned long long)num_bytes); + flags, num_bytes); if (sinfo) dump_space_info(sinfo, num_bytes, 1); } } - trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset); - return ret; } @@ -6550,7 +6661,7 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root, cache = btrfs_lookup_block_group(root->fs_info, start); if (!cache) { btrfs_err(root->fs_info, "Unable to find block group for %llu", - (unsigned long long)start); + start); return -ENOSPC; } @@ -6646,10 +6757,10 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, ret = update_block_group(root, ins->objectid, ins->offset, 1); if (ret) { /* -ENOENT, logic error */ btrfs_err(fs_info, "update block group failed for %llu %llu", - (unsigned long long)ins->objectid, - (unsigned long long)ins->offset); + ins->objectid, ins->offset); BUG(); } + trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset); return ret; } @@ -6674,13 +6785,18 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, size += sizeof(*block_info); path = btrfs_alloc_path(); - if (!path) + if (!path) { + btrfs_free_and_pin_reserved_extent(root, ins->objectid, + root->leafsize); return -ENOMEM; + } path->leave_spinning = 1; ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, ins, size); if (ret) { + btrfs_free_and_pin_reserved_extent(root, ins->objectid, + root->leafsize); btrfs_free_path(path); return ret; } @@ -6719,10 +6835,11 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, ret = update_block_group(root, ins->objectid, root->leafsize, 1); if (ret) { /* -ENOENT, logic error */ btrfs_err(fs_info, "update block group failed for %llu %llu", - (unsigned long long)ins->objectid, - (unsigned long long)ins->offset); + ins->objectid, ins->offset); BUG(); } + + trace_btrfs_reserved_extent_alloc(root, ins->objectid, root->leafsize); return ret; } @@ -6902,7 +7019,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, if (IS_ERR(block_rsv)) return ERR_CAST(block_rsv); - ret = btrfs_reserve_extent(trans, root, blocksize, blocksize, + ret = btrfs_reserve_extent(root, blocksize, blocksize, empty_size, hint, &ins, 0); if (ret) { unuse_block_rsv(root->fs_info, block_rsv, blocksize); @@ -7173,6 +7290,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, next = btrfs_find_create_tree_block(root, bytenr, blocksize); if (!next) return -ENOMEM; + btrfs_set_buffer_lockdep_class(root->root_key.objectid, next, + level - 1); reada = 1; } btrfs_tree_lock(next); @@ -7658,7 +7777,7 @@ out: * don't have it in the radix (like when we recover after a power fail * or unmount) so we don't leak memory. */ - if (root_dropped == false) + if (!for_reloc && root_dropped == false) btrfs_add_dead_root(root); if (err) btrfs_std_error(root->fs_info, err); @@ -7925,7 +8044,7 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo) spin_lock(&sinfo->lock); - for(i = 0; i < BTRFS_NR_RAID_TYPES; i++) + for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) if (!list_empty(&sinfo->block_groups[i])) free_bytes += __btrfs_get_ro_block_group_free_space( &sinfo->block_groups[i]); @@ -8192,7 +8311,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) * We haven't cached this block group, which means we could * possibly have excluded extents on this block group. */ - if (block_group->cached == BTRFS_CACHE_NO) + if (block_group->cached == BTRFS_CACHE_NO || + block_group->cached == BTRFS_CACHE_ERROR) free_excluded_extents(info->extent_root, block_group); btrfs_remove_free_space_cache(block_group); @@ -8212,15 +8332,14 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) release_global_block_rsv(info); - while(!list_empty(&info->space_info)) { + while (!list_empty(&info->space_info)) { space_info = list_entry(info->space_info.next, struct btrfs_space_info, list); if (btrfs_test_opt(info->tree_root, ENOSPC_DEBUG)) { - if (space_info->bytes_pinned > 0 || + if (WARN_ON(space_info->bytes_pinned > 0 || space_info->bytes_reserved > 0 || - space_info->bytes_may_use > 0) { - WARN_ON(1); + space_info->bytes_may_use > 0)) { dump_space_info(space_info, 0, 0); } } @@ -8409,9 +8528,13 @@ int btrfs_read_block_groups(struct btrfs_root *root) * avoid allocating from un-mirrored block group if there are * mirrored block groups. */ - list_for_each_entry(cache, &space_info->block_groups[3], list) + list_for_each_entry(cache, + &space_info->block_groups[BTRFS_RAID_RAID0], + list) set_block_group_ro(cache, 1); - list_for_each_entry(cache, &space_info->block_groups[4], list) + list_for_each_entry(cache, + &space_info->block_groups[BTRFS_RAID_SINGLE], + list) set_block_group_ro(cache, 1); } |