From cb517eabba4f109810dba2e5f37b0dcf22103065 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 15 May 2013 07:48:19 +0000 Subject: Btrfs: cleanup the similar code of the fs root read There are several functions whose code is similar, such as btrfs_find_last_root() btrfs_read_fs_root_no_radix() Besides that, some functions are invoked twice, it is unnecessary, for example, we are sure that all roots which is found in btrfs_find_orphan_roots() have their orphan items, so it is unnecessary to check the orphan item again. So cleanup it. Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/relocation.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 4febca4fc2de..f46b4cca4fa2 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1355,8 +1355,7 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans, BUG_ON(ret); kfree(root_item); - reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root, - &root_key); + reloc_root = btrfs_read_fs_root(root->fs_info->tree_root, &root_key); BUG_ON(IS_ERR(reloc_root)); reloc_root->last_trans = trans->transid; return reloc_root; @@ -4277,7 +4276,7 @@ int btrfs_recover_relocation(struct btrfs_root *root) key.type != BTRFS_ROOT_ITEM_KEY) break; - reloc_root = btrfs_read_fs_root_no_radix(root, &key); + reloc_root = btrfs_read_fs_root(root, &key); if (IS_ERR(reloc_root)) { err = PTR_ERR(reloc_root); goto out; -- cgit v1.2.3 From eb73c1b7cea7d533288ef5297a0ea0e159db85b0 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 15 May 2013 07:48:22 +0000 Subject: Btrfs: introduce per-subvolume delalloc inode list When we create a snapshot, we need flush all delalloc inodes in the fs, just flushing the inodes in the source tree is OK. So we introduce per-subvolume delalloc inode list. Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/relocation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index f46b4cca4fa2..f6e1b54f05d8 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -4159,7 +4159,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) (unsigned long long)rc->block_group->key.objectid, (unsigned long long)rc->block_group->flags); - ret = btrfs_start_delalloc_inodes(fs_info->tree_root, 0); + ret = btrfs_start_all_delalloc_inodes(fs_info, 0); if (ret < 0) { err = ret; goto out; -- cgit v1.2.3 From 199c2a9c3d1389db7f7a211e64f6809d352ce5f6 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 15 May 2013 07:48:23 +0000 Subject: Btrfs: introduce per-subvolume ordered extent list The reason we introduce per-subvolume ordered extent list is the same as the per-subvolume delalloc inode list. Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/relocation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index f6e1b54f05d8..aa559f1161df 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -4164,7 +4164,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) err = ret; goto out; } - btrfs_wait_ordered_extents(fs_info->tree_root, 0); + btrfs_wait_all_ordered_extents(fs_info, 0); while (1) { mutex_lock(&fs_info->cleaner_mutex); -- cgit v1.2.3 From 5bc7247ac47cf100309e4d7e24214889c46a1636 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 6 Jun 2013 03:28:03 +0000 Subject: Btrfs: fix broken nocow after balance Balance will create reloc_root for each fs root, and it's going to record last_snapshot to filter shared blocks. The side effect of setting last_snapshot is to break nocow attributes of files. Since the extents are not shared by the relocation tree after the balance, we can recover the old last_snapshot safely if no one snapshoted the source tree. We fix the above problem by this way. Reported-by: Kyle Gates Signed-off-by: Liu Bo Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/relocation.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index aa559f1161df..4a404b44b0ad 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1305,6 +1305,7 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans, struct extent_buffer *eb; struct btrfs_root_item *root_item; struct btrfs_key root_key; + u64 last_snap = 0; int ret; root_item = kmalloc(sizeof(*root_item), GFP_NOFS); @@ -1320,6 +1321,7 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans, BTRFS_TREE_RELOC_OBJECTID); BUG_ON(ret); + last_snap = btrfs_root_last_snapshot(&root->root_item); btrfs_set_root_last_snapshot(&root->root_item, trans->transid - 1); } else { @@ -1345,6 +1347,12 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans, memset(&root_item->drop_progress, 0, sizeof(struct btrfs_disk_key)); root_item->drop_level = 0; + /* + * abuse rtransid, it is safe because it is impossible to + * receive data into a relocation tree. + */ + btrfs_set_root_rtransid(root_item, last_snap); + btrfs_set_root_otransid(root_item, trans->transid); } btrfs_tree_unlock(eb); @@ -2272,8 +2280,12 @@ void free_reloc_roots(struct list_head *list) static noinline_for_stack int merge_reloc_roots(struct reloc_control *rc) { + struct btrfs_trans_handle *trans; struct btrfs_root *root; struct btrfs_root *reloc_root; + u64 last_snap; + u64 otransid; + u64 objectid; LIST_HEAD(reloc_roots); int found = 0; int ret = 0; @@ -2307,12 +2319,44 @@ again: } else { list_del_init(&reloc_root->root_list); } + + /* + * we keep the old last snapshod transid in rtranid when we + * created the relocation tree. + */ + last_snap = btrfs_root_rtransid(&reloc_root->root_item); + otransid = btrfs_root_otransid(&reloc_root->root_item); + objectid = reloc_root->root_key.offset; + ret = btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0, 1); if (ret < 0) { if (list_empty(&reloc_root->root_list)) list_add_tail(&reloc_root->root_list, &reloc_roots); goto out; + } else if (!ret) { + /* + * recover the last snapshot tranid to avoid + * the space balance break NOCOW. + */ + root = read_fs_root(rc->extent_root->fs_info, + objectid); + if (IS_ERR(root)) + continue; + + if (btrfs_root_refs(&root->root_item) == 0) + continue; + + trans = btrfs_join_transaction(root); + BUG_ON(IS_ERR(trans)); + + /* Check if the fs/file tree was snapshoted or not. */ + if (btrfs_root_last_snapshot(&root->root_item) == + otransid - 1) + btrfs_set_root_last_snapshot(&root->root_item, + last_snap); + + btrfs_end_transaction(trans, root); } } -- cgit v1.2.3 From aee68ee5f5427b91be5b23459993134ca64ecf00 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 13 Jun 2013 13:50:23 -0400 Subject: Btrfs: fix not being able to find skinny extents during relocate We unconditionally search for the EXTENT_ITEM_KEY for metadata during balance, and then check the key that we found to see if it is actually a METADATA_ITEM_KEY, but this doesn't work right because METADATA is a higher key value, so if what we are looking for happens to be the first item in the leaf the search will dump us out at the previous leaf, and we won't find our item. So instead do what we do everywhere else, search for the skinny extent first and if we don't find it go back and re-search for the extent item. This patch fixes the panic I was hitting when balancing a large file system with skinny extents. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/relocation.c | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 4a404b44b0ad..d91f106df665 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3309,6 +3309,8 @@ static int __add_tree_block(struct reloc_control *rc, struct btrfs_path *path; struct btrfs_key key; int ret; + bool skinny = btrfs_fs_incompat(rc->extent_root->fs_info, + SKINNY_METADATA); if (tree_block_processed(bytenr, blocksize, rc)) return 0; @@ -3319,10 +3321,15 @@ static int __add_tree_block(struct reloc_control *rc, path = btrfs_alloc_path(); if (!path) return -ENOMEM; - +again: key.objectid = bytenr; - key.type = BTRFS_EXTENT_ITEM_KEY; - key.offset = blocksize; + if (skinny) { + key.type = BTRFS_METADATA_ITEM_KEY; + key.offset = (u64)-1; + } else { + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = blocksize; + } path->search_commit_root = 1; path->skip_locking = 1; @@ -3330,11 +3337,23 @@ static int __add_tree_block(struct reloc_control *rc, if (ret < 0) goto out; - btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); - if (ret > 0) { - if (key.objectid == bytenr && - key.type == BTRFS_METADATA_ITEM_KEY) - ret = 0; + if (ret > 0 && skinny) { + if (path->slots[0]) { + path->slots[0]--; + btrfs_item_key_to_cpu(path->nodes[0], &key, + path->slots[0]); + if (key.objectid == bytenr && + (key.type == BTRFS_METADATA_ITEM_KEY || + (key.type == BTRFS_EXTENT_ITEM_KEY && + key.offset == blocksize))) + ret = 0; + } + + if (ret) { + skinny = false; + btrfs_release_path(path); + goto again; + } } BUG_ON(ret); -- cgit v1.2.3 From f51a4a1826ff810eb9c00cadff8978b028c40756 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 19 Jun 2013 10:36:09 +0800 Subject: Btrfs: remove btrfs_sector_sum structure Using the structure btrfs_sector_sum to keep the checksum value is unnecessary, because the extents that btrfs_sector_sum points to are continuous, we can find out the expected checksums by btrfs_ordered_sum's bytenr and the offset, so we can remove btrfs_sector_sum's bytenr. After removing bytenr, there is only one member in the structure, so it makes no sense to keep the structure, just remove it, and use a u32 array to store the checksum value. By this change, we don't use the while loop to get the checksums one by one. Now, we can get several checksum value at one time, it improved the performance by ~74% on my SSD (31MB/s -> 54MB/s). test command: # dd if=/dev/zero of=/mnt/btrfs/file0 bs=1M count=1024 oflag=sync Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/relocation.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index d91f106df665..12096496cc99 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -4458,10 +4458,8 @@ out: int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) { struct btrfs_ordered_sum *sums; - struct btrfs_sector_sum *sector_sum; struct btrfs_ordered_extent *ordered; struct btrfs_root *root = BTRFS_I(inode)->root; - size_t offset; int ret; u64 disk_bytenr; LIST_HEAD(list); @@ -4475,19 +4473,13 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) if (ret) goto out; + disk_bytenr = ordered->start; while (!list_empty(&list)) { sums = list_entry(list.next, struct btrfs_ordered_sum, list); list_del_init(&sums->list); - sector_sum = sums->sums; - sums->bytenr = ordered->start; - - offset = 0; - while (offset < sums->len) { - sector_sum->bytenr += ordered->start - disk_bytenr; - sector_sum++; - offset += root->sectorsize; - } + sums->bytenr = disk_bytenr; + disk_bytenr += sums->len; btrfs_add_ordered_sum(inode, ordered, sums); } -- cgit v1.2.3