From 581bb050941b4f220f84d3e5ed6dace3d42dd382 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 20 Apr 2011 10:06:11 +0800 Subject: Btrfs: Cache free inode numbers in memory Currently btrfs stores the highest objectid of the fs tree, and it always returns (highest+1) inode number when we create a file, so inode numbers won't be reclaimed when we delete files, and we'll run out of inode numbers as we keep creating and deleting files on 32-bit machines. This fixes it, and it works similarly to how we cache free space in block groups. We start a kernel thread to read the file tree. By scanning inode items, we know which chunks of inode numbers are free, and we cache them in an rb-tree. Because we are searching the commit root, we have to carefully handle the cross-transaction case. The rb-tree is a hybrid extent+bitmap tree, so if we have too many small chunks of inode numbers, we'll use bitmaps. Initially we allow 16K of RAM for extents, and a bitmap will be used if we exceed this threshold. The extents threshold is adjusted at runtime. 
Signed-off-by: Li Zefan --- fs/btrfs/inode.c | 42 +++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 15 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a4157cfdd533..77dd0a776c83 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -51,6 +51,7 @@ #include "compression.h" #include "locking.h" #include "free-space-cache.h" +#include "inode-map.h" struct btrfs_iget_args { u64 ino; @@ -3809,6 +3810,10 @@ void btrfs_evict_inode(struct inode *inode) BUG_ON(ret); } + if (!(root == root->fs_info->tree_root || + root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)) + btrfs_return_ino(root, inode->i_ino); + nr = trans->blocks_used; btrfs_end_transaction(trans, root); btrfs_btree_balance_dirty(root, nr); @@ -4538,6 +4543,12 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, return ERR_PTR(-ENOMEM); } + /* + * we have to initialize this early, so we can reclaim the inode + * number if we fail afterwards in this function. 
+ */ + inode->i_ino = objectid; + if (dir) { trace_btrfs_inode_request(dir); @@ -4583,7 +4594,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, goto fail; inode_init_owner(inode, dir, mode); - inode->i_ino = objectid; inode_set_bytes(inode, 0); inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], @@ -4712,10 +4722,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, if (!new_valid_dev(rdev)) return -EINVAL; - err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid); - if (err) - return err; - /* * 2 for inode item and ref * 2 for dir items @@ -4727,6 +4733,10 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, btrfs_set_trans_block_group(trans, dir); + err = btrfs_find_free_ino(root, &objectid); + if (err) + goto out_unlock; + inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, dentry->d_name.len, dir->i_ino, objectid, BTRFS_I(dir)->block_group, mode, &index); @@ -4774,9 +4784,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, u64 objectid; u64 index = 0; - err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid); - if (err) - return err; /* * 2 for inode item and ref * 2 for dir items @@ -4788,6 +4795,10 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, btrfs_set_trans_block_group(trans, dir); + err = btrfs_find_free_ino(root, &objectid); + if (err) + goto out_unlock; + inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, dentry->d_name.len, dir->i_ino, objectid, BTRFS_I(dir)->block_group, mode, &index); @@ -4902,10 +4913,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) u64 index = 0; unsigned long nr = 1; - err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid); - if (err) - return err; - /* * 2 items for inode and ref * 2 items for dir items @@ -4916,6 +4923,10 @@ static int btrfs_mkdir(struct inode *dir, struct 
dentry *dentry, int mode) return PTR_ERR(trans); btrfs_set_trans_block_group(trans, dir); + err = btrfs_find_free_ino(root, &objectid); + if (err) + goto out_fail; + inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, dentry->d_name.len, dir->i_ino, objectid, BTRFS_I(dir)->block_group, S_IFDIR | mode, @@ -7257,9 +7268,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) return -ENAMETOOLONG; - err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid); - if (err) - return err; /* * 2 items for inode item and ref * 2 items for dir items @@ -7271,6 +7279,10 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, btrfs_set_trans_block_group(trans, dir); + err = btrfs_find_free_ino(root, &objectid); + if (err) + goto out_unlock; + inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, dentry->d_name.len, dir->i_ino, objectid, BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO, -- cgit v1.2.3 From 33345d01522f8152f99dc84a3e7a1a45707f387f Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 20 Apr 2011 10:31:50 +0800 Subject: Btrfs: Always use 64bit inode number There's a potential problem in 32bit system when we exhaust 32bit inode numbers and start to allocate big inode numbers, because btrfs uses inode->i_ino in many places. So here we always use BTRFS_I(inode)->location.objectid, which is an u64 variable. There are 2 exceptions that BTRFS_I(inode)->location.objectid != inode->i_ino: the btree inode (0 vs 1) and empty subvol dirs (256 vs 2), and inode->i_ino will be used in those cases. Another reason to make this change is I'm going to use a special inode to save free ino cache, and the inode number must be > (u64)-256. 
Signed-off-by: Li Zefan --- fs/btrfs/inode.c | 197 +++++++++++++++++++++++++++++-------------------------- 1 file changed, 103 insertions(+), 94 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 77dd0a776c83..adec22884a3e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -138,7 +138,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, path->leave_spinning = 1; btrfs_set_trans_block_group(trans, inode); - key.objectid = inode->i_ino; + key.objectid = btrfs_ino(inode); key.offset = start; btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); datasize = btrfs_file_extent_calc_inline_size(cur_size); @@ -1049,6 +1049,7 @@ static noinline int run_delalloc_nocow(struct inode *inode, int nocow; int check_prev = 1; bool nolock = false; + u64 ino = btrfs_ino(inode); path = btrfs_alloc_path(); BUG_ON(!path); @@ -1063,14 +1064,14 @@ static noinline int run_delalloc_nocow(struct inode *inode, cow_start = (u64)-1; cur_offset = start; while (1) { - ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, + ret = btrfs_lookup_file_extent(trans, root, path, ino, cur_offset, 0); BUG_ON(ret < 0); if (ret > 0 && path->slots[0] > 0 && check_prev) { leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1); - if (found_key.objectid == inode->i_ino && + if (found_key.objectid == ino && found_key.type == BTRFS_EXTENT_DATA_KEY) path->slots[0]--; } @@ -1091,7 +1092,7 @@ next_slot: num_bytes = 0; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - if (found_key.objectid > inode->i_ino || + if (found_key.objectid > ino || found_key.type > BTRFS_EXTENT_DATA_KEY || found_key.offset > end) break; @@ -1126,7 +1127,7 @@ next_slot: goto out_check; if (btrfs_extent_readonly(root, disk_bytenr)) goto out_check; - if (btrfs_cross_ref_exist(trans, root, inode->i_ino, + if (btrfs_cross_ref_exist(trans, root, ino, found_key.offset - extent_offset, disk_bytenr)) goto out_check; @@ -1643,7 
+1644,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, &hint, 0); BUG_ON(ret); - ins.objectid = inode->i_ino; + ins.objectid = btrfs_ino(inode); ins.offset = file_pos; ins.type = BTRFS_EXTENT_DATA_KEY; ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*fi)); @@ -1674,7 +1675,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, ins.type = BTRFS_EXTENT_ITEM_KEY; ret = btrfs_alloc_reserved_file_extent(trans, root, root->root_key.objectid, - inode->i_ino, file_pos, &ins); + btrfs_ino(inode), file_pos, &ins); BUG_ON(ret); btrfs_free_path(path); @@ -2004,8 +2005,9 @@ good: zeroit: if (printk_ratelimit()) { - printk(KERN_INFO "btrfs csum failed ino %lu off %llu csum %u " - "private %llu\n", page->mapping->host->i_ino, + printk(KERN_INFO "btrfs csum failed ino %llu off %llu csum %u " + "private %llu\n", + (unsigned long long)btrfs_ino(page->mapping->host), (unsigned long long)start, csum, (unsigned long long)private); } @@ -2243,7 +2245,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) /* insert an orphan item to track this unlinked/truncated file */ if (insert >= 1) { - ret = btrfs_insert_orphan_item(trans, root, inode->i_ino); + ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); BUG_ON(ret); } @@ -2280,7 +2282,7 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode) spin_unlock(&root->orphan_lock); if (trans && delete_item) { - ret = btrfs_del_orphan_item(trans, root, inode->i_ino); + ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode)); BUG_ON(ret); } @@ -2542,7 +2544,8 @@ static void btrfs_read_locked_inode(struct inode *inode) * try to precache a NULL acl entry for files that don't have * any xattrs or acls */ - maybe_acls = acls_after_inode_item(leaf, path->slots[0], inode->i_ino); + maybe_acls = acls_after_inode_item(leaf, path->slots[0], + btrfs_ino(inode)); if (!maybe_acls) cache_no_acl(inode); @@ -2688,6 +2691,8 @@ static int 
__btrfs_unlink_inode(struct btrfs_trans_handle *trans, struct btrfs_dir_item *di; struct btrfs_key key; u64 index; + u64 ino = btrfs_ino(inode); + u64 dir_ino = btrfs_ino(dir); path = btrfs_alloc_path(); if (!path) { @@ -2696,7 +2701,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, } path->leave_spinning = 1; - di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, + di = btrfs_lookup_dir_item(trans, root, path, dir_ino, name, name_len, -1); if (IS_ERR(di)) { ret = PTR_ERR(di); @@ -2713,17 +2718,16 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, goto err; btrfs_release_path(root, path); - ret = btrfs_del_inode_ref(trans, root, name, name_len, - inode->i_ino, - dir->i_ino, &index); + ret = btrfs_del_inode_ref(trans, root, name, name_len, ino, + dir_ino, &index); if (ret) { printk(KERN_INFO "btrfs failed to delete reference to %.*s, " - "inode %lu parent %lu\n", name_len, name, - inode->i_ino, dir->i_ino); + "inode %llu parent %llu\n", name_len, name, + (unsigned long long)ino, (unsigned long long)dir_ino); goto err; } - di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, + di = btrfs_lookup_dir_index_item(trans, root, path, dir_ino, index, name, name_len, -1); if (IS_ERR(di)) { ret = PTR_ERR(di); @@ -2737,7 +2741,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, btrfs_release_path(root, path); ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len, - inode, dir->i_ino); + inode, dir_ino); BUG_ON(ret != 0 && ret != -ENOENT); ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, @@ -2815,12 +2819,14 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, int check_link = 1; int err = -ENOSPC; int ret; + u64 ino = btrfs_ino(inode); + u64 dir_ino = btrfs_ino(dir); trans = btrfs_start_transaction(root, 10); if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) return trans; - if (inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) + if (ino == 
BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) return ERR_PTR(-ENOSPC); /* check if there is someone else holds reference */ @@ -2879,7 +2885,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, if (ret == 0 && S_ISREG(inode->i_mode)) { ret = btrfs_lookup_file_extent(trans, root, path, - inode->i_ino, (u64)-1, 0); + ino, (u64)-1, 0); if (ret < 0) { err = ret; goto out; @@ -2895,7 +2901,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, goto out; } - di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, + di = btrfs_lookup_dir_item(trans, root, path, dir_ino, dentry->d_name.name, dentry->d_name.len, 0); if (IS_ERR(di)) { err = PTR_ERR(di); @@ -2912,7 +2918,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, ref = btrfs_lookup_inode_ref(trans, root, path, dentry->d_name.name, dentry->d_name.len, - inode->i_ino, dir->i_ino, 0); + ino, dir_ino, 0); if (IS_ERR(ref)) { err = PTR_ERR(ref); goto out; @@ -2923,7 +2929,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, index = btrfs_inode_ref_index(path->nodes[0], ref); btrfs_release_path(root, path); - di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, index, + di = btrfs_lookup_dir_index_item(trans, root, path, dir_ino, index, dentry->d_name.name, dentry->d_name.len, 0); if (IS_ERR(di)) { err = PTR_ERR(di); @@ -2998,12 +3004,13 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, struct btrfs_key key; u64 index; int ret; + u64 dir_ino = btrfs_ino(dir); path = btrfs_alloc_path(); if (!path) return -ENOMEM; - di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, + di = btrfs_lookup_dir_item(trans, root, path, dir_ino, name, name_len, -1); BUG_ON(!di || IS_ERR(di)); @@ -3016,10 +3023,10 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, ret = btrfs_del_root_ref(trans, root->fs_info->tree_root, objectid, root->root_key.objectid, - dir->i_ino, &index, name, name_len); + dir_ino, &index, name, 
name_len); if (ret < 0) { BUG_ON(ret != -ENOENT); - di = btrfs_search_dir_index_item(root, path, dir->i_ino, + di = btrfs_search_dir_index_item(root, path, dir_ino, name, name_len); BUG_ON(!di || IS_ERR(di)); @@ -3029,7 +3036,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, index = key.offset; } - di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, + di = btrfs_lookup_dir_index_item(trans, root, path, dir_ino, index, name, name_len, -1); BUG_ON(!di || IS_ERR(di)); @@ -3058,7 +3065,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) unsigned long nr = 0; if (inode->i_size > BTRFS_EMPTY_DIR_SIZE || - inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) + btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID) return -ENOTEMPTY; trans = __unlink_start_trans(dir, dentry); @@ -3067,7 +3074,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) btrfs_set_trans_block_group(trans, dir); - if (unlikely(inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { + if (unlikely(btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { err = btrfs_unlink_subvol(trans, root, dir, BTRFS_I(inode)->location.objectid, dentry->d_name.name, @@ -3299,6 +3306,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, int encoding; int ret; int err = 0; + u64 ino = btrfs_ino(inode); BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); @@ -3309,7 +3317,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, BUG_ON(!path); path->reada = -1; - key.objectid = inode->i_ino; + key.objectid = ino; key.offset = (u64)-1; key.type = (u8)-1; @@ -3337,7 +3345,7 @@ search_again: found_type = btrfs_key_type(&found_key); encoding = 0; - if (found_key.objectid != inode->i_ino) + if (found_key.objectid != ino) break; if (found_type < min_type) @@ -3456,7 +3464,7 @@ delete: ret = btrfs_free_extent(trans, root, extent_start, extent_num_bytes, 0, btrfs_header_owner(leaf), - inode->i_ino, extent_offset); + ino, extent_offset); BUG_ON(ret); } @@ 
-3655,7 +3663,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) break; err = btrfs_insert_file_extent(trans, root, - inode->i_ino, cur_offset, 0, + btrfs_ino(inode), cur_offset, 0, 0, hole_size, 0, hole_size, 0, 0, 0); if (err) @@ -3812,7 +3820,7 @@ void btrfs_evict_inode(struct inode *inode) if (!(root == root->fs_info->tree_root || root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)) - btrfs_return_ino(root, inode->i_ino); + btrfs_return_ino(root, btrfs_ino(inode)); nr = trans->blocks_used; btrfs_end_transaction(trans, root); @@ -3839,7 +3847,7 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, path = btrfs_alloc_path(); BUG_ON(!path); - di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name, + di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(dir), name, namelen, 0); if (IS_ERR(di)) ret = PTR_ERR(di); @@ -3892,7 +3900,7 @@ static int fixup_tree_root_location(struct btrfs_root *root, leaf = path->nodes[0]; ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref); - if (btrfs_root_ref_dirid(leaf, ref) != dir->i_ino || + if (btrfs_root_ref_dirid(leaf, ref) != btrfs_ino(dir) || btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len) goto out; @@ -3931,6 +3939,7 @@ static void inode_tree_add(struct inode *inode) struct btrfs_inode *entry; struct rb_node **p; struct rb_node *parent; + u64 ino = btrfs_ino(inode); again: p = &root->inode_tree.rb_node; parent = NULL; @@ -3943,9 +3952,9 @@ again: parent = *p; entry = rb_entry(parent, struct btrfs_inode, rb_node); - if (inode->i_ino < entry->vfs_inode.i_ino) + if (ino < btrfs_ino(&entry->vfs_inode)) p = &parent->rb_left; - else if (inode->i_ino > entry->vfs_inode.i_ino) + else if (ino > btrfs_ino(&entry->vfs_inode)) p = &parent->rb_right; else { WARN_ON(!(entry->vfs_inode.i_state & @@ -4009,9 +4018,9 @@ again: prev = node; entry = rb_entry(node, struct btrfs_inode, rb_node); - if (objectid < entry->vfs_inode.i_ino) + if (objectid < 
btrfs_ino(&entry->vfs_inode)) node = node->rb_left; - else if (objectid > entry->vfs_inode.i_ino) + else if (objectid > btrfs_ino(&entry->vfs_inode)) node = node->rb_right; else break; @@ -4019,7 +4028,7 @@ again: if (!node) { while (prev) { entry = rb_entry(prev, struct btrfs_inode, rb_node); - if (objectid <= entry->vfs_inode.i_ino) { + if (objectid <= btrfs_ino(&entry->vfs_inode)) { node = prev; break; } @@ -4028,7 +4037,7 @@ again: } while (node) { entry = rb_entry(node, struct btrfs_inode, rb_node); - objectid = entry->vfs_inode.i_ino + 1; + objectid = btrfs_ino(&entry->vfs_inode) + 1; inode = igrab(&entry->vfs_inode); if (inode) { spin_unlock(&root->inode_lock); @@ -4066,7 +4075,7 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p) static int btrfs_find_actor(struct inode *inode, void *opaque) { struct btrfs_iget_args *args = opaque; - return args->ino == inode->i_ino && + return args->ino == btrfs_ino(inode) && args->root == BTRFS_I(inode)->root; } @@ -4244,9 +4253,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, /* special case for "." 
*/ if (filp->f_pos == 0) { - over = filldir(dirent, ".", 1, - 1, inode->i_ino, - DT_DIR); + over = filldir(dirent, ".", 1, 1, btrfs_ino(inode), DT_DIR); if (over) return 0; filp->f_pos = 1; @@ -4265,7 +4272,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, btrfs_set_key_type(&key, key_type); key.offset = filp->f_pos; - key.objectid = inode->i_ino; + key.objectid = btrfs_ino(inode); ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) @@ -4420,8 +4427,9 @@ void btrfs_dirty_inode(struct inode *inode) if (IS_ERR(trans)) { if (printk_ratelimit()) { printk(KERN_ERR "btrfs: fail to " - "dirty inode %lu error %ld\n", - inode->i_ino, PTR_ERR(trans)); + "dirty inode %llu error %ld\n", + (unsigned long long)btrfs_ino(inode), + PTR_ERR(trans)); } return; } @@ -4431,8 +4439,9 @@ void btrfs_dirty_inode(struct inode *inode) if (ret) { if (printk_ratelimit()) { printk(KERN_ERR "btrfs: fail to " - "dirty inode %lu error %d\n", - inode->i_ino, ret); + "dirty inode %llu error %d\n", + (unsigned long long)btrfs_ino(inode), + ret); } } } @@ -4452,7 +4461,7 @@ static int btrfs_set_inode_index_count(struct inode *inode) struct extent_buffer *leaf; int ret; - key.objectid = inode->i_ino; + key.objectid = btrfs_ino(inode); btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); key.offset = (u64)-1; @@ -4484,7 +4493,7 @@ static int btrfs_set_inode_index_count(struct inode *inode) leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - if (found_key.objectid != inode->i_ino || + if (found_key.objectid != btrfs_ino(inode) || btrfs_key_type(&found_key) != BTRFS_DIR_INDEX_KEY) { BTRFS_I(inode)->index_cnt = 2; goto out; @@ -4657,29 +4666,29 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, int ret = 0; struct btrfs_key key; struct btrfs_root *root = BTRFS_I(parent_inode)->root; + u64 ino = btrfs_ino(inode); + u64 parent_ino = btrfs_ino(parent_inode); - if (unlikely(inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { + if (unlikely(ino == 
BTRFS_FIRST_FREE_OBJECTID)) { memcpy(&key, &BTRFS_I(inode)->root->root_key, sizeof(key)); } else { - key.objectid = inode->i_ino; + key.objectid = ino; btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); key.offset = 0; } - if (unlikely(inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { + if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) { ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, key.objectid, root->root_key.objectid, - parent_inode->i_ino, - index, name, name_len); + parent_ino, index, name, name_len); } else if (add_backref) { - ret = btrfs_insert_inode_ref(trans, root, - name, name_len, inode->i_ino, - parent_inode->i_ino, index); + ret = btrfs_insert_inode_ref(trans, root, name, name_len, ino, + parent_ino, index); } if (ret == 0) { ret = btrfs_insert_dir_item(trans, root, name, name_len, - parent_inode->i_ino, &key, + parent_ino, &key, btrfs_inode_type(inode), index); BUG_ON(ret); @@ -4738,7 +4747,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, goto out_unlock; inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, - dentry->d_name.len, dir->i_ino, objectid, + dentry->d_name.len, btrfs_ino(dir), objectid, BTRFS_I(dir)->block_group, mode, &index); err = PTR_ERR(inode); if (IS_ERR(inode)) @@ -4800,7 +4809,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, goto out_unlock; inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, - dentry->d_name.len, dir->i_ino, objectid, + dentry->d_name.len, btrfs_ino(dir), objectid, BTRFS_I(dir)->block_group, mode, &index); err = PTR_ERR(inode); if (IS_ERR(inode)) @@ -4928,7 +4937,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) goto out_fail; inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, - dentry->d_name.len, dir->i_ino, objectid, + dentry->d_name.len, btrfs_ino(dir), objectid, BTRFS_I(dir)->block_group, S_IFDIR | mode, &index); if (IS_ERR(inode)) { @@ -5049,7 +5058,7 @@ struct extent_map *btrfs_get_extent(struct 
inode *inode, struct page *page, u64 bytenr; u64 extent_start = 0; u64 extent_end = 0; - u64 objectid = inode->i_ino; + u64 objectid = btrfs_ino(inode); u32 found_type; struct btrfs_path *path = NULL; struct btrfs_root *root = BTRFS_I(inode)->root; @@ -5557,7 +5566,7 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans, if (!path) return -ENOMEM; - ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, + ret = btrfs_lookup_file_extent(trans, root, path, btrfs_ino(inode), offset, 0); if (ret < 0) goto out; @@ -5574,7 +5583,7 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans, ret = 0; leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &key, slot); - if (key.objectid != inode->i_ino || + if (key.objectid != btrfs_ino(inode) || key.type != BTRFS_EXTENT_DATA_KEY) { /* not our file or wrong item type, must cow */ goto out; @@ -5608,7 +5617,7 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans, * look for other files referencing this extent, if we * find any we must cow */ - if (btrfs_cross_ref_exist(trans, root, inode->i_ino, + if (btrfs_cross_ref_exist(trans, root, btrfs_ino(inode), key.offset - backref_offset, disk_bytenr)) goto out; @@ -5798,9 +5807,10 @@ static void btrfs_endio_direct_read(struct bio *bio, int err) flush_dcache_page(bvec->bv_page); if (csum != *private) { - printk(KERN_ERR "btrfs csum failed ino %lu off" + printk(KERN_ERR "btrfs csum failed ino %llu off" " %llu csum %u private %u\n", - inode->i_ino, (unsigned long long)start, + (unsigned long long)btrfs_ino(inode), + (unsigned long long)start, csum, *private); err = -EIO; } @@ -5947,9 +5957,9 @@ static void btrfs_end_dio_bio(struct bio *bio, int err) struct btrfs_dio_private *dip = bio->bi_private; if (err) { - printk(KERN_ERR "btrfs direct IO failed ino %lu rw %lu " + printk(KERN_ERR "btrfs direct IO failed ino %llu rw %lu " "sector %#Lx len %u err no %d\n", - dip->inode->i_ino, bio->bi_rw, + (unsigned long 
long)btrfs_ino(dip->inode), bio->bi_rw, (unsigned long long)bio->bi_sector, bio->bi_size, err); dip->errors = 1; @@ -6859,8 +6869,8 @@ void btrfs_destroy_inode(struct inode *inode) spin_lock(&root->orphan_lock); if (!list_empty(&BTRFS_I(inode)->i_orphan)) { - printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n", - inode->i_ino); + printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n", + (unsigned long long)btrfs_ino(inode)); list_del_init(&BTRFS_I(inode)->i_orphan); } spin_unlock(&root->orphan_lock); @@ -6999,16 +7009,17 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, u64 index = 0; u64 root_objectid; int ret; + u64 old_ino = btrfs_ino(old_inode); - if (new_dir->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) + if (btrfs_ino(new_dir) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) return -EPERM; /* we only allow rename subvolume link between subvolumes */ - if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest) + if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest) return -EXDEV; - if (old_inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID || - (new_inode && new_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) + if (old_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID || + (new_inode && btrfs_ino(new_inode) == BTRFS_FIRST_FREE_OBJECTID)) return -ENOTEMPTY; if (S_ISDIR(old_inode->i_mode) && new_inode && @@ -7024,7 +7035,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, filemap_flush(old_inode->i_mapping); /* close the racy window with snapshot create/destroy ioctl */ - if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) + if (old_ino == BTRFS_FIRST_FREE_OBJECTID) down_read(&root->fs_info->subvol_sem); /* * We want to reserve the absolute worst case amount of items. 
So if @@ -7049,15 +7060,15 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (ret) goto out_fail; - if (unlikely(old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { + if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) { /* force full log commit if subvolume involved. */ root->fs_info->last_trans_log_full_commit = trans->transid; } else { ret = btrfs_insert_inode_ref(trans, dest, new_dentry->d_name.name, new_dentry->d_name.len, - old_inode->i_ino, - new_dir->i_ino, index); + old_ino, + btrfs_ino(new_dir), index); if (ret) goto out_fail; /* @@ -7073,10 +7084,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, * make sure the inode gets flushed if it is replacing * something. */ - if (new_inode && new_inode->i_size && - old_inode && S_ISREG(old_inode->i_mode)) { + if (new_inode && new_inode->i_size && S_ISREG(old_inode->i_mode)) btrfs_add_ordered_operation(trans, root, old_inode); - } old_dir->i_ctime = old_dir->i_mtime = ctime; new_dir->i_ctime = new_dir->i_mtime = ctime; @@ -7085,7 +7094,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (old_dentry->d_parent != new_dentry->d_parent) btrfs_record_unlink_dir(trans, old_dir, old_inode, 1); - if (unlikely(old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { + if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) { root_objectid = BTRFS_I(old_inode)->root->root_key.objectid; ret = btrfs_unlink_subvol(trans, root, old_dir, root_objectid, old_dentry->d_name.name, @@ -7102,7 +7111,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (new_inode) { new_inode->i_ctime = CURRENT_TIME; - if (unlikely(new_inode->i_ino == + if (unlikely(btrfs_ino(new_inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { root_objectid = BTRFS_I(new_inode)->location.objectid; ret = btrfs_unlink_subvol(trans, dest, new_dir, @@ -7130,7 +7139,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, new_dentry->d_name.len, 0, 
index); BUG_ON(ret); - if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { + if (old_ino != BTRFS_FIRST_FREE_OBJECTID) { struct dentry *parent = dget_parent(new_dentry); btrfs_log_new_name(trans, old_inode, old_dir, parent); dput(parent); @@ -7139,7 +7148,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, out_fail: btrfs_end_transaction_throttle(trans, root); out_notrans: - if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) + if (old_ino == BTRFS_FIRST_FREE_OBJECTID) up_read(&root->fs_info->subvol_sem); return ret; @@ -7284,7 +7293,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, goto out_unlock; inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, - dentry->d_name.len, dir->i_ino, objectid, + dentry->d_name.len, btrfs_ino(dir), objectid, BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO, &index); err = PTR_ERR(inode); @@ -7315,7 +7324,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, path = btrfs_alloc_path(); BUG_ON(!path); - key.objectid = inode->i_ino; + key.objectid = btrfs_ino(inode); key.offset = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); datasize = btrfs_file_extent_calc_inline_size(name_len); -- cgit v1.2.3 From 82d5902d9c681be37ffa9d70482907f9f0b7ec1f Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 20 Apr 2011 10:33:24 +0800 Subject: Btrfs: Support reading/writing on disk free ino cache This is similar to block group caching. We dedicate a special inode in fs tree to save free ino cache. At the very first time we create/delete a file after mount, the free ino cache will be loaded from disk into memory. When the fs tree is committed, the cache will be written back to disk. To keep compatibility, we check the root generation against the generation of the special inode when loading the cache, so the loading will fail if the btrfs filesystem was previously mounted by an older kernel. 
Signed-off-by: Li Zefan --- fs/btrfs/inode.c | 45 ++++++++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 17 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index adec22884a3e..b78d3ab789ca 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -745,6 +745,15 @@ static u64 get_extent_allocation_hint(struct inode *inode, u64 start, return alloc_hint; } +static inline bool is_free_space_inode(struct btrfs_root *root, + struct inode *inode) +{ + if (root == root->fs_info->tree_root || + BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) + return true; + return false; +} + /* * when extent_io.c finds a delayed allocation range in the file, * the call backs end up in this code. The basic idea is to @@ -777,7 +786,7 @@ static noinline int cow_file_range(struct inode *inode, struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; int ret = 0; - BUG_ON(root == root->fs_info->tree_root); + BUG_ON(is_free_space_inode(root, inode)); trans = btrfs_join_transaction(root, 1); BUG_ON(IS_ERR(trans)); btrfs_set_trans_block_group(trans, inode); @@ -1048,17 +1057,18 @@ static noinline int run_delalloc_nocow(struct inode *inode, int type; int nocow; int check_prev = 1; - bool nolock = false; + bool nolock; u64 ino = btrfs_ino(inode); path = btrfs_alloc_path(); BUG_ON(!path); - if (root == root->fs_info->tree_root) { - nolock = true; + + nolock = is_free_space_inode(root, inode); + + if (nolock) trans = btrfs_join_transaction_nolock(root, 1); - } else { + else trans = btrfs_join_transaction(root, 1); - } BUG_ON(IS_ERR(trans)); cow_start = (u64)-1; @@ -1316,8 +1326,7 @@ static int btrfs_set_bit_hook(struct inode *inode, if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { struct btrfs_root *root = BTRFS_I(inode)->root; u64 len = state->end + 1 - state->start; - int do_list = (root->root_key.objectid != - BTRFS_ROOT_TREE_OBJECTID); + bool do_list = !is_free_space_inode(root, inode); 
if (*bits & EXTENT_FIRST_DELALLOC) *bits &= ~EXTENT_FIRST_DELALLOC; @@ -1350,8 +1359,7 @@ static int btrfs_clear_bit_hook(struct inode *inode, if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { struct btrfs_root *root = BTRFS_I(inode)->root; u64 len = state->end + 1 - state->start; - int do_list = (root->root_key.objectid != - BTRFS_ROOT_TREE_OBJECTID); + bool do_list = !is_free_space_inode(root, inode); if (*bits & EXTENT_FIRST_DELALLOC) *bits &= ~EXTENT_FIRST_DELALLOC; @@ -1458,7 +1466,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; - if (root == root->fs_info->tree_root) + if (is_free_space_inode(root, inode)) ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2); else ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); @@ -1701,7 +1709,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) struct extent_state *cached_state = NULL; int compress_type = 0; int ret; - bool nolock = false; + bool nolock; ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start, end - start + 1); @@ -1709,7 +1717,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) return 0; BUG_ON(!ordered_extent); - nolock = (root == root->fs_info->tree_root); + nolock = is_free_space_inode(root, inode); if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { BUG_ON(!list_empty(&ordered_extent->list)); @@ -3473,7 +3481,9 @@ delete: if (path->slots[0] == 0 || path->slots[0] != pending_del_slot) { - if (root->ref_cows) { + if (root->ref_cows && + BTRFS_I(inode)->location.objectid != + BTRFS_FREE_INO_OBJECTID) { err = -EAGAIN; goto out; } @@ -3765,7 +3775,7 @@ void btrfs_evict_inode(struct inode *inode) truncate_inode_pages(&inode->i_data, 0); if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 || - root == root->fs_info->tree_root)) + is_free_space_inode(root, inode))) goto no_delete; if (is_bad_inode(inode)) { @@ 
-4382,7 +4392,8 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) return 0; smp_mb(); - nolock = (root->fs_info->closing && root == root->fs_info->tree_root); + if (root->fs_info->closing && is_free_space_inode(root, inode)) + nolock = true; if (wbc->sync_mode == WB_SYNC_ALL) { if (nolock) @@ -6900,7 +6911,7 @@ int btrfs_drop_inode(struct inode *inode) struct btrfs_root *root = BTRFS_I(inode)->root; if (btrfs_root_refs(&root->root_item) == 0 && - root != root->fs_info->tree_root) + !is_free_space_inode(root, inode)) return 1; else return generic_drop_inode(inode); -- cgit v1.2.3 From 8d413713ca744fa00cf4e05d4054d80727b84789 Mon Sep 17 00:00:00 2001 From: Tsutomu Itoh Date: Mon, 25 Apr 2011 19:43:52 -0400 Subject: Btrfs: check return value of kmalloc() The check on the return value of kmalloc() is added to some places. Signed-off-by: Tsutomu Itoh Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a4157cfdd533..c718d274a352 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -953,6 +953,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, 1, 0, NULL, GFP_NOFS); while (start < end) { async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS); + BUG_ON(!async_cow); async_cow->inode = inode; async_cow->root = root; async_cow->locked_page = locked_page; @@ -5001,6 +5002,8 @@ static noinline int uncompress_inline(struct btrfs_path *path, inline_size = btrfs_file_extent_inline_item_len(leaf, btrfs_item_nr(leaf, path->slots[0])); tmp = kmalloc(inline_size, GFP_NOFS); + if (!tmp) + return -ENOMEM; ptr = btrfs_file_extent_inline_start(item); read_extent_buffer(leaf, tmp, ptr, inline_size); -- cgit v1.2.3 From 64728bbbf892ea7a4aba502c436afbe362217fb9 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 25 Apr 2011 19:43:52 -0400 Subject: Btrfs: put the right bio if we have an error In 
btrfs_submit_direct_hook if the first btrfs_map_block fails we need to put the orig_bio, not bio. Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c718d274a352..ad6b515173ac 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6041,7 +6041,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, ret = btrfs_map_block(map_tree, READ, start_sector << 9, &map_length, NULL, 0); if (ret) { - bio_put(bio); + bio_put(orig_bio); return -EIO; } -- cgit v1.2.3 From 7cf96da3ec7ca225acf4f284b0e904a1f5f98821 Mon Sep 17 00:00:00 2001 From: Tsutomu Itoh Date: Mon, 25 Apr 2011 19:43:53 -0400 Subject: Btrfs: cleanup error handling in inode.c The error processing of several places is changed like setting the error number only at the error. Signed-off-by: Tsutomu Itoh Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ad6b515173ac..870869aab0b8 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4731,9 +4731,10 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, dentry->d_name.len, dir->i_ino, objectid, BTRFS_I(dir)->block_group, mode, &index); - err = PTR_ERR(inode); - if (IS_ERR(inode)) + if (IS_ERR(inode)) { + err = PTR_ERR(inode); goto out_unlock; + } err = btrfs_init_inode_security(trans, inode, dir); if (err) { @@ -4792,9 +4793,10 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, dentry->d_name.len, dir->i_ino, objectid, BTRFS_I(dir)->block_group, mode, &index); - err = PTR_ERR(inode); - if (IS_ERR(inode)) + if (IS_ERR(inode)) { + err = PTR_ERR(inode); goto out_unlock; + } 
err = btrfs_init_inode_security(trans, inode, dir); if (err) { @@ -7278,9 +7280,10 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, dentry->d_name.len, dir->i_ino, objectid, BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO, &index); - err = PTR_ERR(inode); - if (IS_ERR(inode)) + if (IS_ERR(inode)) { + err = PTR_ERR(inode); goto out_unlock; + } err = btrfs_init_inode_security(trans, inode, dir); if (err) { -- cgit v1.2.3 From 306e16ce13c0f3d4fc071b45803b5b83c2606011 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 19 Apr 2011 14:29:38 +0200 Subject: btrfs: rename variables clashing with global function names reported by gcc -Wshadow: page_index, page_offset, new_inode, dev_name Signed-off-by: David Sterba --- fs/btrfs/inode.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7cd8ab0ef04d..fc966472e3ad 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6985,7 +6985,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(old_dir)->root; struct btrfs_root *dest = BTRFS_I(new_dir)->root; - struct inode *new_inode = new_dentry->d_inode; + struct inode *newinode = new_dentry->d_inode; struct inode *old_inode = old_dentry->d_inode; struct timespec ctime = CURRENT_TIME; u64 index = 0; @@ -7000,18 +7000,18 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, return -EXDEV; if (old_inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID || - (new_inode && new_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) + (newinode && newinode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) return -ENOTEMPTY; - if (S_ISDIR(old_inode->i_mode) && new_inode && - new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) + if (S_ISDIR(old_inode->i_mode) && newinode && + newinode->i_size > BTRFS_EMPTY_DIR_SIZE) return -ENOTEMPTY; /* * we're using rename to replace one file 
with another. * and the replacement file is large. Start IO on it now so * we don't add too much work to the end of the transaction */ - if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size && + if (newinode && S_ISREG(old_inode->i_mode) && newinode->i_size && old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) filemap_flush(old_inode->i_mapping); @@ -7065,7 +7065,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, * make sure the inode gets flushed if it is replacing * something. */ - if (new_inode && new_inode->i_size && + if (newinode && newinode->i_size && old_inode && S_ISREG(old_inode->i_mode)) { btrfs_add_ordered_operation(trans, root, old_inode); } @@ -7092,16 +7092,16 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, } BUG_ON(ret); - if (new_inode) { - new_inode->i_ctime = CURRENT_TIME; - if (unlikely(new_inode->i_ino == + if (newinode) { + newinode->i_ctime = CURRENT_TIME; + if (unlikely(newinode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { - root_objectid = BTRFS_I(new_inode)->location.objectid; + root_objectid = BTRFS_I(newinode)->location.objectid; ret = btrfs_unlink_subvol(trans, dest, new_dir, root_objectid, new_dentry->d_name.name, new_dentry->d_name.len); - BUG_ON(new_inode->i_nlink == 0); + BUG_ON(newinode->i_nlink == 0); } else { ret = btrfs_unlink_inode(trans, dest, new_dir, new_dentry->d_inode, @@ -7109,7 +7109,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, new_dentry->d_name.len); } BUG_ON(ret); - if (new_inode->i_nlink == 0) { + if (newinode->i_nlink == 0) { ret = btrfs_orphan_add(trans, new_dentry->d_inode); BUG_ON(ret); } -- cgit v1.2.3 From c704005d886cf0bc9bc3974eb009b22fe0da32c7 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 19 Apr 2011 18:00:01 +0200 Subject: btrfs: unify checking of IS_ERR and null use IS_ERR_OR_NULL when possible, done by this coccinelle script: @ match @ identifier id; @@ ( - BUG_ON(IS_ERR(id) || !id); + 
BUG_ON(IS_ERR_OR_NULL(id)); | - IS_ERR(id) || !id + IS_ERR_OR_NULL(id) | - !id || IS_ERR(id) + IS_ERR_OR_NULL(id) ) Signed-off-by: David Sterba --- fs/btrfs/inode.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index fc966472e3ad..ba760c3ced28 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1855,7 +1855,7 @@ static int btrfs_io_failed_hook(struct bio *failed_bio, } read_unlock(&em_tree->lock); - if (!em || IS_ERR(em)) { + if (IS_ERR_OR_NULL(em)) { kfree(failrec); return -EIO; } @@ -3006,7 +3006,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, name, name_len, -1); - BUG_ON(!di || IS_ERR(di)); + BUG_ON(IS_ERR_OR_NULL(di)); leaf = path->nodes[0]; btrfs_dir_item_key_to_cpu(leaf, di, &key); @@ -3022,7 +3022,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, BUG_ON(ret != -ENOENT); di = btrfs_search_dir_index_item(root, path, dir->i_ino, name, name_len); - BUG_ON(!di || IS_ERR(di)); + BUG_ON(IS_ERR_OR_NULL(di)); leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); @@ -3032,7 +3032,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, index, name, name_len, -1); - BUG_ON(!di || IS_ERR(di)); + BUG_ON(IS_ERR_OR_NULL(di)); leaf = path->nodes[0]; btrfs_dir_item_key_to_cpu(leaf, di, &key); @@ -3635,7 +3635,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) while (1) { em = btrfs_get_extent(inode, NULL, 0, cur_offset, block_end - cur_offset, 0); - BUG_ON(IS_ERR(em) || !em); + BUG_ON(IS_ERR_OR_NULL(em)); last_byte = min(extent_map_end(em), block_end); last_byte = (last_byte + mask) & ~mask; if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { @@ -3841,7 +3841,7 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, if (IS_ERR(di)) ret = PTR_ERR(di); - 
if (!di || IS_ERR(di)) + if (IS_ERR_OR_NULL(di)) goto out_err; btrfs_dir_item_key_to_cpu(path->nodes[0], di, location); -- cgit v1.2.3 From f993c883ad8e111fb9e9ae603540acbe94f7246c Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Apr 2011 23:35:57 +0200 Subject: btrfs: drop unused argument from extent_io_tree_init all callers pass GFP_NOFS, but the GFP mask argument is not used in the function; GFP_ATOMIC is passed to radix tree initialization and it's the only correct one, since we're using the preload/insert mechanism of radix tree. Let's drop the gfp mask from btrfs function, this will not change behaviour. Signed-off-by: David Sterba --- fs/btrfs/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ba760c3ced28..3c98164f8b24 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6786,8 +6786,8 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) inode = &ei->vfs_inode; extent_map_tree_init(&ei->extent_tree, GFP_NOFS); - extent_io_tree_init(&ei->io_tree, &inode->i_data, GFP_NOFS); - extent_io_tree_init(&ei->io_failure_tree, &inode->i_data, GFP_NOFS); + extent_io_tree_init(&ei->io_tree, &inode->i_data); + extent_io_tree_init(&ei->io_failure_tree, &inode->i_data); mutex_init(&ei->log_mutex); btrfs_ordered_inode_tree_init(&ei->ordered_tree); INIT_LIST_HEAD(&ei->i_orphan); -- cgit v1.2.3 From a8067e022ab54fde8953880a64572c3acca644dc Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 21 Apr 2011 00:34:43 +0200 Subject: btrfs: drop unused parameter from extent_map_tree_init the GFP flags are not stored anywhere and all allocations are done via alloc_extent_map(GFP_NOFS). 
Signed-off-by: David Sterba --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3c98164f8b24..f54c015cc294 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6785,7 +6785,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) ei->force_compress = BTRFS_COMPRESS_NONE; inode = &ei->vfs_inode; - extent_map_tree_init(&ei->extent_tree, GFP_NOFS); + extent_map_tree_init(&ei->extent_tree); extent_io_tree_init(&ei->io_tree, &inode->i_data); extent_io_tree_init(&ei->io_failure_tree, &inode->i_data); mutex_init(&ei->log_mutex); -- cgit v1.2.3 From 172ddd60a662c4d8bf2809462866ddddd6431ea5 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 21 Apr 2011 00:48:27 +0200 Subject: btrfs: drop gfp parameter from alloc_extent_map pass GFP_NOFS directly to kmem_cache_alloc Signed-off-by: David Sterba --- fs/btrfs/inode.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f54c015cc294..26f4d56cf049 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -649,7 +649,7 @@ retry: async_extent->start + async_extent->ram_size - 1, 0); - em = alloc_extent_map(GFP_NOFS); + em = alloc_extent_map(); BUG_ON(!em); em->start = async_extent->start; em->len = async_extent->ram_size; @@ -826,7 +826,7 @@ static noinline int cow_file_range(struct inode *inode, (u64)-1, &ins, 1); BUG_ON(ret); - em = alloc_extent_map(GFP_NOFS); + em = alloc_extent_map(); BUG_ON(!em); em->start = start; em->orig_start = em->start; @@ -1177,7 +1177,7 @@ out_check: struct extent_map *em; struct extent_map_tree *em_tree; em_tree = &BTRFS_I(inode)->extent_tree; - em = alloc_extent_map(GFP_NOFS); + em = alloc_extent_map(); BUG_ON(!em); em->start = cur_offset; em->orig_start = em->start; @@ -5069,7 +5069,7 @@ again: else goto out; } - em = alloc_extent_map(GFP_NOFS); + em = alloc_extent_map(); if (!em) { 
err = -ENOMEM; goto out; @@ -5382,7 +5382,7 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag u64 hole_start = start; u64 hole_len = len; - em = alloc_extent_map(GFP_NOFS); + em = alloc_extent_map(); if (!em) { err = -ENOMEM; goto out; @@ -5483,7 +5483,7 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode, } if (!em) { - em = alloc_extent_map(GFP_NOFS); + em = alloc_extent_map(); if (!em) { em = ERR_PTR(-ENOMEM); goto out; -- cgit v1.2.3 From b3b4aa74b58bded927f579fff787fb6fa1c0393c Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 21 Apr 2011 01:20:15 +0200 Subject: btrfs: drop unused parameter from btrfs_release_path parameter tree root it's not used since commit 5f39d397dfbe140a14edecd4e73c34ce23c4f9ee ("Btrfs: Create extent_buffer interface for large blocksizes") Signed-off-by: David Sterba --- fs/btrfs/inode.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 26f4d56cf049..2840989737b7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1164,7 +1164,7 @@ out_check: goto next_slot; } - btrfs_release_path(root, path); + btrfs_release_path(path); if (cow_start != (u64)-1) { ret = cow_file_range(inode, locked_page, cow_start, found_key.offset - 1, page_started, @@ -1222,7 +1222,7 @@ out_check: if (cur_offset > end) break; } - btrfs_release_path(root, path); + btrfs_release_path(path); if (cur_offset <= end && cow_start == (u64)-1) cow_start = cur_offset; @@ -2346,7 +2346,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) break; /* release the path since we're done with it */ - btrfs_release_path(root, path); + btrfs_release_path(path); /* * this is where we are basically btrfs_lookup, without the @@ -2712,7 +2712,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, ret = btrfs_delete_one_dir_name(trans, root, path, di); if (ret) goto err; - 
btrfs_release_path(root, path); + btrfs_release_path(path); ret = btrfs_del_inode_ref(trans, root, name, name_len, inode->i_ino, @@ -2735,7 +2735,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, goto err; } ret = btrfs_delete_one_dir_name(trans, root, path, di); - btrfs_release_path(root, path); + btrfs_release_path(path); ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len, inode, dir->i_ino); @@ -2862,7 +2862,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, } else { check_link = 0; } - btrfs_release_path(root, path); + btrfs_release_path(path); ret = btrfs_lookup_inode(trans, root, path, &BTRFS_I(inode)->location, 0); @@ -2876,7 +2876,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, } else { check_link = 0; } - btrfs_release_path(root, path); + btrfs_release_path(path); if (ret == 0 && S_ISREG(inode->i_mode)) { ret = btrfs_lookup_file_extent(trans, root, path, @@ -2888,7 +2888,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, BUG_ON(ret == 0); if (check_path_shared(root, path)) goto out; - btrfs_release_path(root, path); + btrfs_release_path(path); } if (!check_link) { @@ -2909,7 +2909,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, err = 0; goto out; } - btrfs_release_path(root, path); + btrfs_release_path(path); ref = btrfs_lookup_inode_ref(trans, root, path, dentry->d_name.name, dentry->d_name.len, @@ -2922,7 +2922,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, if (check_path_shared(root, path)) goto out; index = btrfs_inode_ref_index(path->nodes[0], ref); - btrfs_release_path(root, path); + btrfs_release_path(path); di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, index, dentry->d_name.name, dentry->d_name.len, 0); @@ -3013,7 +3013,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid); 
ret = btrfs_delete_one_dir_name(trans, root, path, di); BUG_ON(ret); - btrfs_release_path(root, path); + btrfs_release_path(path); ret = btrfs_del_root_ref(trans, root->fs_info->tree_root, objectid, root->root_key.objectid, @@ -3026,7 +3026,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); - btrfs_release_path(root, path); + btrfs_release_path(path); index = key.offset; } @@ -3039,7 +3039,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid); ret = btrfs_delete_one_dir_name(trans, root, path, di); BUG_ON(ret); - btrfs_release_path(root, path); + btrfs_release_path(path); btrfs_i_size_write(dir, dir->i_size - name_len * 2); dir->i_mtime = dir->i_ctime = CURRENT_TIME; @@ -3477,7 +3477,7 @@ delete: BUG_ON(ret); pending_del_nr = 0; } - btrfs_release_path(root, path); + btrfs_release_path(path); goto search_again; } else { path->slots[0]--; @@ -3899,7 +3899,7 @@ static int fixup_tree_root_location(struct btrfs_root *root, if (ret) goto out; - btrfs_release_path(root->fs_info->tree_root, path); + btrfs_release_path(path); new_root = btrfs_read_fs_root_no_name(root->fs_info, location); if (IS_ERR(new_root)) { @@ -5223,7 +5223,7 @@ again: kunmap(page); free_extent_map(em); em = NULL; - btrfs_release_path(root, path); + btrfs_release_path(path); trans = btrfs_join_transaction(root, 1); if (IS_ERR(trans)) return ERR_CAST(trans); @@ -5249,7 +5249,7 @@ not_found_em: em->block_start = EXTENT_MAP_HOLE; set_bit(EXTENT_FLAG_VACANCY, &em->flags); insert: - btrfs_release_path(root, path); + btrfs_release_path(path); if (em->start > start || extent_map_end(em) <= start) { printk(KERN_ERR "Btrfs: bad extent! 
em: [%llu %llu] passed " "[%llu %llu]\n", (unsigned long long)em->start, -- cgit v1.2.3 From f2a97a9dbd86eb1ef956bdf20e05c507b32beb96 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 5 May 2011 12:44:41 +0200 Subject: btrfs: remove all unused functions Remove static and global declarations and/or definitions. Reduces size of btrfs.ko by ~3.4kB. text data bss dec hex filename 402081 7464 200 409745 64091 btrfs.ko.base 398620 7144 200 405964 631cc btrfs.ko.remove-all Signed-off-by: David Sterba --- fs/btrfs/inode.c | 52 ---------------------------------------------------- 1 file changed, 52 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2840989737b7..57122a5e8473 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7185,58 +7185,6 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) return 0; } -int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput, - int sync) -{ - struct btrfs_inode *binode; - struct inode *inode = NULL; - - spin_lock(&root->fs_info->delalloc_lock); - while (!list_empty(&root->fs_info->delalloc_inodes)) { - binode = list_entry(root->fs_info->delalloc_inodes.next, - struct btrfs_inode, delalloc_inodes); - inode = igrab(&binode->vfs_inode); - if (inode) { - list_move_tail(&binode->delalloc_inodes, - &root->fs_info->delalloc_inodes); - break; - } - - list_del_init(&binode->delalloc_inodes); - cond_resched_lock(&root->fs_info->delalloc_lock); - } - spin_unlock(&root->fs_info->delalloc_lock); - - if (inode) { - if (sync) { - filemap_write_and_wait(inode->i_mapping); - /* - * We have to do this because compression doesn't - * actually set PG_writeback until it submits the pages - * for IO, which happens in an async thread, so we could - * race and not actually wait for any writeback pages - * because they've not been submitted yet. 
Technically - * this could still be the case for the ordered stuff - * since the async thread may not have started to do its - * work yet. If this becomes the case then we need to - * figure out a way to make sure that in writepage we - * wait for any async pages to be submitted before - * returning so that fdatawait does what its supposed to - * do. - */ - btrfs_wait_ordered_range(inode, 0, (u64)-1); - } else { - filemap_flush(inode->i_mapping); - } - if (delay_iput) - btrfs_add_delayed_iput(inode); - else - iput(inode); - return 1; - } - return 0; -} - static int btrfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) { -- cgit v1.2.3 From 182608c8294b5fe90d7bbd4b026c82bf0a24b736 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 5 May 2011 13:13:16 +0200 Subject: btrfs: remove old unused commented out code Remove code which has been #if0-ed out for a very long time and does not seem to be related to current codebase anymore. Signed-off-by: David Sterba --- fs/btrfs/inode.c | 172 ------------------------------------------------------- 1 file changed, 172 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 57122a5e8473..5ff52b644a60 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3093,178 +3093,6 @@ out: return err; } -#if 0 -/* - * when truncating bytes in a file, it is possible to avoid reading - * the leaves that contain only checksum items. This can be the - * majority of the IO required to delete a large file, but it must - * be done carefully. - * - * The keys in the level just above the leaves are checked to make sure - * the lowest key in a given leaf is a csum key, and starts at an offset - * after the new size. - * - * Then the key for the next leaf is checked to make sure it also has - * a checksum item for the same file. If it does, we know our target leaf - * contains only checksum items, and it can be safely freed without reading - * it. 
- * - * This is just an optimization targeted at large files. It may do - * nothing. It will return 0 unless things went badly. - */ -static noinline int drop_csum_leaves(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, - struct inode *inode, u64 new_size) -{ - struct btrfs_key key; - int ret; - int nritems; - struct btrfs_key found_key; - struct btrfs_key other_key; - struct btrfs_leaf_ref *ref; - u64 leaf_gen; - u64 leaf_start; - - path->lowest_level = 1; - key.objectid = inode->i_ino; - key.type = BTRFS_CSUM_ITEM_KEY; - key.offset = new_size; -again: - ret = btrfs_search_slot(trans, root, &key, path, -1, 1); - if (ret < 0) - goto out; - - if (path->nodes[1] == NULL) { - ret = 0; - goto out; - } - ret = 0; - btrfs_node_key_to_cpu(path->nodes[1], &found_key, path->slots[1]); - nritems = btrfs_header_nritems(path->nodes[1]); - - if (!nritems) - goto out; - - if (path->slots[1] >= nritems) - goto next_node; - - /* did we find a key greater than anything we want to delete? */ - if (found_key.objectid > inode->i_ino || - (found_key.objectid == inode->i_ino && found_key.type > key.type)) - goto out; - - /* we check the next key in the node to make sure the leave contains - * only checksum items. This comparison doesn't work if our - * leaf is the last one in the node - */ - if (path->slots[1] + 1 >= nritems) { -next_node: - /* search forward from the last key in the node, this - * will bring us into the next node in the tree - */ - btrfs_node_key_to_cpu(path->nodes[1], &found_key, nritems - 1); - - /* unlikely, but we inc below, so check to be safe */ - if (found_key.offset == (u64)-1) - goto out; - - /* search_forward needs a path with locks held, do the - * search again for the original key. It is possible - * this will race with a balance and return a path that - * we could modify, but this drop is just an optimization - * and is allowed to miss some leaves. 
- */ - btrfs_release_path(root, path); - found_key.offset++; - - /* setup a max key for search_forward */ - other_key.offset = (u64)-1; - other_key.type = key.type; - other_key.objectid = key.objectid; - - path->keep_locks = 1; - ret = btrfs_search_forward(root, &found_key, &other_key, - path, 0, 0); - path->keep_locks = 0; - if (ret || found_key.objectid != key.objectid || - found_key.type != key.type) { - ret = 0; - goto out; - } - - key.offset = found_key.offset; - btrfs_release_path(root, path); - cond_resched(); - goto again; - } - - /* we know there's one more slot after us in the tree, - * read that key so we can verify it is also a checksum item - */ - btrfs_node_key_to_cpu(path->nodes[1], &other_key, path->slots[1] + 1); - - if (found_key.objectid < inode->i_ino) - goto next_key; - - if (found_key.type != key.type || found_key.offset < new_size) - goto next_key; - - /* - * if the key for the next leaf isn't a csum key from this objectid, - * we can't be sure there aren't good items inside this leaf. 
- * Bail out - */ - if (other_key.objectid != inode->i_ino || other_key.type != key.type) - goto out; - - leaf_start = btrfs_node_blockptr(path->nodes[1], path->slots[1]); - leaf_gen = btrfs_node_ptr_generation(path->nodes[1], path->slots[1]); - /* - * it is safe to delete this leaf, it contains only - * csum items from this inode at an offset >= new_size - */ - ret = btrfs_del_leaf(trans, root, path, leaf_start); - BUG_ON(ret); - - if (root->ref_cows && leaf_gen < trans->transid) { - ref = btrfs_alloc_leaf_ref(root, 0); - if (ref) { - ref->root_gen = root->root_key.offset; - ref->bytenr = leaf_start; - ref->owner = 0; - ref->generation = leaf_gen; - ref->nritems = 0; - - btrfs_sort_leaf_ref(ref); - - ret = btrfs_add_leaf_ref(root, ref, 0); - WARN_ON(ret); - btrfs_free_leaf_ref(root, ref); - } else { - WARN_ON(1); - } - } -next_key: - btrfs_release_path(root, path); - - if (other_key.objectid == inode->i_ino && - other_key.type == key.type && other_key.offset > key.offset) { - key.offset = other_key.offset; - cond_resched(); - goto again; - } - ret = 0; -out: - /* fixup any changes we've made to the path */ - path->lowest_level = 0; - path->keep_locks = 0; - btrfs_release_path(root, path); - return ret; -} - -#endif - /* * this can truncate away extent items, csum items and directory items. * It starts at a high offset and removes keys until it can't find -- cgit v1.2.3 From a2de733c78fa7af51ba9670482fa7d392aa67c57 Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Tue, 8 Mar 2011 14:14:00 +0100 Subject: btrfs: scrub This adds an initial implementation for scrub. It works quite straightforward. The usermode issues an ioctl for each device in the fs. For each device, it enumerates the allocated device chunks. For each chunk, the contained extents are enumerated and the data checksums fetched. The extents are read sequentially and the checksums verified. If an error occurs (checksum or EIO), a good copy is searched for. If one is found, the bad copy will be rewritten. 
All enumerations happen from the commit roots. During a transaction commit, the scrubs get paused and afterwards continue from the new roots. This commit is based on the series originally posted to linux-btrfs with some improvements that resulted from comments from David Sterba, Ilya Dryomov and Jan Schmidt. Signed-off-by: Arne Jansen --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 870869aab0b8..27142446b30a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1007,7 +1007,7 @@ static noinline int csum_exist_in_range(struct btrfs_root *root, LIST_HEAD(list); ret = btrfs_lookup_csums_range(root->fs_info->csum_root, bytenr, - bytenr + num_bytes - 1, &list); + bytenr + num_bytes - 1, &list, 0); if (ret == 0 && list_empty(&list)) return 0; -- cgit v1.2.3 From 7a36ddec1003a4e84e79f28ee714a142ed6bc529 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 6 May 2011 15:33:15 +0200 Subject: btrfs: use printk_ratelimited instead of printk_ratelimit As per printk_ratelimit comment, it should not be used. 
Signed-off-by: David Sterba --- fs/btrfs/inode.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5ff52b644a60..1d1017f91558 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -37,6 +37,7 @@ #include #include #include +#include #include "compat.h" #include "ctree.h" #include "disk-io.h" @@ -2004,12 +2005,10 @@ good: return 0; zeroit: - if (printk_ratelimit()) { - printk(KERN_INFO "btrfs csum failed ino %lu off %llu csum %u " + printk_ratelimited(KERN_INFO "btrfs csum failed ino %lu off %llu csum %u " "private %llu\n", page->mapping->host->i_ino, (unsigned long long)start, csum, (unsigned long long)private); - } memset(kaddr + offset, 1, end - start + 1); flush_dcache_page(page); kunmap_atomic(kaddr, KM_USER0); @@ -4243,22 +4242,18 @@ void btrfs_dirty_inode(struct inode *inode) btrfs_end_transaction(trans, root); trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) { - if (printk_ratelimit()) { - printk(KERN_ERR "btrfs: fail to " + printk_ratelimited(KERN_ERR "btrfs: fail to " "dirty inode %lu error %ld\n", inode->i_ino, PTR_ERR(trans)); - } return; } btrfs_set_trans_block_group(trans, inode); ret = btrfs_update_inode(trans, root, inode); if (ret) { - if (printk_ratelimit()) { - printk(KERN_ERR "btrfs: fail to " + printk_ratelimited(KERN_ERR "btrfs: fail to " "dirty inode %lu error %d\n", inode->i_ino, ret); - } } } btrfs_end_transaction(trans, root); -- cgit v1.2.3 From 16cdcec736cd214350cdb591bf1091f8beedefa0 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Fri, 22 Apr 2011 18:12:22 +0800 Subject: btrfs: implement delayed inode items operation Changelog V5 -> V6: - Fix oom when the memory load is high, by storing the delayed nodes into the root's radix tree, and letting btrfs inodes go. Changelog V4 -> V5: - Fix the race on adding the delayed node to the inode, which is spotted by Chris Mason. - Merge Chris Mason's incremental patch into this patch. 
- Fix deadlock between readdir() and memory fault, which is reported by Itaru Kitayama. Changelog V3 -> V4: - Fix nested lock, which is reported by Itaru Kitayama, by updating space cache inode in time. Changelog V2 -> V3: - Fix the race between the delayed worker and the task which does delayed items balance, which is reported by Tsutomu Itoh. - Modify the patch to address David Sterba's comment. - Fix the bug of the cpu recursion spinlock, reported by Chris Mason Changelog V1 -> V2: - break up the global rb-tree, use a list to manage the delayed nodes, which is created for every directory and file, and used to manage the delayed directory name index items and the delayed inode item. - introduce a worker to deal with the delayed nodes. Compared with Ext3/4, the performance of file creation and deletion on btrfs is very poor. The reason is that btrfs must do a lot of b+ tree insertions, such as inode item, directory name item, directory name index and so on. If we can do some delayed b+ tree insertion or deletion, we can improve the performance, so we made this patch which implemented delayed directory name index insertion/deletion and delayed inode update. Implementation: - introduce a delayed root object into the filesystem, that uses two lists to manage the delayed nodes which are created for every file/directory. One is used to manage all the delayed nodes that have delayed items. And the other is used to manage the delayed nodes which are waiting to be dealt with by the work thread. - Every delayed node has two rb-trees, one is used to manage the directory name index which is going to be inserted into b+ tree, and the other is used to manage the directory name index which is going to be deleted from b+ tree. - introduce a worker to deal with the delayed operation. This worker is used to deal with the works of the delayed directory name index items insertion and deletion and the delayed inode update. 
When the number of delayed items exceeds the lower limit, we create works for some delayed nodes and insert them into the work queue of the worker, and then go back. When the number of delayed items exceeds the upper bound, we create works for all the delayed nodes that haven't been dealt with, and insert them into the work queue of the worker, and then wait until the number of untreated items drops below some threshold value. - When we want to insert a directory name index into b+ tree, we just add the information into the delayed inserting rb-tree. And then we check the number of the delayed items and do delayed items balance. (The balance policy is above.) - When we want to delete a directory name index from the b+ tree, we search for it in the inserting rb-tree at first. If we find it, just drop it. If not, add the key of it into the delayed deleting rb-tree. Similar to the delayed inserting rb-tree, we also check the number of the delayed items and do delayed items balance. (The same as for insertion.) - When we want to update the metadata of some inode, we cache the data of the inode into the delayed node. The worker will flush it into the b+ tree after dealing with the delayed insertion and deletion. - We will move the delayed node to the tail of the list after we access the delayed node. This way, we can cache more delayed items and merge more inode updates. - If we want to commit transaction, we will deal with all the delayed nodes. - The delayed node will be freed when we free the btrfs inode. - Before we log the inode items, we commit all the directory name index items and the delayed inode update. I did a quick test with the benchmark tool[1] and found we can improve the performance of file creation by ~15%, and file deletion by ~20%. 
Before applying this patch: Create files: Total files: 50000 Total time: 1.096108 Average time: 0.000022 Delete files: Total files: 50000 Total time: 1.510403 Average time: 0.000030 After applying this patch: Create files: Total files: 50000 Total time: 0.932899 Average time: 0.000019 Delete files: Total files: 50000 Total time: 1.215732 Average time: 0.000024 [1] http://marc.info/?l=linux-btrfs&m=128212635122920&q=p3 Many thanks for Kitayama-san's help! Signed-off-by: Miao Xie Reviewed-by: David Sterba Tested-by: Tsutomu Itoh Tested-by: Itaru Kitayama Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 111 +++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 80 insertions(+), 31 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7cd8ab0ef04d..3470f67c6258 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2647,11 +2647,26 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, struct extent_buffer *leaf; int ret; + /* + * If root is tree root, it means this inode is used to + * store free space information. And these inodes are updated + * when committing the transaction, so they needn't delaye to + * be updated, or deadlock will occured. 
+ */ + if (likely(root != root->fs_info->tree_root)) { + ret = btrfs_delayed_update_inode(trans, root, inode); + if (!ret) + btrfs_set_inode_last_trans(trans, inode); + return ret; + } + path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; + path->leave_spinning = 1; - ret = btrfs_lookup_inode(trans, root, path, - &BTRFS_I(inode)->location, 1); + ret = btrfs_lookup_inode(trans, root, path, &BTRFS_I(inode)->location, + 1); if (ret) { if (ret > 0) ret = -ENOENT; @@ -2661,7 +2676,7 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, btrfs_unlock_up_safe(path, 1); leaf = path->nodes[0]; inode_item = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_inode_item); + struct btrfs_inode_item); fill_inode_item(trans, leaf, inode_item, inode); btrfs_mark_buffer_dirty(leaf); @@ -2672,7 +2687,6 @@ failed: return ret; } - /* * unlink helper that gets used here in inode.c and in the tree logging * recovery code. It remove a link in a directory with a given name, and @@ -2724,18 +2738,9 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, goto err; } - di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, - index, name, name_len, -1); - if (IS_ERR(di)) { - ret = PTR_ERR(di); - goto err; - } - if (!di) { - ret = -ENOENT; + ret = btrfs_delete_delayed_dir_index(trans, root, dir, index); + if (ret) goto err; - } - ret = btrfs_delete_one_dir_name(trans, root, path, di); - btrfs_release_path(root, path); ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len, inode, dir->i_ino); @@ -2924,6 +2929,14 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, index = btrfs_inode_ref_index(path->nodes[0], ref); btrfs_release_path(root, path); + /* + * This is a commit root search, if we can lookup inode item and other + * relative items in the commit root, it means the transaction of + * dir/file creation has been committed, and the dir index item that we + * delay to insert has also been inserted 
into the commit root. So + * we needn't worry about the delayed insertion of the dir index item + * here. + */ di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, index, dentry->d_name.name, dentry->d_name.len, 0); if (IS_ERR(di)) { @@ -3029,24 +3042,16 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, btrfs_release_path(root, path); index = key.offset; } + btrfs_release_path(root, path); - di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, - index, name, name_len, -1); - BUG_ON(!di || IS_ERR(di)); - - leaf = path->nodes[0]; - btrfs_dir_item_key_to_cpu(leaf, di, &key); - WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid); - ret = btrfs_delete_one_dir_name(trans, root, path, di); + ret = btrfs_delete_delayed_dir_index(trans, root, dir, index); BUG_ON(ret); - btrfs_release_path(root, path); btrfs_i_size_write(dir, dir->i_size - name_len * 2); dir->i_mtime = dir->i_ctime = CURRENT_TIME; ret = btrfs_update_inode(trans, root, dir); BUG_ON(ret); - btrfs_free_path(path); return 0; } @@ -3306,6 +3311,15 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, if (root->ref_cows || root == root->fs_info->tree_root) btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); + /* + * This function is also used to drop the items in the log tree before + * we relog the inode, so if root != BTRFS_I(inode)->root, it means + * it is used to drop the loged items. So we shouldn't kill the delayed + * items. 
+ */ + if (min_type == 0 && root == BTRFS_I(inode)->root) + btrfs_kill_delayed_inode_items(inode); + path = btrfs_alloc_path(); BUG_ON(!path); path->reada = -1; @@ -4208,7 +4222,7 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, return d_splice_alias(inode, dentry); } -static unsigned char btrfs_filetype_table[] = { +unsigned char btrfs_filetype_table[] = { DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK }; @@ -4222,6 +4236,8 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, struct btrfs_key key; struct btrfs_key found_key; struct btrfs_path *path; + struct list_head ins_list; + struct list_head del_list; int ret; struct extent_buffer *leaf; int slot; @@ -4234,6 +4250,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, char tmp_name[32]; char *name_ptr; int name_len; + int is_curr = 0; /* filp->f_pos points to the current index? */ /* FIXME, use a real flag for deciding about the key type */ if (root->fs_info->tree_root == root) @@ -4258,8 +4275,16 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, filp->f_pos = 2; } path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; path->reada = 2; + if (key_type == BTRFS_DIR_INDEX_KEY) { + INIT_LIST_HEAD(&ins_list); + INIT_LIST_HEAD(&del_list); + btrfs_get_delayed_items(inode, &ins_list, &del_list); + } + btrfs_set_key_type(&key, key_type); key.offset = filp->f_pos; key.objectid = inode->i_ino; @@ -4289,8 +4314,13 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, break; if (found_key.offset < filp->f_pos) goto next; + if (key_type == BTRFS_DIR_INDEX_KEY && + btrfs_should_delete_dir_index(&del_list, + found_key.offset)) + goto next; filp->f_pos = found_key.offset; + is_curr = 1; di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); di_cur = 0; @@ -4345,6 +4375,15 @@ next: path->slots[0]++; } + if (key_type == BTRFS_DIR_INDEX_KEY) { + if (is_curr) + filp->f_pos++; + ret = btrfs_readdir_delayed_dir_index(filp, 
dirent, filldir, + &ins_list); + if (ret) + goto nopos; + } + /* Reached end of directory/root. Bump pos past the last item. */ if (key_type == BTRFS_DIR_INDEX_KEY) /* @@ -4357,6 +4396,8 @@ next: nopos: ret = 0; err: + if (key_type == BTRFS_DIR_INDEX_KEY) + btrfs_put_delayed_items(&ins_list, &del_list); btrfs_free_path(path); return ret; } @@ -4434,6 +4475,8 @@ void btrfs_dirty_inode(struct inode *inode) } } btrfs_end_transaction(trans, root); + if (BTRFS_I(inode)->delayed_node) + btrfs_balance_delayed_items(root); } /* @@ -4502,9 +4545,12 @@ int btrfs_set_inode_index(struct inode *dir, u64 *index) int ret = 0; if (BTRFS_I(dir)->index_cnt == (u64)-1) { - ret = btrfs_set_inode_index_count(dir); - if (ret) - return ret; + ret = btrfs_inode_delayed_dir_index_count(dir); + if (ret) { + ret = btrfs_set_inode_index_count(dir); + if (ret) + return ret; + } } *index = BTRFS_I(dir)->index_cnt; @@ -4671,7 +4717,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, if (ret == 0) { ret = btrfs_insert_dir_item(trans, root, name, name_len, - parent_inode->i_ino, &key, + parent_inode, &key, btrfs_inode_type(inode), index); BUG_ON(ret); @@ -6784,6 +6830,8 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) ei->dummy_inode = 0; ei->force_compress = BTRFS_COMPRESS_NONE; + ei->delayed_node = NULL; + inode = &ei->vfs_inode; extent_map_tree_init(&ei->extent_tree, GFP_NOFS); extent_io_tree_init(&ei->io_tree, &inode->i_data, GFP_NOFS); @@ -6874,6 +6922,7 @@ void btrfs_destroy_inode(struct inode *inode) inode_tree_del(inode); btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); free: + btrfs_remove_delayed_node(inode); call_rcu(&inode->i_rcu, btrfs_i_callback); } -- cgit v1.2.3 From 74b2107543da4ed9607ec484f63c42362dc9fca6 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 13 Apr 2011 12:02:53 -0400 Subject: Btrfs: make sure to use the delalloc reserve when filling delalloc In the prealloc filling code and compressed code we don't set trans->block_rsv to the delalloc block reserve 
properly, which is going to make us use metadata from the wrong pool, this patch fixes that. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7cd8ab0ef04d..3b9f1643aa57 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -619,6 +619,7 @@ retry: trans = btrfs_join_transaction(root, 1); BUG_ON(IS_ERR(trans)); + trans->block_rsv = &root->fs_info->delalloc_block_rsv; ret = btrfs_reserve_extent(trans, root, async_extent->compressed_size, async_extent->compressed_size, @@ -1060,6 +1061,7 @@ static noinline int run_delalloc_nocow(struct inode *inode, trans = btrfs_join_transaction(root, 1); } BUG_ON(IS_ERR(trans)); + trans->block_rsv = &root->fs_info->delalloc_block_rsv; cow_start = (u64)-1; cur_offset = start; -- cgit v1.2.3 From 7a7eaa40a39bde4eefc91aadeb1ce3dc4e6a1252 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 13 Apr 2011 12:54:33 -0400 Subject: Btrfs: take away the num_items argument from btrfs_join_transaction I keep forgetting that btrfs_join_transaction() just ignores the num_items argument, which leads me to sending pointless patches and looking stupid :). So just kill the num_items argument from btrfs_join_transaction and btrfs_start_ioctl_transaction, since neither of them use it. 
Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3b9f1643aa57..e47bdf0fb75a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -420,7 +420,7 @@ again: } } if (start == 0) { - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); btrfs_set_trans_block_group(trans, inode); trans->block_rsv = &root->fs_info->delalloc_block_rsv; @@ -617,7 +617,7 @@ retry: async_extent->start + async_extent->ram_size - 1, GFP_NOFS); - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); trans->block_rsv = &root->fs_info->delalloc_block_rsv; ret = btrfs_reserve_extent(trans, root, @@ -779,7 +779,7 @@ static noinline int cow_file_range(struct inode *inode, int ret = 0; BUG_ON(root == root->fs_info->tree_root); - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); btrfs_set_trans_block_group(trans, inode); trans->block_rsv = &root->fs_info->delalloc_block_rsv; @@ -1056,9 +1056,9 @@ static noinline int run_delalloc_nocow(struct inode *inode, BUG_ON(!path); if (root == root->fs_info->tree_root) { nolock = true; - trans = btrfs_join_transaction_nolock(root, 1); + trans = btrfs_join_transaction_nolock(root); } else { - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); } BUG_ON(IS_ERR(trans)); trans->block_rsv = &root->fs_info->delalloc_block_rsv; @@ -1718,9 +1718,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); if (!ret) { if (nolock) - trans = btrfs_join_transaction_nolock(root, 1); + trans = btrfs_join_transaction_nolock(root); else - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); 
BUG_ON(IS_ERR(trans)); btrfs_set_trans_block_group(trans, inode); trans->block_rsv = &root->fs_info->delalloc_block_rsv; @@ -1735,9 +1735,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) 0, &cached_state, GFP_NOFS); if (nolock) - trans = btrfs_join_transaction_nolock(root, 1); + trans = btrfs_join_transaction_nolock(root); else - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); btrfs_set_trans_block_group(trans, inode); trans->block_rsv = &root->fs_info->delalloc_block_rsv; @@ -2415,7 +2415,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) (u64)-1); if (root->orphan_block_rsv || root->orphan_item_inserted) { - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); if (!IS_ERR(trans)) btrfs_end_transaction(trans, root); } @@ -4378,9 +4378,9 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) if (wbc->sync_mode == WB_SYNC_ALL) { if (nolock) - trans = btrfs_join_transaction_nolock(root, 1); + trans = btrfs_join_transaction_nolock(root); else - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); if (IS_ERR(trans)) return PTR_ERR(trans); btrfs_set_trans_block_group(trans, inode); @@ -4407,7 +4407,7 @@ void btrfs_dirty_inode(struct inode *inode) if (BTRFS_I(inode)->dummy_inode) return; - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); btrfs_set_trans_block_group(trans, inode); @@ -5226,7 +5226,7 @@ again: free_extent_map(em); em = NULL; btrfs_release_path(root, path); - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); if (IS_ERR(trans)) return ERR_CAST(trans); goto again; @@ -5470,7 +5470,7 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode, btrfs_drop_extent_cache(inode, start, start + len - 1, 0); } - trans = btrfs_join_transaction(root, 0); + trans = btrfs_join_transaction(root); if 
(IS_ERR(trans)) return ERR_CAST(trans); @@ -5703,7 +5703,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, * to make sure the current transaction stays open * while we look for nocow cross refs */ - trans = btrfs_join_transaction(root, 0); + trans = btrfs_join_transaction(root); if (IS_ERR(trans)) goto must_cow; @@ -5841,7 +5841,7 @@ again: BUG_ON(!ordered); - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { err = -ENOMEM; goto out; -- cgit v1.2.3 From fcb80c2affd63237cff5b34cba5756be7c976a5a Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 3 May 2011 10:40:22 -0400 Subject: Btrfs: fix how we do space reservation for truncate The ceph guys keep running into problems where we have space reserved in our orphan block rsv when freeing it up. This is because they tend to do snapshots alot, so their truncates tend to use a bunch of space, so when we go to do things like update the inode we have to steal reservation space in order to make the reservation happen. This happens because truncate can use as much space as it freaking feels like, but we still have to hold space for removing the orphan item and updating the inode, which will definitely always happen. So in order to fix this we need to split all of the reservation stuf up. So with this patch we have 1) The orphan block reserve which only holds the space for deleting our orphan item when everything is over. 2) The truncate block reserve which gets allocated and used specifically for the space that the truncate will use on a per truncate basis. 3) The transaction will always have 1 item's worth of data reserved so we can update the inode normally. Hopefully this will make the ceph problem go away. 
Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 111 +++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 84 insertions(+), 27 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e47bdf0fb75a..bc12ba23db5f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6591,6 +6591,7 @@ out: static int btrfs_truncate(struct inode *inode) { struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_block_rsv *rsv; int ret; int err = 0; struct btrfs_trans_handle *trans; @@ -6604,28 +6605,83 @@ static int btrfs_truncate(struct inode *inode) btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); btrfs_ordered_update_i_size(inode, inode->i_size, NULL); - trans = btrfs_start_transaction(root, 5); - if (IS_ERR(trans)) - return PTR_ERR(trans); + /* + * Yes ladies and gentelment, this is indeed ugly. The fact is we have + * 3 things going on here + * + * 1) We need to reserve space for our orphan item and the space to + * delete our orphan item. Lord knows we don't want to have a dangling + * orphan item because we didn't reserve space to remove it. + * + * 2) We need to reserve space to update our inode. + * + * 3) We need to have something to cache all the space that is going to + * be free'd up by the truncate operation, but also have some slack + * space reserved in case it uses space during the truncate (thank you + * very much snapshotting). + * + * And we need these to all be seperate. The fact is we can use alot of + * space doing the truncate, and we have no earthly idea how much space + * we will use, so we need the truncate reservation to be seperate so it + * doesn't end up using space reserved for updating the inode or + * removing the orphan item. 
We also need to be able to stop the + * transaction and start a new one, which means we need to be able to + * update the inode several times, and we have no idea of knowing how + * many times that will be, so we can't just reserve 1 item for the + * entirety of the opration, so that has to be done seperately as well. + * Then there is the orphan item, which does indeed need to be held on + * to for the whole operation, and we need nobody to touch this reserved + * space except the orphan code. + * + * So that leaves us with + * + * 1) root->orphan_block_rsv - for the orphan deletion. + * 2) rsv - for the truncate reservation, which we will steal from the + * transaction reservation. + * 3) fs_info->trans_block_rsv - this will have 1 items worth left for + * updating the inode. + */ + rsv = btrfs_alloc_block_rsv(root); + if (!rsv) + return -ENOMEM; + btrfs_add_durable_block_rsv(root->fs_info, rsv); + + trans = btrfs_start_transaction(root, 4); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); + goto out; + } btrfs_set_trans_block_group(trans, inode); + /* + * Reserve space for the truncate process. Truncate should be adding + * space, but if there are snapshots it may end up using space. + */ + ret = btrfs_truncate_reserve_metadata(trans, root, rsv); + BUG_ON(ret); + ret = btrfs_orphan_add(trans, inode); if (ret) { btrfs_end_transaction(trans, root); - return ret; + goto out; } nr = trans->blocks_used; btrfs_end_transaction(trans, root); btrfs_btree_balance_dirty(root, nr); - /* Now start a transaction for the truncate */ - trans = btrfs_start_transaction(root, 0); - if (IS_ERR(trans)) - return PTR_ERR(trans); + /* + * Ok so we've already migrated our bytes over for the truncate, so here + * just reserve the one slot we need for updating the inode. 
+ */ + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); + goto out; + } btrfs_set_trans_block_group(trans, inode); - trans->block_rsv = root->orphan_block_rsv; + trans->block_rsv = rsv; /* * setattr is responsible for setting the ordered_data_close flag, @@ -6649,24 +6705,18 @@ static int btrfs_truncate(struct inode *inode) while (1) { if (!trans) { - trans = btrfs_start_transaction(root, 0); - if (IS_ERR(trans)) - return PTR_ERR(trans); - btrfs_set_trans_block_group(trans, inode); - trans->block_rsv = root->orphan_block_rsv; - } + trans = btrfs_start_transaction(root, 3); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); + goto out; + } - ret = btrfs_block_rsv_check(trans, root, - root->orphan_block_rsv, 0, 5); - if (ret == -EAGAIN) { - ret = btrfs_commit_transaction(trans, root); - if (ret) - return ret; - trans = NULL; - continue; - } else if (ret) { - err = ret; - break; + ret = btrfs_truncate_reserve_metadata(trans, root, + rsv); + BUG_ON(ret); + + btrfs_set_trans_block_group(trans, inode); + trans->block_rsv = rsv; } ret = btrfs_truncate_inode_items(trans, root, inode, @@ -6677,6 +6727,7 @@ static int btrfs_truncate(struct inode *inode) break; } + trans->block_rsv = &root->fs_info->trans_block_rsv; ret = btrfs_update_inode(trans, root, inode); if (ret) { err = ret; @@ -6690,6 +6741,7 @@ static int btrfs_truncate(struct inode *inode) } if (ret == 0 && inode->i_nlink > 0) { + trans->block_rsv = root->orphan_block_rsv; ret = btrfs_orphan_del(trans, inode); if (ret) err = ret; @@ -6701,15 +6753,20 @@ static int btrfs_truncate(struct inode *inode) ret = btrfs_orphan_del(NULL, inode); } + trans->block_rsv = &root->fs_info->trans_block_rsv; ret = btrfs_update_inode(trans, root, inode); if (ret && !err) err = ret; nr = trans->blocks_used; ret = btrfs_end_transaction_throttle(trans, root); + btrfs_btree_balance_dirty(root, nr); + +out: + btrfs_free_block_rsv(root, rsv); + if (ret && !err) err = ret; - 
btrfs_btree_balance_dirty(root, nr); return err; } -- cgit v1.2.3 From d82a6f1d7e8b61ed5996334d0db66651bb43641d Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 11 May 2011 15:26:06 -0400 Subject: Btrfs: kill BTRFS_I(inode)->block_group Originally this was going to be used as a way to give hints to the allocator, but frankly we can get much better hints elsewhere and it's not even used at all for anything usefull. In addition to be completely useless, when we initialize an inode we try and find a freeish block group to set as the inodes block group, and with a completely full 40gb fs this takes _forever_, so I imagine with say 1tb fs this is just unbearable. So just axe the thing altoghether, we don't need it and it saves us 8 bytes in the inode and saves us 500 microseconds per inode lookup in my testcase. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 87 ++++++-------------------------------------------------- 1 file changed, 9 insertions(+), 78 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index bc12ba23db5f..dd5938a7de21 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -136,7 +136,6 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, return -ENOMEM; path->leave_spinning = 1; - btrfs_set_trans_block_group(trans, inode); key.objectid = inode->i_ino; key.offset = start; @@ -422,7 +421,6 @@ again: if (start == 0) { trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); - btrfs_set_trans_block_group(trans, inode); trans->block_rsv = &root->fs_info->delalloc_block_rsv; /* lets try to make an inline extent */ @@ -781,7 +779,6 @@ static noinline int cow_file_range(struct inode *inode, BUG_ON(root == root->fs_info->tree_root); trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); - btrfs_set_trans_block_group(trans, inode); trans->block_rsv = &root->fs_info->delalloc_block_rsv; num_bytes = (end - start + blocksize) & ~(blocksize - 1); @@ -1502,8 +1499,6 @@ 
static noinline int add_pending_csums(struct btrfs_trans_handle *trans, { struct btrfs_ordered_sum *sum; - btrfs_set_trans_block_group(trans, inode); - list_for_each_entry(sum, list, list) { btrfs_csum_file_blocks(trans, BTRFS_I(inode)->root->fs_info->csum_root, sum); @@ -1722,7 +1717,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) else trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); - btrfs_set_trans_block_group(trans, inode); trans->block_rsv = &root->fs_info->delalloc_block_rsv; ret = btrfs_update_inode(trans, root, inode); BUG_ON(ret); @@ -1739,7 +1733,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) else trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); - btrfs_set_trans_block_group(trans, inode); trans->block_rsv = &root->fs_info->delalloc_block_rsv; if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) @@ -2495,7 +2488,6 @@ static void btrfs_read_locked_inode(struct inode *inode) struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_key location; int maybe_acls; - u64 alloc_group_block; u32 rdev; int ret; @@ -2539,8 +2531,6 @@ static void btrfs_read_locked_inode(struct inode *inode) BTRFS_I(inode)->index_cnt = (u64)-1; BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); - alloc_group_block = btrfs_inode_block_group(leaf, inode_item); - /* * try to precache a NULL acl entry for files that don't have * any xattrs or acls @@ -2549,8 +2539,6 @@ static void btrfs_read_locked_inode(struct inode *inode) if (!maybe_acls) cache_no_acl(inode); - BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, - alloc_group_block, 0); btrfs_free_path(path); inode_item = NULL; @@ -2630,7 +2618,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, btrfs_set_inode_transid(leaf, item, trans->transid); btrfs_set_inode_rdev(leaf, item, inode->i_rdev); btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); - btrfs_set_inode_block_group(leaf, item, 
BTRFS_I(inode)->block_group); + btrfs_set_inode_block_group(leaf, item, 0); if (leaf->map_token) { unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); @@ -2971,8 +2959,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) if (IS_ERR(trans)) return PTR_ERR(trans); - btrfs_set_trans_block_group(trans, dir); - btrfs_record_unlink_dir(trans, dir, dentry->d_inode, 0); ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, @@ -3068,8 +3054,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) if (IS_ERR(trans)) return PTR_ERR(trans); - btrfs_set_trans_block_group(trans, dir); - if (unlikely(inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { err = btrfs_unlink_subvol(trans, root, dir, BTRFS_I(inode)->location.objectid, @@ -3649,7 +3633,6 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) err = PTR_ERR(trans); break; } - btrfs_set_trans_block_group(trans, inode); err = btrfs_drop_extents(trans, inode, cur_offset, cur_offset + hole_size, @@ -3785,7 +3768,6 @@ void btrfs_evict_inode(struct inode *inode) while (1) { trans = btrfs_start_transaction(root, 0); BUG_ON(IS_ERR(trans)); - btrfs_set_trans_block_group(trans, inode); trans->block_rsv = root->orphan_block_rsv; ret = btrfs_block_rsv_check(trans, root, @@ -4383,7 +4365,6 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) trans = btrfs_join_transaction(root); if (IS_ERR(trans)) return PTR_ERR(trans); - btrfs_set_trans_block_group(trans, inode); if (nolock) ret = btrfs_end_transaction_nolock(trans, root); else @@ -4409,7 +4390,6 @@ void btrfs_dirty_inode(struct inode *inode) trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); - btrfs_set_trans_block_group(trans, inode); ret = btrfs_update_inode(trans, root, inode); if (ret && ret == -ENOSPC) { @@ -4424,7 +4404,6 @@ void btrfs_dirty_inode(struct inode *inode) } return; } - btrfs_set_trans_block_group(trans, inode); ret = btrfs_update_inode(trans, root, inode); if (ret) { @@ 
-4519,8 +4498,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *dir, const char *name, int name_len, - u64 ref_objectid, u64 objectid, - u64 alloc_hint, int mode, u64 *index) + u64 ref_objectid, u64 objectid, int mode, + u64 *index) { struct inode *inode; struct btrfs_inode_item *inode_item; @@ -4567,8 +4546,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, owner = 0; else owner = 1; - BTRFS_I(inode)->block_group = - btrfs_find_block_group(root, 0, alloc_hint, owner); key[0].objectid = objectid; btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); @@ -4729,11 +4706,9 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, if (IS_ERR(trans)) return PTR_ERR(trans); - btrfs_set_trans_block_group(trans, dir); - inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, dentry->d_name.len, dir->i_ino, objectid, - BTRFS_I(dir)->block_group, mode, &index); + mode, &index); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto out_unlock; @@ -4745,7 +4720,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, goto out_unlock; } - btrfs_set_trans_block_group(trans, inode); err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); if (err) drop_inode = 1; @@ -4754,8 +4728,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, init_special_inode(inode, inode->i_mode, rdev); btrfs_update_inode(trans, root, inode); } - btrfs_update_inode_block_group(trans, inode); - btrfs_update_inode_block_group(trans, dir); out_unlock: nr = trans->blocks_used; btrfs_end_transaction_throttle(trans, root); @@ -4791,11 +4763,9 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, if (IS_ERR(trans)) return PTR_ERR(trans); - btrfs_set_trans_block_group(trans, dir); - inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, dentry->d_name.len, dir->i_ino, objectid, - BTRFS_I(dir)->block_group, mode, &index); + mode, &index); if (IS_ERR(inode)) { 
err = PTR_ERR(inode); goto out_unlock; @@ -4807,7 +4777,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, goto out_unlock; } - btrfs_set_trans_block_group(trans, inode); err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); if (err) drop_inode = 1; @@ -4818,8 +4787,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, inode->i_op = &btrfs_file_inode_operations; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; } - btrfs_update_inode_block_group(trans, inode); - btrfs_update_inode_block_group(trans, dir); out_unlock: nr = trans->blocks_used; btrfs_end_transaction_throttle(trans, root); @@ -4866,8 +4833,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, btrfs_inc_nlink(inode); inode->i_ctime = CURRENT_TIME; - - btrfs_set_trans_block_group(trans, dir); ihold(inode); err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index); @@ -4876,7 +4841,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, drop_inode = 1; } else { struct dentry *parent = dget_parent(dentry); - btrfs_update_inode_block_group(trans, dir); err = btrfs_update_inode(trans, root, inode); BUG_ON(err); btrfs_log_new_name(trans, inode, NULL, parent); @@ -4917,12 +4881,10 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) trans = btrfs_start_transaction(root, 5); if (IS_ERR(trans)) return PTR_ERR(trans); - btrfs_set_trans_block_group(trans, dir); inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, dentry->d_name.len, dir->i_ino, objectid, - BTRFS_I(dir)->block_group, S_IFDIR | mode, - &index); + S_IFDIR | mode, &index); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto out_fail; @@ -4936,7 +4898,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) inode->i_op = &btrfs_dir_inode_operations; inode->i_fop = &btrfs_dir_file_operations; - btrfs_set_trans_block_group(trans, inode); btrfs_i_size_write(inode, 0); err = btrfs_update_inode(trans, root, inode); @@ 
-4950,8 +4911,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) d_instantiate(dentry, inode); drop_on_err = 0; - btrfs_update_inode_block_group(trans, inode); - btrfs_update_inode_block_group(trans, dir); out_fail: nr = trans->blocks_used; @@ -6652,8 +6611,6 @@ static int btrfs_truncate(struct inode *inode) goto out; } - btrfs_set_trans_block_group(trans, inode); - /* * Reserve space for the truncate process. Truncate should be adding * space, but if there are snapshots it may end up using space. @@ -6680,7 +6637,6 @@ static int btrfs_truncate(struct inode *inode) err = PTR_ERR(trans); goto out; } - btrfs_set_trans_block_group(trans, inode); trans->block_rsv = rsv; /* @@ -6715,7 +6671,6 @@ static int btrfs_truncate(struct inode *inode) rsv); BUG_ON(ret); - btrfs_set_trans_block_group(trans, inode); trans->block_rsv = rsv; } @@ -6775,15 +6730,14 @@ out: * create a new subvolume directory/inode (helper for the ioctl). */ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, - struct btrfs_root *new_root, - u64 new_dirid, u64 alloc_hint) + struct btrfs_root *new_root, u64 new_dirid) { struct inode *inode; int err; u64 index = 0; inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid, - new_dirid, alloc_hint, S_IFDIR | 0700, &index); + new_dirid, S_IFDIR | 0700, &index); if (IS_ERR(inode)) return PTR_ERR(inode); inode->i_op = &btrfs_dir_inode_operations; @@ -6893,21 +6847,6 @@ void btrfs_destroy_inode(struct inode *inode) spin_unlock(&root->fs_info->ordered_extent_lock); } - if (root == root->fs_info->tree_root) { - struct btrfs_block_group_cache *block_group; - - block_group = btrfs_lookup_block_group(root->fs_info, - BTRFS_I(inode)->block_group); - if (block_group && block_group->inode == inode) { - spin_lock(&block_group->lock); - block_group->inode = NULL; - spin_unlock(&block_group->lock); - btrfs_put_block_group(block_group); - } else if (block_group) { - btrfs_put_block_group(block_group); - } - } - 
spin_lock(&root->orphan_lock); if (!list_empty(&BTRFS_I(inode)->i_orphan)) { printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n", @@ -7091,8 +7030,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out_notrans; } - btrfs_set_trans_block_group(trans, new_dir); - if (dest != root) btrfs_record_root_in_trans(trans, dest); @@ -7331,12 +7268,9 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, if (IS_ERR(trans)) return PTR_ERR(trans); - btrfs_set_trans_block_group(trans, dir); - inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, dentry->d_name.len, dir->i_ino, objectid, - BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO, - &index); + S_IFLNK|S_IRWXUGO, &index); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto out_unlock; @@ -7348,7 +7282,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, goto out_unlock; } - btrfs_set_trans_block_group(trans, inode); err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); if (err) drop_inode = 1; @@ -7359,8 +7292,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode->i_op = &btrfs_file_inode_operations; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; } - btrfs_update_inode_block_group(trans, inode); - btrfs_update_inode_block_group(trans, dir); if (drop_inode) goto out_unlock; -- cgit v1.2.3 From 026fd317828500524cdc7e5ff9e8e7923abb2868 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 13 May 2011 10:32:11 -0400 Subject: Btrfs: don't always do readahead Our readahead is sort of sloppy, and really isn't always needed. For example if ls is doing a stating ls (which is the default) it's going to stat in non-disk order, so if say you have a directory with a stupid amount of files, readahead is going to do nothing but waste time in the case of doing the stat. Taking the unconditional readahead out made my test go from 57 minutes to 36 minutes. 
This means that everywhere we do loop through the tree we want to make sure we do set path->reada properly, so I went through and found all of the places where we loop through the path and set reada to 1. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index dd5938a7de21..6228a304b547 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4242,7 +4242,9 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, filp->f_pos = 2; } path = btrfs_alloc_path(); - path->reada = 2; + if (!path) + return -ENOMEM; + path->reada = 1; btrfs_set_key_type(&key, key_type); key.offset = filp->f_pos; @@ -5043,7 +5045,15 @@ again: if (!path) { path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) { + err = -ENOMEM; + goto out; + } + /* + * Chances are we'll be called again, so go ahead and do + * readahead + */ + path->reada = 1; } ret = btrfs_lookup_file_extent(trans, root, path, -- cgit v1.2.3 From d90c732122a1f6d0efe388a8a204f67f144b2eb3 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 17 May 2011 09:50:54 -0400 Subject: Btrfs: leave spinning on lookup and map the leaf On lookup we only want to read the inode item, so leave the path spinning. Also we're just wholesale reading the leaf off, so map the leaf so we don't do a bunch of kmap/kunmaps. 
Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 6228a304b547..dc8fb2b3a145 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2493,6 +2493,7 @@ static void btrfs_read_locked_inode(struct inode *inode) path = btrfs_alloc_path(); BUG_ON(!path); + path->leave_spinning = 1; memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); ret = btrfs_lookup_inode(NULL, root, path, &location, 0); @@ -2502,6 +2503,12 @@ static void btrfs_read_locked_inode(struct inode *inode) leaf = path->nodes[0]; inode_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); + if (!leaf->map_token) + map_private_extent_buffer(leaf, (unsigned long)inode_item, + sizeof(struct btrfs_inode_item), + &leaf->map_token, &leaf->kaddr, + &leaf->map_start, &leaf->map_len, + KM_USER1); inode->i_mode = btrfs_inode_mode(leaf, inode_item); inode->i_nlink = btrfs_inode_nlink(leaf, inode_item); @@ -2539,6 +2546,11 @@ static void btrfs_read_locked_inode(struct inode *inode) if (!maybe_acls) cache_no_acl(inode); + if (leaf->map_token) { + unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); + leaf->map_token = NULL; + } + btrfs_free_path(path); inode_item = NULL; -- cgit v1.2.3 From 27160b6b5a1744b6eaa8416e2b901ec937b1eee0 Mon Sep 17 00:00:00 2001 From: Sergei Trofimovich Date: Fri, 20 May 2011 20:20:32 +0000 Subject: btrfs: fix typo 'testeing' -> 'testing' Signed-off-by: Sergei Trofimovich Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7cd8ab0ef04d..72650ceb9829 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1310,7 +1310,7 @@ static int btrfs_set_bit_hook(struct inode *inode, /* * set_bit and clear bit hooks normally require _irqsave/restore - * but in this case, we are only testeing for 
the DELALLOC + * but in this case, we are only testing for the DELALLOC * bit, which is only set or cleared with irqs on */ if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { @@ -1344,7 +1344,7 @@ static int btrfs_clear_bit_hook(struct inode *inode, { /* * set_bit and clear bit hooks normally require _irqsave/restore - * but in this case, we are only testeing for the DELALLOC + * but in this case, we are only testing for the DELALLOC * bit, which is only set or cleared with irqs on */ if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { -- cgit v1.2.3 From 1cd307990d6e2b4965620e339a92e0d7ae853e13 Mon Sep 17 00:00:00 2001 From: Tsutomu Itoh Date: Thu, 19 May 2011 05:19:08 +0000 Subject: Btrfs: BUG_ON is deleted from the caller of btrfs_truncate_item & btrfs_extend_item Currently, btrfs_truncate_item and btrfs_extend_item returns only 0. So, the check by BUG_ON in the caller is unnecessary. Signed-off-by: Tsutomu Itoh Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 72650ceb9829..e9e2b4778279 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3428,7 +3428,6 @@ search_again: btrfs_file_extent_calc_inline_size(size); ret = btrfs_truncate_item(trans, root, path, size, 1); - BUG_ON(ret); } else if (root->ref_cows) { inode_sub_bytes(inode, item_end + 1 - found_key.offset); -- cgit v1.2.3 From b083916638eee513be501f53b42a4be0b9851db0 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 14 May 2011 07:10:51 +0000 Subject: fs/btrfs: Add missing btrfs_free_path Btrfs_alloc_path should be matched with btrfs_free_path in error-handling code. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @r exists@ local idexpression struct btrfs_path * x; expression ra,rb; position p1,p2; @@ x = btrfs_alloc_path@p1(...) ... when != btrfs_free_path(x,...) 
when != if (...) { ... btrfs_free_path(x,...) ...} when != x = ra if(...) { ... when != x = rb when forall when != btrfs_free_path(x,...) \(return <+...x...+>; \| return@p2...; \) } @script:python@ p1 << r.p1; p2 << r.p2; @@ cocci.print_main("alloc",p1) cocci.print_secs("return",p2) // Signed-off-by: Julia Lawall Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e9e2b4778279..80fcd5177731 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7314,6 +7314,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, datasize); if (err) { drop_inode = 1; + btrfs_free_path(path); goto out_unlock; } leaf = path->nodes[0]; -- cgit v1.2.3 From 4cb5300bc839b8a943eb19c9f27f25470e22d0ca Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 24 May 2011 15:35:30 -0400 Subject: Btrfs: add mount -o auto_defrag This will detect small random writes into files and queue them up for an auto defrag process. It isn't well suited to database workloads yet, but works for smaller files such as rpm, sqlite or bdb databases. 
Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d378f8b70ef7..bb51bb1fa44f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -342,6 +342,10 @@ static noinline int compress_file_range(struct inode *inode, int will_compress; int compress_type = root->fs_info->compress_type; + /* if this is a small write inside eof, kick off a defragbot */ + if (end <= BTRFS_I(inode)->disk_i_size && (end - start + 1) < 16 * 1024) + btrfs_add_inode_defrag(NULL, inode); + actual_end = min_t(u64, isize, end + 1); again: will_compress = 0; @@ -799,6 +803,10 @@ static noinline int cow_file_range(struct inode *inode, disk_num_bytes = num_bytes; ret = 0; + /* if this is a small write inside eof, kick off defrag */ + if (end <= BTRFS_I(inode)->disk_i_size && num_bytes < 64 * 1024) + btrfs_add_inode_defrag(trans, inode); + if (start == 0) { /* lets try to make an inline extent */ ret = cow_file_range_inline(trans, root, inode, @@ -5371,6 +5379,9 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode, if (IS_ERR(trans)) return ERR_CAST(trans); + if (start <= BTRFS_I(inode)->disk_i_size && len < 64 * 1024) + btrfs_add_inode_defrag(trans, inode); + trans->block_rsv = &root->fs_info->delalloc_block_rsv; alloc_hint = get_extent_allocation_hint(inode, start, len); @@ -6682,6 +6693,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) ei->ordered_data_close = 0; ei->orphan_meta_reserved = 0; ei->dummy_inode = 0; + ei->in_defrag = 0; ei->force_compress = BTRFS_COMPRESS_NONE; ei->delayed_node = NULL; -- cgit v1.2.3 From aa38572954ade525817fe88c54faebf85e5a61c0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 May 2011 06:53:02 -0400 Subject: fs: pass exact type of data dirties to ->dirty_inode Tell the filesystem if we just updated timestamp (I_DIRTY_SYNC) or anything else, so that the filesystem can track internally if it 
needs to push out a transaction for fdatasync or not. This is just the prototype change with no user for it yet. I plan to push large XFS changes for the next merge window, and getting this trivial infrastructure in this window would help a lot to avoid tree interdependencies. Also remove incorrect comments that ->dirty_inode can't block. That has been changed a long time ago, and many implementations rely on it. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7cd8ab0ef04d..ecff7d7a505f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4396,7 +4396,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) * FIXME, needs more benchmarking...there are no reasons other than performance * to keep or drop this code. */ -void btrfs_dirty_inode(struct inode *inode) +void btrfs_dirty_inode(struct inode *inode, int flags) { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; -- cgit v1.2.3 From 7841cb2898f66a73062c64d0ef5733dde7279e46 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 31 May 2011 18:07:27 +0200 Subject: btrfs: add helper for fs_info->closing wrap checking of filesystem 'closing' flag and fix a few missing memory barriers. 
Signed-off-by: David Sterba --- fs/btrfs/inode.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a83e44bf3206..02ff4a1b968b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4266,8 +4266,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) if (BTRFS_I(inode)->dummy_inode) return 0; - smp_mb(); - if (root->fs_info->closing && is_free_space_inode(root, inode)) + if (btrfs_fs_closing(root->fs_info) && is_free_space_inode(root, inode)) nolock = true; if (wbc->sync_mode == WB_SYNC_ALL) { -- cgit v1.2.3 From 08d2f347e877e489ca098c87a6fd2e872fef9767 Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Wed, 4 May 2011 16:18:50 +0200 Subject: Btrfs: fix extent state leak on failed nodatasum reads When encountering an EIO while reading from a nodatasum extent, we insert an error record into the inode's failure tree. btrfs_readpage_end_io_hook returns early for nodatasum inodes. We'd better clear the failure tree in that case, otherwise the kernel complains about BUG extent_state: Objects remaining on kmem_cache_close() on rmmod. 
Signed-off-by: Jan Schmidt Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 02ff4a1b968b..113913ae36e0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1986,7 +1986,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, } if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) - return 0; + goto good; if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID && test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) { -- cgit v1.2.3 From 30b4caf5d73af5c99cf1b2b46496d8bc35330992 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 8 Jun 2011 03:56:44 +0000 Subject: Btrfs: use join_transaction in btrfs_evict_inode() The WARN_ON() in start_transaction() was triggered while balancing. The cause is btrfs_relocate_chunk() started a transaction and then called iput() on the inode that stores free space cache, and iput() called btrfs_start_transaction() again. Reported-by: Tsutomu Itoh Signed-off-by: Li Zefan Reviewed-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 113913ae36e0..c15636b17874 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3646,7 +3646,7 @@ void btrfs_evict_inode(struct inode *inode) btrfs_i_size_write(inode, 0); while (1) { - trans = btrfs_start_transaction(root, 0); + trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); trans->block_rsv = root->orphan_block_rsv; -- cgit v1.2.3 From 71d7aed014457147e8f71a843d5fbf03235e4a85 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 14 Jun 2011 14:24:32 -0400 Subject: Btrfs: fix path leakage on subvol deletion The delayed ref patch accidentally removed the btrfs_free_path in btrfs_unlink_subvol; this puts it back and means we don't leak a path. 
Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c15636b17874..5813dec5101c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3076,6 +3076,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, ret = btrfs_update_inode(trans, root, dir); BUG_ON(ret); + btrfs_free_path(path); return 0; } -- cgit v1.2.3