From 33345d01522f8152f99dc84a3e7a1a45707f387f Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 20 Apr 2011 10:31:50 +0800 Subject: Btrfs: Always use 64bit inode number There's a potential problem in 32bit system when we exhaust 32bit inode numbers and start to allocate big inode numbers, because btrfs uses inode->i_ino in many places. So here we always use BTRFS_I(inode)->location.objectid, which is an u64 variable. There are 2 exceptions that BTRFS_I(inode)->location.objectid != inode->i_ino: the btree inode (0 vs 1) and empty subvol dirs (256 vs 2), and inode->i_ino will be used in those cases. Another reason to make this change is I'm going to use a special inode to save free ino cache, and the inode number must be > (u64)-256. Signed-off-by: Li Zefan --- fs/btrfs/file.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 75899a01dded..bef020451525 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -298,6 +298,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode, struct btrfs_path *path; struct btrfs_key key; struct btrfs_key new_key; + u64 ino = btrfs_ino(inode); u64 search_start = start; u64 disk_bytenr = 0; u64 num_bytes = 0; @@ -318,14 +319,14 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode, while (1) { recow = 0; - ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, + ret = btrfs_lookup_file_extent(trans, root, path, ino, search_start, -1); if (ret < 0) break; if (ret > 0 && path->slots[0] > 0 && search_start == start) { leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1); - if (key.objectid == inode->i_ino && + if (key.objectid == ino && key.type == BTRFS_EXTENT_DATA_KEY) path->slots[0]--; } @@ -346,7 +347,7 @@ next_slot: } btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); - if (key.objectid > inode->i_ino || + if (key.objectid > ino || key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end) break; @@ -592,6 +593,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, int del_slot = 0; int recow; int ret; + u64 ino = btrfs_ino(inode); btrfs_drop_extent_cache(inode, start, end - 1, 0); @@ -600,7 +602,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, again: recow = 0; split = start; - key.objectid = inode->i_ino; + key.objectid = ino; key.type = BTRFS_EXTENT_DATA_KEY; key.offset = split; @@ -612,8 +614,7 @@ again: leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); - BUG_ON(key.objectid != inode->i_ino || - key.type != BTRFS_EXTENT_DATA_KEY); + BUG_ON(key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY); fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); BUG_ON(btrfs_file_extent_type(leaf, fi) != @@ -630,7 +631,7 @@ again: other_start = 0; other_end = start; if (extent_mergeable(leaf, path->slots[0] - 1, - inode->i_ino, bytenr, orig_offset, + ino, bytenr, orig_offset, &other_start, &other_end)) { new_key.offset = end; btrfs_set_item_key_safe(trans, root, path, &new_key); @@ -653,7 +654,7 @@ again: other_start = end; other_end = 0; if (extent_mergeable(leaf, path->slots[0] + 1, - inode->i_ino, bytenr, orig_offset, + ino, bytenr, orig_offset, &other_start, &other_end)) { fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); @@ -702,7 +703,7 @@ again: ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, root->root_key.objectid, - inode->i_ino, orig_offset); + ino, orig_offset); BUG_ON(ret); if (split == start) { @@ -718,7 +719,7 @@ again: other_start = end; other_end = 0; if (extent_mergeable(leaf, path->slots[0] + 1, - inode->i_ino, bytenr, orig_offset, + ino, bytenr, orig_offset, &other_start, &other_end)) { if (recow) { btrfs_release_path(root, path); @@ -729,13 +730,13 @@ again: del_nr++; ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 0, root->root_key.objectid, - inode->i_ino, orig_offset); + ino, orig_offset); BUG_ON(ret); } other_start = 0; other_end = start; if (extent_mergeable(leaf, path->slots[0] - 1, - inode->i_ino, bytenr, orig_offset, + ino, bytenr, orig_offset, &other_start, &other_end)) { if (recow) { btrfs_release_path(root, path); @@ -746,7 +747,7 @@ again: del_nr++; ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 0, root->root_key.objectid, - inode->i_ino, orig_offset); + ino, orig_offset); BUG_ON(ret); } if (del_nr == 0) { -- cgit v1.2.3 From c704005d886cf0bc9bc3974eb009b22fe0da32c7 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 19 Apr 2011 18:00:01 +0200 Subject: btrfs: unify checking of IS_ERR and null use IS_ERR_OR_NULL when possible, done by this coccinelle script: @ match @ identifier id; @@ ( - BUG_ON(IS_ERR(id) || !id); + BUG_ON(IS_ERR_OR_NULL(id)); | - IS_ERR(id) || !id + IS_ERR_OR_NULL(id) | - !id || IS_ERR(id) + IS_ERR_OR_NULL(id) ) Signed-off-by: David Sterba --- fs/btrfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 75899a01dded..83abd274370b 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1375,7 +1375,7 @@ static long btrfs_fallocate(struct file *file, int mode, while (1) { em = btrfs_get_extent(inode, NULL, 0, cur_offset, alloc_end - cur_offset, 0); - BUG_ON(IS_ERR(em) || !em); + BUG_ON(IS_ERR_OR_NULL(em)); last_byte = min(extent_map_end(em), alloc_end); last_byte = (last_byte + mask) & ~mask; if (em->block_start == EXTENT_MAP_HOLE || -- cgit v1.2.3 From 172ddd60a662c4d8bf2809462866ddddd6431ea5 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 21 Apr 2011 00:48:27 +0200 Subject: btrfs: drop gfp parameter from alloc_extent_map pass GFP_NOFS directly to kmem_cache_alloc Signed-off-by: David Sterba --- fs/btrfs/file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 83abd274370b..80eabe85409a 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -191,9 +191,9 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, } while (1) { if (!split) - split = alloc_extent_map(GFP_NOFS); + split = alloc_extent_map(); if (!split2) - split2 = alloc_extent_map(GFP_NOFS); + split2 = alloc_extent_map(); BUG_ON(!split || !split2); write_lock(&em_tree->lock); -- cgit v1.2.3 From b3b4aa74b58bded927f579fff787fb6fa1c0393c Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 21 Apr 2011 01:20:15 +0200 Subject: btrfs: drop unused parameter from btrfs_release_path parameter tree root it's not used since commit 5f39d397dfbe140a14edecd4e73c34ce23c4f9ee ("Btrfs: Create extent_buffer interface for large blocksizes") Signed-off-by: David Sterba --- fs/btrfs/file.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 80eabe85409a..566bdf298ea8 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -376,7 +376,7 @@ next_slot: search_start = max(key.offset, start); if (recow) { - btrfs_release_path(root, path); + btrfs_release_path(path); continue; } @@ -393,7 +393,7 @@ next_slot: ret = btrfs_duplicate_item(trans, root, path, &new_key); if (ret == -EAGAIN) { - btrfs_release_path(root, path); + btrfs_release_path(path); continue; } if (ret < 0) @@ -516,7 +516,7 @@ next_slot: del_nr = 0; del_slot = 0; - btrfs_release_path(root, path); + btrfs_release_path(path); continue; } @@ -681,7 +681,7 @@ again: new_key.offset = split; ret = btrfs_duplicate_item(trans, root, path, &new_key); if (ret == -EAGAIN) { - btrfs_release_path(root, path); + btrfs_release_path(path); goto again; } BUG_ON(ret < 0); @@ -721,7 +721,7 @@ again: inode->i_ino, bytenr, orig_offset, &other_start, &other_end)) { if (recow) { - btrfs_release_path(root, path); + btrfs_release_path(path); goto again; } extent_end = other_end; @@ -738,7 +738,7 @@ again: inode->i_ino, bytenr, orig_offset, &other_start, &other_end)) { if (recow) { - btrfs_release_path(root, path); + btrfs_release_path(path); goto again; } key.offset = other_start; -- cgit v1.2.3 From a4abeea41adfa3c143c289045f4625dfaeba2212 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 11 Apr 2011 17:25:13 -0400 Subject: Btrfs: kill trans_mutex We use trans_mutex for lots of things, here's a basic list 1) To serialize trans_handles joining the currently running transaction 2) To make sure that no new trans handles are started while we are committing 3) To protect the dead_roots list and the transaction lists Really the serializing trans_handles joining is not too hard, and can really get bogged down in acquiring a reference to the transaction. So replace the trans_mutex with a trans_lock spinlock and use it to do the following 1) Protect fs_info->running_transaction. All trans handles have to do is check this, and then take a reference of the transaction and keep on going. 2) Protect the fs_info->trans_list. This doesn't get used too much, basically it just holds the current transactions, which will usually just be the currently committing transaction and the currently running transaction at most. 3) Protect the dead roots list. This is only ever processed by splicing the list so this is relatively simple. 4) Protect the fs_info->reloc_ctl stuff. This is very lightweight and was using the trans_mutex before, so this is a pretty straightforward change. 5) Protect fs_info->no_trans_join. Because we don't hold the trans_lock over the entirety of the commit we need to have a way to block new people from creating a new transaction while we're doing our work. So we set no_trans_join and in join_transaction we test to see if that is set, and if it is we do a wait_on_commit. 6) Make the transaction use count atomic so we don't need to take locks to modify it when we're dropping references. 7) Add a commit_lock to the transaction to make sure multiple people trying to commit the same transaction don't race and commit at the same time. 8) Make open_ioctl_trans an atomic so we don't have to take any locks for ioctl trans. I have tested this with xfstests, but obviously it is a pretty hairy change so lots of testing is greatly appreciated. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/file.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 75899a01dded..cd5e82e500cf 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1222,14 +1222,12 @@ int btrfs_sync_file(struct file *file, int datasync) * the current transaction, we can bail out now without any * syncing */ - mutex_lock(&root->fs_info->trans_mutex); + smp_mb(); if (BTRFS_I(inode)->last_trans <= root->fs_info->last_trans_committed) { BTRFS_I(inode)->last_trans = 0; - mutex_unlock(&root->fs_info->trans_mutex); goto out; } - mutex_unlock(&root->fs_info->trans_mutex); /* * ok we haven't committed the transaction yet, lets do a commit -- cgit v1.2.3 From 4cb5300bc839b8a943eb19c9f27f25470e22d0ca Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 24 May 2011 15:35:30 -0400 Subject: Btrfs: add mount -o auto_defrag This will detect small random writes into files and queue the up for an auto defrag process. It isn't well suited to database workloads yet, but works for smaller files such as rpm, sqlite or bdb databases. Signed-off-by: Chris Mason --- fs/btrfs/file.c | 257 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 257 insertions(+) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 58ddc4442159..c6a22d783c35 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -40,6 +40,263 @@ #include "locking.h" #include "compat.h" +/* + * when auto defrag is enabled we + * queue up these defrag structs to remember which + * inodes need defragging passes + */ +struct inode_defrag { + struct rb_node rb_node; + /* objectid */ + u64 ino; + /* + * transid where the defrag was added, we search for + * extents newer than this + */ + u64 transid; + + /* root objectid */ + u64 root; + + /* last offset we were able to defrag */ + u64 last_offset; + + /* if we've wrapped around back to zero once already */ + int cycled; +}; + +/* pop a record for an inode into the defrag tree. The lock + * must be held already + * + * If you're inserting a record for an older transid than an + * existing record, the transid already in the tree is lowered + * + * If an existing record is found the defrag item you + * pass in is freed + */ +static int __btrfs_add_inode_defrag(struct inode *inode, + struct inode_defrag *defrag) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + struct inode_defrag *entry; + struct rb_node **p; + struct rb_node *parent = NULL; + + p = &root->fs_info->defrag_inodes.rb_node; + while (*p) { + parent = *p; + entry = rb_entry(parent, struct inode_defrag, rb_node); + + if (defrag->ino < entry->ino) + p = &parent->rb_left; + else if (defrag->ino > entry->ino) + p = &parent->rb_right; + else { + /* if we're reinserting an entry for + * an old defrag run, make sure to + * lower the transid of our existing record + */ + if (defrag->transid < entry->transid) + entry->transid = defrag->transid; + if (defrag->last_offset > entry->last_offset) + entry->last_offset = defrag->last_offset; + goto exists; + } + } + BTRFS_I(inode)->in_defrag = 1; + rb_link_node(&defrag->rb_node, parent, p); + rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes); + return 0; + +exists: + kfree(defrag); + return 0; + +} + +/* + * insert a defrag record for this inode if auto defrag is + * enabled + */ +int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, + struct inode *inode) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + struct inode_defrag *defrag; + int ret = 0; + u64 transid; + + if (!btrfs_test_opt(root, AUTO_DEFRAG)) + return 0; + + if (root->fs_info->closing) + return 0; + + if (BTRFS_I(inode)->in_defrag) + return 0; + + if (trans) + transid = trans->transid; + else + transid = BTRFS_I(inode)->root->last_trans; + + defrag = kzalloc(sizeof(*defrag), GFP_NOFS); + if (!defrag) + return -ENOMEM; + + defrag->ino = inode->i_ino; + defrag->transid = transid; + defrag->root = root->root_key.objectid; + + spin_lock(&root->fs_info->defrag_inodes_lock); + if (!BTRFS_I(inode)->in_defrag) + ret = __btrfs_add_inode_defrag(inode, defrag); + spin_unlock(&root->fs_info->defrag_inodes_lock); + return ret; +} + +/* + * must be called with the defrag_inodes lock held + */ +struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info, u64 ino, + struct rb_node **next) +{ + struct inode_defrag *entry = NULL; + struct rb_node *p; + struct rb_node *parent = NULL; + + p = info->defrag_inodes.rb_node; + while (p) { + parent = p; + entry = rb_entry(parent, struct inode_defrag, rb_node); + + if (ino < entry->ino) + p = parent->rb_left; + else if (ino > entry->ino) + p = parent->rb_right; + else + return entry; + } + + if (next) { + while (parent && ino > entry->ino) { + parent = rb_next(parent); + entry = rb_entry(parent, struct inode_defrag, rb_node); + } + *next = parent; + } + return NULL; +} + +/* + * run through the list of inodes in the FS that need + * defragging + */ +int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) +{ + struct inode_defrag *defrag; + struct btrfs_root *inode_root; + struct inode *inode; + struct rb_node *n; + struct btrfs_key key; + struct btrfs_ioctl_defrag_range_args range; + u64 first_ino = 0; + int num_defrag; + int defrag_batch = 1024; + + memset(&range, 0, sizeof(range)); + range.len = (u64)-1; + + atomic_inc(&fs_info->defrag_running); + spin_lock(&fs_info->defrag_inodes_lock); + while(1) { + n = NULL; + + /* find an inode to defrag */ + defrag = btrfs_find_defrag_inode(fs_info, first_ino, &n); + if (!defrag) { + if (n) + defrag = rb_entry(n, struct inode_defrag, rb_node); + else if (first_ino) { + first_ino = 0; + continue; + } else { + break; + } + } + + /* remove it from the rbtree */ + first_ino = defrag->ino + 1; + rb_erase(&defrag->rb_node, &fs_info->defrag_inodes); + + if (fs_info->closing) + goto next_free; + + spin_unlock(&fs_info->defrag_inodes_lock); + + /* get the inode */ + key.objectid = defrag->root; + btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); + key.offset = (u64)-1; + inode_root = btrfs_read_fs_root_no_name(fs_info, &key); + if (IS_ERR(inode_root)) + goto next; + + key.objectid = defrag->ino; + btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); + key.offset = 0; + + inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL); + if (IS_ERR(inode)) + goto next; + + /* do a chunk of defrag */ + BTRFS_I(inode)->in_defrag = 0; + range.start = defrag->last_offset; + num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid, + defrag_batch); + /* + * if we filled the whole defrag batch, there + * must be more work to do. Queue this defrag + * again + */ + if (num_defrag == defrag_batch) { + defrag->last_offset = range.start; + __btrfs_add_inode_defrag(inode, defrag); + /* + * we don't want to kfree defrag, we added it back to + * the rbtree + */ + defrag = NULL; + } else if (defrag->last_offset && !defrag->cycled) { + /* + * we didn't fill our defrag batch, but + * we didn't start at zero. Make sure we loop + * around to the start of the file. + */ + defrag->last_offset = 0; + defrag->cycled = 1; + __btrfs_add_inode_defrag(inode, defrag); + defrag = NULL; + } + + iput(inode); +next: + spin_lock(&fs_info->defrag_inodes_lock); +next_free: + kfree(defrag); + } + spin_unlock(&fs_info->defrag_inodes_lock); + + atomic_dec(&fs_info->defrag_running); + + /* + * during unmount, we use the transaction_wait queue to + * wait for the defragger to stop + */ + wake_up(&fs_info->transaction_wait); + return 0; +} /* simple helper to fault in pages and copy. This should go away * and be replaced with calls into generic code. -- cgit v1.2.3 From a4689d2bd3b00dcf5c4320f06e0ab88810fbff9c Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 31 May 2011 17:08:14 +0000 Subject: btrfs: use btrfs_ino to access inode number commit 4cb5300bc ("Btrfs: add mount -o auto_defrag") accesses inode number directly while it should use the helper with the new inode number allocator. Signed-off-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index e3a1b0c2394c..982b5ea9762f 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -144,7 +144,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, if (!defrag) return -ENOMEM; - defrag->ino = inode->i_ino; + defrag->ino = btrfs_ino(inode); defrag->transid = transid; defrag->root = root->root_key.objectid; -- cgit v1.2.3 From 7841cb2898f66a73062c64d0ef5733dde7279e46 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 31 May 2011 18:07:27 +0200 Subject: btrfs: add helper for fs_info->closing wrap checking of filesystem 'closing' flag and fix a few missing memory barriers. Signed-off-by: David Sterba --- fs/btrfs/file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 982b5ea9762f..fa4ef18b66b1 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -129,7 +129,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, if (!btrfs_test_opt(root, AUTO_DEFRAG)) return 0; - if (root->fs_info->closing) + if (btrfs_fs_closing(root->fs_info)) return 0; if (BTRFS_I(inode)->in_defrag) @@ -229,7 +229,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) first_ino = defrag->ino + 1; rb_erase(&defrag->rb_node, &fs_info->defrag_inodes); - if (fs_info->closing) + if (btrfs_fs_closing(fs_info)) goto next_free; spin_unlock(&fs_info->defrag_inodes_lock); -- cgit v1.2.3