From ece7d20e8be6730fbb29f4550de6b19b1a3a9387 Mon Sep 17 00:00:00 2001 From: Mike Fleetwood Date: Fri, 18 Nov 2011 18:55:01 +0000 Subject: Btrfs: Don't error on resizing FS to same size It seems overly harsh to fail a resize of a btrfs file system to the same size when a shrink or grow would succeed. User app GParted trips over this error. Allow it by bypassing the shrink or grow operation. Signed-off-by: Mike Fleetwood --- fs/btrfs/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a90e749ed6d2..72d461656f60 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1278,7 +1278,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, } ret = btrfs_grow_device(trans, device, new_size); btrfs_commit_transaction(trans, root); - } else { + } else if (new_size < old_size) { ret = btrfs_shrink_device(device, new_size); } -- cgit v1.2.3 From 306424cc880a0fbbdc99eee1f43d056a301a180f Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 14 Dec 2011 20:12:02 -0500 Subject: Btrfs: fix ctime update of on-disk inode To reproduce the bug: # touch /mnt/tmp # stat /mnt/tmp | grep Change Change: 2011-12-09 09:32:23.412105981 +0800 # chattr +i /mnt/tmp # stat /mnt/tmp | grep Change Change: 2011-12-09 09:32:43.198105295 +0800 # umount /mnt # mount /dev/loop1 /mnt # stat /mnt/tmp | grep Change Change: 2011-12-09 09:32:23.412105981 +0800 We should update ctime of in-memory inode before calling btrfs_update_inode(). Signed-off-by: Li Zefan Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 72d461656f60..40eaa9fdf07c 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -252,11 +252,11 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); + btrfs_update_iflags(inode); + inode->i_ctime = CURRENT_TIME; ret = btrfs_update_inode(trans, root, inode); BUG_ON(ret); - btrfs_update_iflags(inode); - inode->i_ctime = CURRENT_TIME; btrfs_end_transaction(trans, root); mnt_drop_write(file->f_path.mnt); -- cgit v1.2.3 From 660d3f6cde552323578b85fc5a09a6742f1fe804 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 9 Dec 2011 11:18:51 -0500 Subject: Btrfs: fix how we do delalloc reservations and how we free reservations on error Running xfstests 269 with some tracing my scripts kept spitting out errors about releasing bytes that we didn't actually have reserved. This took me down a huge rabbit hole and it turns out the way we deal with reserved_extents is wrong, we need to only be setting it if the reservation succeeds, otherwise the free() method will come in and unreserve space that isn't actually reserved yet, which can lead to other warnings and such. The math was all working out right in the end, but it caused all sorts of other issues in addition to making my scripts yell and scream and generally make it impossible for me to track down the original issue I was looking for. The other problem is with our error handling in the reservation code. There are two cases that we need to deal with 1) We raced with free. In this case free won't free anything because csum_bytes is modified before we dro the lock in our reservation path, so free rightly doesn't release any space because the reservation code may be depending on that reservation. However if we fail, we need the reservation side to do the free at that point since that space is no longer in use. So as it stands the code was doing this fine and it worked out, except in case #2 2) We don't race with free. Nobody comes in and changes anything, and our reservation fails. In this case we didn't reserve anything anyway and we just need to clean up csum_bytes but not free anything. So we keep track of csum_bytes before we drop the lock and if it hasn't changed we know we can just decrement csum_bytes and carry on. Because of the case where we can race with free()'s since we have to drop our spin_lock to do the reservation, I'm going to serialize all reservations with the i_mutex. We already get this for free in the heavy use paths, truncate and file write all hold the i_mutex, just needed to add it to page_mkwrite and various ioctl/balance things. With this patch my space leak scripts no longer scream bloody murder. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/ioctl.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 72d461656f60..dd8891662355 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -858,8 +858,10 @@ static int cluster_pages_for_defrag(struct inode *inode, return 0; file_end = (isize - 1) >> PAGE_CACHE_SHIFT; + mutex_lock(&inode->i_mutex); ret = btrfs_delalloc_reserve_space(inode, num_pages << PAGE_CACHE_SHIFT); + mutex_unlock(&inode->i_mutex); if (ret) return ret; again: -- cgit v1.2.3 From 66d7e7f09f77456fe68683247d77721032a00ee5 Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Mon, 12 Sep 2011 15:26:38 +0200 Subject: Btrfs: mark delayed refs as for cow Add a for_cow parameter to add_delayed_*_ref and pass the appropriate value from every call site. The for_cow parameter will later on be used to determine if a ref will change anything with respect to qgroups. Delayed refs coming from relocation are always counted as for_cow, as they don't change subvol quota. Also pass in the fs_info for later use. btrfs_find_all_roots() will use this as an optimization, as changes that are for_cow will not change anything with respect to which root points to a certain leaf. Thus, we don't need to add the current sequence number to those delayed refs. Signed-off-by: Arne Jansen Signed-off-by: Jan Schmidt --- fs/btrfs/ioctl.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 72d461656f60..c48f2e931ea0 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -358,7 +358,7 @@ static noinline int create_subvol(struct btrfs_root *root, return PTR_ERR(trans); leaf = btrfs_alloc_free_block(trans, root, root->leafsize, - 0, objectid, NULL, 0, 0, 0); + 0, objectid, NULL, 0, 0, 0, 0); if (IS_ERR(leaf)) { ret = PTR_ERR(leaf); goto fail; @@ -2425,7 +2425,8 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, disko, diskl, 0, root->root_key.objectid, btrfs_ino(inode), - new_key.offset - datao); + new_key.offset - datao, + 0); BUG_ON(ret); } } else if (type == BTRFS_FILE_EXTENT_INLINE) { -- cgit v1.2.3 From a561be7100cd610bd2e082f3211c1dfb45835817 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 23 Nov 2011 11:57:51 -0500 Subject: switch a bunch of places to mnt_want_write_file() it's both faster (in case when file has been opened for write) and cleaner. Signed-off-by: Al Viro --- fs/btrfs/ioctl.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index c04f02c7d5bb..20dd8f3b6c72 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -201,7 +201,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) } } - ret = mnt_want_write(file->f_path.mnt); + ret = mnt_want_write_file(file); if (ret) goto out_unlock; @@ -1855,7 +1855,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, goto out; } - err = mnt_want_write(file->f_path.mnt); + err = mnt_want_write_file(file); if (err) goto out; @@ -1987,7 +1987,7 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) if (btrfs_root_readonly(root)) return -EROFS; - ret = mnt_want_write(file->f_path.mnt); + ret = mnt_want_write_file(file); if (ret) return ret; @@ -2195,7 +2195,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, if (btrfs_root_readonly(root)) return -EROFS; - ret = mnt_want_write(file->f_path.mnt); + ret = mnt_want_write_file(file); if (ret) return ret; @@ -2549,7 +2549,7 @@ static long btrfs_ioctl_trans_start(struct file *file) if (btrfs_root_readonly(root)) goto out; - ret = mnt_want_write(file->f_path.mnt); + ret = mnt_want_write_file(file); if (ret) goto out; -- cgit v1.2.3 From 2a79f17e4a641a2f463cb512cb0ec349844a147b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 9 Dec 2011 08:06:57 -0500 Subject: vfs: mnt_drop_write_file() new helper (wrapper around mnt_drop_write()) to be used in pair with mnt_want_write_file(). Signed-off-by: Al Viro --- fs/btrfs/ioctl.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 20dd8f3b6c72..5441ff1480fd 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -259,7 +259,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) btrfs_end_transaction(trans, root); - mnt_drop_write(file->f_path.mnt); + mnt_drop_write_file(file); ret = 0; out_unlock: @@ -1971,7 +1971,7 @@ out_dput: dput(dentry); out_unlock_dir: mutex_unlock(&dir->i_mutex); - mnt_drop_write(file->f_path.mnt); + mnt_drop_write_file(file); out: kfree(vol_args); return err; @@ -2040,7 +2040,7 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) ret = -EINVAL; } out: - mnt_drop_write(file->f_path.mnt); + mnt_drop_write_file(file); return ret; } @@ -2510,7 +2510,7 @@ out_unlock: out_fput: fput(src_file); out_drop_write: - mnt_drop_write(file->f_path.mnt); + mnt_drop_write_file(file); return ret; } @@ -2565,7 +2565,7 @@ static long btrfs_ioctl_trans_start(struct file *file) out_drop: atomic_dec(&root->fs_info->open_ioctl_trans); - mnt_drop_write(file->f_path.mnt); + mnt_drop_write_file(file); out: return ret; } @@ -2800,7 +2800,7 @@ long btrfs_ioctl_trans_end(struct file *file) atomic_dec(&root->fs_info->open_ioctl_trans); - mnt_drop_write(file->f_path.mnt); + mnt_drop_write_file(file); return 0; } -- cgit v1.2.3 From 4692cf58aa7b81f721c1653d48db99ea41421d58 Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Fri, 2 Dec 2011 14:56:41 +0100 Subject: Btrfs: new backref walking code The old backref iteration code could only safely be used on commit roots. Besides this limitation, it had bugs in finding the roots for these references. This commit replaces large parts of it by btrfs_find_all_roots() which a) really finds all roots and the correct roots, b) works correctly under heavy file system load, c) considers delayed refs. Signed-off-by: Jan Schmidt --- fs/btrfs/ioctl.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index c48f2e931ea0..9b0526872b7b 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2976,7 +2976,7 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root, { int ret = 0; int size; - u64 extent_offset; + u64 extent_item_pos; struct btrfs_ioctl_logical_ino_args *loi; struct btrfs_data_container *inodes = NULL; struct btrfs_path *path = NULL; @@ -3007,15 +3007,17 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root, } ret = extent_from_logical(root->fs_info, loi->logical, path, &key); + btrfs_release_path(path); if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) ret = -ENOENT; if (ret < 0) goto out; - extent_offset = loi->logical - key.objectid; + extent_item_pos = loi->logical - key.objectid; ret = iterate_extent_inodes(root->fs_info, path, key.objectid, - extent_offset, build_ino_list, inodes); + extent_item_pos, build_ino_list, + inodes); if (ret < 0) goto out; -- cgit v1.2.3 From 815745cf3e46681241ad8025602ffbf2a452d514 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 17 Nov 2011 15:40:49 -0500 Subject: btrfs: let ->s_fs_info point to fs_info, not root... the latter can be obtained from the former (by looking as ->tree_root) just as cheaply as we currently are doing the other way round. Signed-off-by: Al Viro --- fs/btrfs/ioctl.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 5441ff1480fd..0c55f8f38091 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -276,14 +276,13 @@ static int btrfs_ioctl_getversion(struct file *file, int __user *arg) static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) { - struct btrfs_root *root = fdentry(file)->d_sb->s_fs_info; - struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_fs_info *fs_info = btrfs_sb(fdentry(file)->d_sb); struct btrfs_device *device; struct request_queue *q; struct fstrim_range range; u64 minlen = ULLONG_MAX; u64 num_devices = 0; - u64 total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy); + u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy); int ret; if (!capable(CAP_SYS_ADMIN)) @@ -312,7 +311,7 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) range.len = min(range.len, total_bytes - range.start); range.minlen = max(range.minlen, minlen); - ret = btrfs_trim_fs(root, &range); + ret = btrfs_trim_fs(fs_info->tree_root, &range); if (ret < 0) return ret; -- cgit v1.2.3 From f062abf089ff705e09bbaa6fa1e2fd7688a0f2ea Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 29 Dec 2011 13:36:45 +0800 Subject: Btrfs: remove BUG_ON()s in btrfs_ioctl_setflags() We can recover from errors and return -errno to user space. Signed-off-by: Li Zefan --- fs/btrfs/ioctl.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index c04f02c7d5bb..9619fb03a5d6 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -176,6 +176,8 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) struct btrfs_trans_handle *trans; unsigned int flags, oldflags; int ret; + u64 ip_oldflags; + unsigned int i_oldflags; if (btrfs_root_readonly(root)) return -EROFS; @@ -192,6 +194,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) mutex_lock(&inode->i_mutex); + ip_oldflags = ip->flags; + i_oldflags = inode->i_flags; + flags = btrfs_mask_flags(inode->i_mode, flags); oldflags = btrfs_flags_to_ioctl(ip->flags); if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { @@ -250,18 +255,23 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) } trans = btrfs_join_transaction(root); - BUG_ON(IS_ERR(trans)); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out_drop; + } btrfs_update_iflags(inode); inode->i_ctime = CURRENT_TIME; ret = btrfs_update_inode(trans, root, inode); - BUG_ON(ret); btrfs_end_transaction(trans, root); + out_drop: + if (ret) { + ip->flags = ip_oldflags; + inode->i_flags = i_oldflags; + } mnt_drop_write(file->f_path.mnt); - - ret = 0; out_unlock: mutex_unlock(&inode->i_mutex); return ret; -- cgit v1.2.3 From 4da6f1a332f6c16b6594c7892f13c31459b9b1c8 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 29 Dec 2011 13:39:50 +0800 Subject: Btrfs: reserve metadata space in btrfs_ioctl_setflags() Check and reserve space for btrfs_update_inode(). Signed-off-by: Li Zefan --- fs/btrfs/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 9619fb03a5d6..fe8a60c865eb 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -254,7 +254,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS); } - trans = btrfs_join_transaction(root); + trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) { ret = PTR_ERR(trans); goto out_drop; -- cgit v1.2.3 From c9e9f97bdfb64d06e9520f8e4f37674ac21cc9bc Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 16 Jan 2012 22:04:47 +0200 Subject: Btrfs: add basic restriper infrastructure Add basic restriper infrastructure: extended balancing ioctl and all related ioctl data structures, add data structure for tracking restriper's state to fs_info, etc. The semantics of the old balancing ioctl are fully preserved. Explicitly disallow any volume operations when balance is in progress. Signed-off-by: Ilya Dryomov --- fs/btrfs/ioctl.c | 135 ++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 119 insertions(+), 16 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index c04f02c7d5bb..d838d2cfb947 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1203,13 +1203,21 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, if (!capable(CAP_SYS_ADMIN)) return -EPERM; + mutex_lock(&root->fs_info->volume_mutex); + if (root->fs_info->balance_ctl) { + printk(KERN_INFO "btrfs: balance in progress\n"); + ret = -EINVAL; + goto out; + } + vol_args = memdup_user(arg, sizeof(*vol_args)); - if (IS_ERR(vol_args)) - return PTR_ERR(vol_args); + if (IS_ERR(vol_args)) { + ret = PTR_ERR(vol_args); + goto out; + } vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; - mutex_lock(&root->fs_info->volume_mutex); sizestr = vol_args->name; devstr = strchr(sizestr, ':'); if (devstr) { @@ -1226,7 +1234,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, printk(KERN_INFO "btrfs: resizer unable to find device %llu\n", (unsigned long long)devid); ret = -EINVAL; - goto out_unlock; + goto out_free; } if (!strcmp(sizestr, "max")) new_size = device->bdev->bd_inode->i_size; @@ -1241,7 +1249,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, new_size = memparse(sizestr, NULL); if (new_size == 0) { ret = -EINVAL; - goto out_unlock; + goto out_free; } } @@ -1250,7 +1258,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, if (mod < 0) { if (new_size > old_size) { ret = -EINVAL; - goto out_unlock; + goto out_free; } new_size = old_size - new_size; } else if (mod > 0) { @@ -1259,11 +1267,11 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, if (new_size < 256 * 1024 * 1024) { ret = -EINVAL; - goto out_unlock; + goto out_free; } if (new_size > device->bdev->bd_inode->i_size) { ret = -EFBIG; - goto out_unlock; + goto out_free; } do_div(new_size, root->sectorsize); @@ -1276,7 +1284,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { ret = PTR_ERR(trans); - goto out_unlock; + goto out_free; } ret = btrfs_grow_device(trans, device, new_size); btrfs_commit_transaction(trans, root); @@ -1284,9 +1292,10 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, ret = btrfs_shrink_device(device, new_size); } -out_unlock: - mutex_unlock(&root->fs_info->volume_mutex); +out_free: kfree(vol_args); +out: + mutex_unlock(&root->fs_info->volume_mutex); return ret; } @@ -2052,14 +2061,25 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; + mutex_lock(&root->fs_info->volume_mutex); + if (root->fs_info->balance_ctl) { + printk(KERN_INFO "btrfs: balance in progress\n"); + ret = -EINVAL; + goto out; + } + vol_args = memdup_user(arg, sizeof(*vol_args)); - if (IS_ERR(vol_args)) - return PTR_ERR(vol_args); + if (IS_ERR(vol_args)) { + ret = PTR_ERR(vol_args); + goto out; + } vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; ret = btrfs_init_new_device(root, vol_args->name); kfree(vol_args); +out: + mutex_unlock(&root->fs_info->volume_mutex); return ret; } @@ -2074,14 +2094,25 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg) if (root->fs_info->sb->s_flags & MS_RDONLY) return -EROFS; + mutex_lock(&root->fs_info->volume_mutex); + if (root->fs_info->balance_ctl) { + printk(KERN_INFO "btrfs: balance in progress\n"); + ret = -EINVAL; + goto out; + } + vol_args = memdup_user(arg, sizeof(*vol_args)); - if (IS_ERR(vol_args)) - return PTR_ERR(vol_args); + if (IS_ERR(vol_args)) { + ret = PTR_ERR(vol_args); + goto out; + } vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; ret = btrfs_rm_device(root, vol_args->name); kfree(vol_args); +out: + mutex_unlock(&root->fs_info->volume_mutex); return ret; } @@ -3034,6 +3065,76 @@ out: return ret; } +void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, + struct btrfs_ioctl_balance_args *bargs) +{ + struct btrfs_balance_control *bctl = fs_info->balance_ctl; + + bargs->flags = bctl->flags; + + memcpy(&bargs->data, &bctl->data, sizeof(bargs->data)); + memcpy(&bargs->meta, &bctl->meta, sizeof(bargs->meta)); + memcpy(&bargs->sys, &bctl->sys, sizeof(bargs->sys)); +} + +static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg) +{ + struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_ioctl_balance_args *bargs; + struct btrfs_balance_control *bctl; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (fs_info->sb->s_flags & MS_RDONLY) + return -EROFS; + + mutex_lock(&fs_info->volume_mutex); + mutex_lock(&fs_info->balance_mutex); + + if (arg) { + bargs = memdup_user(arg, sizeof(*bargs)); + if (IS_ERR(bargs)) { + ret = PTR_ERR(bargs); + goto out; + } + } else { + bargs = NULL; + } + + bctl = kzalloc(sizeof(*bctl), GFP_NOFS); + if (!bctl) { + ret = -ENOMEM; + goto out_bargs; + } + + bctl->fs_info = fs_info; + if (arg) { + memcpy(&bctl->data, &bargs->data, sizeof(bctl->data)); + memcpy(&bctl->meta, &bargs->meta, sizeof(bctl->meta)); + memcpy(&bctl->sys, &bargs->sys, sizeof(bctl->sys)); + + bctl->flags = bargs->flags; + } + + ret = btrfs_balance(bctl, bargs); + /* + * bctl is freed in __cancel_balance + */ + if (arg) { + if (copy_to_user(arg, bargs, sizeof(*bargs))) + ret = -EFAULT; + } + +out_bargs: + kfree(bargs); +out: + mutex_unlock(&fs_info->balance_mutex); + mutex_unlock(&fs_info->volume_mutex); + return ret; +} + long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -3078,7 +3179,7 @@ long btrfs_ioctl(struct file *file, unsigned int case BTRFS_IOC_DEV_INFO: return btrfs_ioctl_dev_info(root, argp); case BTRFS_IOC_BALANCE: - return btrfs_balance(root->fs_info->dev_root); + return btrfs_ioctl_balance(root, NULL); case BTRFS_IOC_CLONE: return btrfs_ioctl_clone(file, arg, 0, 0, 0); case BTRFS_IOC_CLONE_RANGE: @@ -3110,6 +3211,8 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_scrub_cancel(root, argp); case BTRFS_IOC_SCRUB_PROGRESS: return btrfs_ioctl_scrub_progress(root, argp); + case BTRFS_IOC_BALANCE_V2: + return btrfs_ioctl_balance(root, argp); } return -ENOTTY; -- cgit v1.2.3 From f43ffb60fd94e98be02780944e182ade6653b916 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 16 Jan 2012 22:04:47 +0200 Subject: Btrfs: add basic infrastructure for selective balancing This allows to have a separate set of filters for each chunk type (data,meta,sys). The code however is generic and switch on chunk type is only done once. This commit also adds a type filter: it allows to balance for example meta and system chunks w/o touching data ones. Signed-off-by: Ilya Dryomov --- fs/btrfs/ioctl.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index d838d2cfb947..29b3a94933f0 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3116,6 +3116,9 @@ static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg) memcpy(&bctl->sys, &bargs->sys, sizeof(bctl->sys)); bctl->flags = bargs->flags; + } else { + /* balance everything - no filters */ + bctl->flags |= BTRFS_BALANCE_TYPE_MASK; } ret = btrfs_balance(bctl, bargs); -- cgit v1.2.3 From 837d5b6e46d1a4af5b6cc8f2fe83cb5de79a2961 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 16 Jan 2012 22:04:49 +0200 Subject: Btrfs: allow for pausing restriper Implement an ioctl for pausing restriper. This pauses the relocation, but balance is still considered to be "in progress": balance item is not deleted, other volume operations cannot be started, etc. If paused in the middle of profile changing operation we will continue making allocations with the target profile. Add a hook to close_ctree() to pause restriper and free its data structures on unmount. (It's safe to unmount when restriper is in "paused" state, we will resume with the same parameters on the next mount) Signed-off-by: Ilya Dryomov --- fs/btrfs/ioctl.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 29b3a94933f0..f572c53dda4f 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3072,6 +3072,11 @@ void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, bargs->flags = bctl->flags; + if (atomic_read(&fs_info->balance_running)) + bargs->state |= BTRFS_BALANCE_STATE_RUNNING; + if (atomic_read(&fs_info->balance_pause_req)) + bargs->state |= BTRFS_BALANCE_STATE_PAUSE_REQ; + memcpy(&bargs->data, &bctl->data, sizeof(bargs->data)); memcpy(&bargs->meta, &bctl->meta, sizeof(bargs->meta)); memcpy(&bargs->sys, &bctl->sys, sizeof(bargs->sys)); @@ -3103,6 +3108,11 @@ static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg) bargs = NULL; } + if (fs_info->balance_ctl) { + ret = -EINPROGRESS; + goto out_bargs; + } + bctl = kzalloc(sizeof(*bctl), GFP_NOFS); if (!bctl) { ret = -ENOMEM; @@ -3123,7 +3133,8 @@ static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg) ret = btrfs_balance(bctl, bargs); /* - * bctl is freed in __cancel_balance + * bctl is freed in __cancel_balance or in free_fs_info if + * restriper was paused all the way until unmount */ if (arg) { if (copy_to_user(arg, bargs, sizeof(*bargs))) @@ -3138,6 +3149,19 @@ out: return ret; } +static long btrfs_ioctl_balance_ctl(struct btrfs_root *root, int cmd) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + switch (cmd) { + case BTRFS_BALANCE_CTL_PAUSE: + return btrfs_pause_balance(root->fs_info); + } + + return -EINVAL; +} + long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -3216,6 +3240,8 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_scrub_progress(root, argp); case BTRFS_IOC_BALANCE_V2: return btrfs_ioctl_balance(root, argp); + case BTRFS_IOC_BALANCE_CTL: + return btrfs_ioctl_balance_ctl(root, arg); } return -ENOTTY; -- cgit v1.2.3 From a7e99c691af553fc15ac46a51f130b7c59a65f76 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 16 Jan 2012 22:04:49 +0200 Subject: Btrfs: allow for canceling restriper Implement an ioctl for canceling restriper. Currently we wait until relocation of the current block group is finished, in future this can be done by triggering a commit. Balance item is deleted and no memory about the interrupted balance is kept. Signed-off-by: Ilya Dryomov --- fs/btrfs/ioctl.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index f572c53dda4f..60852217ce9a 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3076,6 +3076,8 @@ void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, bargs->state |= BTRFS_BALANCE_STATE_RUNNING; if (atomic_read(&fs_info->balance_pause_req)) bargs->state |= BTRFS_BALANCE_STATE_PAUSE_REQ; + if (atomic_read(&fs_info->balance_cancel_req)) + bargs->state |= BTRFS_BALANCE_STATE_CANCEL_REQ; memcpy(&bargs->data, &bctl->data, sizeof(bargs->data)); memcpy(&bargs->meta, &bctl->meta, sizeof(bargs->meta)); @@ -3157,6 +3159,8 @@ static long btrfs_ioctl_balance_ctl(struct btrfs_root *root, int cmd) switch (cmd) { case BTRFS_BALANCE_CTL_PAUSE: return btrfs_pause_balance(root->fs_info); + case BTRFS_BALANCE_CTL_CANCEL: + return btrfs_cancel_balance(root->fs_info); } return -EINVAL; -- cgit v1.2.3 From de322263d3a6d4ffd4ed7c4d0c6536e9497aec9b Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 16 Jan 2012 22:04:49 +0200 Subject: Btrfs: allow for resuming restriper after it was paused Recognize BTRFS_BALANCE_RESUME flag passed from userspace. We use the same heuristics used when recovering balance after a crash to try to start where we left off last time. Signed-off-by: Ilya Dryomov --- fs/btrfs/ioctl.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 60852217ce9a..85e546ffe3c7 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3106,6 +3106,20 @@ static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg) ret = PTR_ERR(bargs); goto out; } + + if (bargs->flags & BTRFS_BALANCE_RESUME) { + if (!fs_info->balance_ctl) { + ret = -ENOTCONN; + goto out_bargs; + } + + bctl = fs_info->balance_ctl; + spin_lock(&fs_info->balance_lock); + bctl->flags |= BTRFS_BALANCE_RESUME; + spin_unlock(&fs_info->balance_lock); + + goto do_balance; + } } else { bargs = NULL; } @@ -3133,6 +3147,7 @@ static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg) bctl->flags |= BTRFS_BALANCE_TYPE_MASK; } +do_balance: ret = btrfs_balance(bctl, bargs); /* * bctl is freed in __cancel_balance or in free_fs_info if -- cgit v1.2.3 From 19a39dce3b9bf0244d19a446718ad6f7605ff099 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 16 Jan 2012 22:04:49 +0200 Subject: Btrfs: add balance progress reporting Signed-off-by: Ilya Dryomov --- fs/btrfs/ioctl.c | 45 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 85e546ffe3c7..1e7a9bac31ab 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3065,7 +3065,7 @@ out: return ret; } -void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, +void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock, struct btrfs_ioctl_balance_args *bargs) { struct btrfs_balance_control *bctl = fs_info->balance_ctl; @@ -3082,6 +3082,14 @@ void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, memcpy(&bargs->data, &bctl->data, sizeof(bargs->data)); memcpy(&bargs->meta, &bctl->meta, sizeof(bargs->meta)); memcpy(&bargs->sys, &bctl->sys, sizeof(bargs->sys)); + + if (lock) { + spin_lock(&fs_info->balance_lock); + memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat)); + spin_unlock(&fs_info->balance_lock); + } else { + memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat)); + } } static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg) @@ -3181,6 +3189,39 @@ static long btrfs_ioctl_balance_ctl(struct btrfs_root *root, int cmd) return -EINVAL; } +static long btrfs_ioctl_balance_progress(struct btrfs_root *root, + void __user *arg) +{ + struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_ioctl_balance_args *bargs; + int ret = 0; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + mutex_lock(&fs_info->balance_mutex); + if (!fs_info->balance_ctl) { + ret = -ENOTCONN; + goto out; + } + + bargs = kzalloc(sizeof(*bargs), GFP_NOFS); + if (!bargs) { + ret = -ENOMEM; + goto out; + } + + update_ioctl_balance_args(fs_info, 1, bargs); + + if (copy_to_user(arg, bargs, sizeof(*bargs))) + ret = -EFAULT; + + kfree(bargs); +out: + mutex_unlock(&fs_info->balance_mutex); + return ret; +} + long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -3261,6 +3302,8 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_balance(root, argp); case BTRFS_IOC_BALANCE_CTL: return btrfs_ioctl_balance_ctl(root, arg); + case BTRFS_IOC_BALANCE_PROGRESS: + return btrfs_ioctl_balance_progress(root, argp); } return -ENOTTY; -- cgit v1.2.3 From f248679e86fead40cc78e724c7181d6bec1a2046 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 13 Jan 2012 12:09:22 -0500 Subject: Btrfs: add a delalloc mutex to inodes for delalloc reservations I was using i_mutex for this, but we're getting bogus lockdep warnings by doing that and theres no real way to get rid of those, so just stop using i_mutex to protect delalloc metadata reservations and use a delalloc mutex instead. This shouldn't be contended often at all, only if you are writing and mmap writing to the file at the same time. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/ioctl.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 7fdf22c2dc0d..6834be4c8709 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -868,10 +868,8 @@ static int cluster_pages_for_defrag(struct inode *inode, return 0; file_end = (isize - 1) >> PAGE_CACHE_SHIFT; - mutex_lock(&inode->i_mutex); ret = btrfs_delalloc_reserve_space(inode, num_pages << PAGE_CACHE_SHIFT); - mutex_unlock(&inode->i_mutex); if (ret) return ret; again: -- cgit v1.2.3 From 7ec31b548a17f773ab6289e795ed3a6820e8b56e Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 26 Jan 2012 15:01:12 -0500 Subject: Btrfs: do not defrag a file partially xfstests 218 complains that btrfs defrags a file partially: After: 1 Write backwards sync, but contiguous - should defrag to 1 extent Before: 10 -After: 1 +After: 2 To fix this, we need to set max_to_defrag count properly. Signed-off-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 6834be4c8709..0b06a5ca8afc 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1066,7 +1066,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, i = range->start >> PAGE_CACHE_SHIFT; } if (!max_to_defrag) - max_to_defrag = last_index; + max_to_defrag = last_index + 1; /* * make writeback starts from i, so the defrag range can be -- cgit v1.2.3 From 600a45e1d5e376f679ff9ecc4ce9452710a6d27c Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 16 Feb 2012 15:01:24 +0800 Subject: Btrfs: fix deadlock on page lock when doing auto-defragment When I ran xfstests circularly on a auto-defragment btrfs, the deadlock happened. Steps to reproduce: [tty0] # export MOUNT_OPTIONS="-o autodefrag" # export TEST_DEV= # export TEST_DIR= # export SCRATCH_DEV= # export SCRATCH_MNT= # while [ 1 ] > do > ./check 091 127 263 > sleep 1 > done [tty1] # while [ 1 ] > do > echo 3 > /proc/sys/vm/drop_caches > done Several hours later, the test processes will hang on, and the deadlock will happen on page lock. The reason is that: Auto defrag task Flush thread Test task btrfs_writepages() add ordered extent (including page 1, 2) set page 1 writeback set page 2 writeback endio_fn() end page 2 writeback release page 2 lock page 1 alloc and lock page 2 page 2 is not uptodate btrfs_readpage() start ordered extent() btrfs_writepages() try to lock page 1 so deadlock happens. Fix this bug by unlocking the page which is in writeback, and re-locking it after the writeback end. Signed-off-by: Miao Xie --- fs/btrfs/ioctl.c | 53 +++++++++++++++++++++++++++++------------------------ 1 file changed, 29 insertions(+), 24 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 0b06a5ca8afc..e9bdb8b783e5 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -862,6 +862,7 @@ static int cluster_pages_for_defrag(struct inode *inode, int i_done; struct btrfs_ordered_extent *ordered; struct extent_state *cached_state = NULL; + struct extent_io_tree *tree; gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); if (isize == 0) @@ -872,18 +873,34 @@ static int cluster_pages_for_defrag(struct inode *inode, num_pages << PAGE_CACHE_SHIFT); if (ret) return ret; -again: - ret = 0; i_done = 0; + tree = &BTRFS_I(inode)->io_tree; /* step one, lock all the pages */ for (i = 0; i < num_pages; i++) { struct page *page; +again: page = find_or_create_page(inode->i_mapping, - start_index + i, mask); + start_index + i, mask); if (!page) break; + page_start = page_offset(page); + page_end = page_start + PAGE_CACHE_SIZE - 1; + while (1) { + lock_extent(tree, page_start, page_end, GFP_NOFS); + ordered = btrfs_lookup_ordered_extent(inode, + page_start); + unlock_extent(tree, page_start, page_end, GFP_NOFS); + if (!ordered) + break; + + unlock_page(page); + btrfs_start_ordered_extent(inode, ordered, 1); + btrfs_put_ordered_extent(ordered); + lock_page(page); + } + if (!PageUptodate(page)) { btrfs_readpage(NULL, page); lock_page(page); @@ -894,15 +911,22 @@ again: break; } } + isize = i_size_read(inode); file_end = (isize - 1) >> PAGE_CACHE_SHIFT; - if (!isize || page->index > file_end || - page->mapping != inode->i_mapping) { + if (!isize || page->index > file_end) { /* whoops, we blew past eof, skip this page */ unlock_page(page); page_cache_release(page); break; } + + if (page->mapping != inode->i_mapping) { + unlock_page(page); + page_cache_release(page); + goto again; + } + pages[i] = page; i_done++; } @@ -925,25 +949,6 @@ again: lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end - 1, 0, &cached_state, GFP_NOFS); - ordered = btrfs_lookup_first_ordered_extent(inode, page_end - 1); - if (ordered && - ordered->file_offset + ordered->len > page_start && - ordered->file_offset < page_end) { - btrfs_put_ordered_extent(ordered); - unlock_extent_cached(&BTRFS_I(inode)->io_tree, - page_start, page_end - 1, - &cached_state, GFP_NOFS); - for (i = 0; i < i_done; i++) { - unlock_page(pages[i]); - page_cache_release(pages[i]); - } - btrfs_wait_ordered_range(inode, page_start, - page_end - page_start); - goto again; - } - if (ordered) - btrfs_put_ordered_extent(ordered); - clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING, 0, 0, &cached_state, -- cgit v1.2.3 From 16780cabb877dbd0c8c5e9ff9bdebd6c5bdd1a7b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 20 Feb 2012 22:14:55 -0500 Subject: Btrfs: add extra sanity checks on the path names in btrfs_mksubvol Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index e9bdb8b783e5..05446f77f99b 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1333,6 +1333,12 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, goto out; } + if (name[0] == '.' && + (namelen == 1 || (name[1] == '.' && namelen == 2))) { + ret = -EEXIST; + goto out; + } + if (subvol) { ret = btrfs_mksubvol(&file->f_path, name, namelen, NULL, transid, readonly); -- cgit v1.2.3