From ee3441b49092000402748f5345ee0a3d4c8ac04e Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Tue, 9 Jul 2013 16:37:21 -0400 Subject: btrfs: fall back to global reservation when removing subvolumes I recently did some ENOSPC testing that involved filling the disk while create and removing snapshots in a loop. During the test cycle, I ran into an ENOSPC when trying to remove a snapshot, leaving the fs stuck in ENOSPC even after a umount/mount cycle. This patch allow subvolume removal to fall back onto the global block reservation in order to succeed when it would have failed otherwise. Signed-off-by: Jeff Mahoney Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 238a05545ee2..841044b80658 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -396,7 +396,7 @@ static noinline int create_subvol(struct inode *dir, * of create_snapshot(). */ ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, - 7, &qgroup_reserved); + 7, &qgroup_reserved, false); if (ret) return ret; @@ -576,7 +576,8 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, */ ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root, &pending_snapshot->block_rsv, 7, - &pending_snapshot->qgroup_reserved); + &pending_snapshot->qgroup_reserved, + false); if (ret) goto out; @@ -2174,7 +2175,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, * ref/backref. */ err = btrfs_subvolume_reserve_metadata(root, &block_rsv, - 5, &qgroup_reserved); + 5, &qgroup_reserved, true); if (err) goto out_up_write; -- cgit v1.2.3 From 3cae210fa529d69cb25c2a3c491f29dab687b245 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 16 Jul 2013 11:19:18 +0800 Subject: btrfs: Cleanup for using BTRFS_SETGET_STACK instead of raw convert Some codes still use the cpu_to_lexx instead of the BTRFS_SETGET_STACK_FUNCS declared in ctree.h. Also added some BTRFS_SETGET_STACK_FUNCS for btrfs_header btrfs_timespec and other structures. Signed-off-by: Qu Wenruo Reviewed-by: Miao Xie Reviewed-by: David Sterba Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 841044b80658..f856c97952db 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -436,15 +436,15 @@ static noinline int create_subvol(struct inode *dir, memset(&root_item, 0, sizeof(root_item)); inode_item = &root_item.inode; - inode_item->generation = cpu_to_le64(1); - inode_item->size = cpu_to_le64(3); - inode_item->nlink = cpu_to_le32(1); - inode_item->nbytes = cpu_to_le64(root->leafsize); - inode_item->mode = cpu_to_le32(S_IFDIR | 0755); + btrfs_set_stack_inode_generation(inode_item, 1); + btrfs_set_stack_inode_size(inode_item, 3); + btrfs_set_stack_inode_nlink(inode_item, 1); + btrfs_set_stack_inode_nbytes(inode_item, root->leafsize); + btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755); - root_item.flags = 0; - root_item.byte_limit = 0; - inode_item->flags = cpu_to_le64(BTRFS_INODE_ROOT_ITEM_INIT); + btrfs_set_root_flags(&root_item, 0); + btrfs_set_root_limit(&root_item, 0); + btrfs_set_stack_inode_flags(inode_item, BTRFS_INODE_ROOT_ITEM_INIT); btrfs_set_root_bytenr(&root_item, leaf->start); btrfs_set_root_generation(&root_item, trans->transid); @@ -457,8 +457,8 @@ static noinline int create_subvol(struct inode *dir, btrfs_root_generation(&root_item)); uuid_le_gen(&new_uuid); memcpy(root_item.uuid, new_uuid.b, BTRFS_UUID_SIZE); - root_item.otime.sec = cpu_to_le64(cur_time.tv_sec); - root_item.otime.nsec = cpu_to_le32(cur_time.tv_nsec); + btrfs_set_stack_timespec_sec(&root_item.otime, cur_time.tv_sec); + btrfs_set_stack_timespec_nsec(&root_item.otime, cur_time.tv_nsec); root_item.ctime = root_item.otime; btrfs_set_root_ctransid(&root_item, trans->transid); btrfs_set_root_otransid(&root_item, trans->transid); @@ -4011,10 +4011,10 @@ static long btrfs_ioctl_set_received_subvol(struct file *file, memcpy(root_item->received_uuid, sa->uuid, BTRFS_UUID_SIZE); btrfs_set_root_stransid(root_item, sa->stransid); btrfs_set_root_rtransid(root_item, sa->rtransid); - root_item->stime.sec = cpu_to_le64(sa->stime.sec); - root_item->stime.nsec = cpu_to_le32(sa->stime.nsec); - root_item->rtime.sec = cpu_to_le64(sa->rtime.sec); - root_item->rtime.nsec = cpu_to_le32(sa->rtime.nsec); + btrfs_set_stack_timespec_sec(&root_item->stime, sa->stime.sec); + btrfs_set_stack_timespec_nsec(&root_item->stime, sa->stime.nsec); + btrfs_set_stack_timespec_sec(&root_item->rtime, sa->rtime.sec); + btrfs_set_stack_timespec_nsec(&root_item->rtime, sa->rtime.nsec); ret = btrfs_update_root(trans, root->fs_info->tree_root, &root->root_key, &root->root_item); -- cgit v1.2.3 From a1b83ac52d23c85581b87836c346fb5cb90f6bfd Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Fri, 19 Jul 2013 17:39:32 +0800 Subject: btrfs: fix get set label blocking against balance btrfs_ioctl_get_fslabel() and btrfs_ioctl_set_fslabel() used root->fs_info->volume_mutex mutex which caused operations like balance to block set/get label operation until its completion and generally balance operation takes a long time to complete, so it will be annoying to the user when cli appears hung also this patch will add a bit of optimization within the btrfs_ioctl_get_falabel() function. v1->v2: use fs_info->super_lock instead of uuid_mutex Signed-off-by: Anand Jain Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index f856c97952db..556b3d5b18da 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -4042,18 +4042,22 @@ out: static int btrfs_ioctl_get_fslabel(struct file *file, void __user *arg) { struct btrfs_root *root = BTRFS_I(file_inode(file))->root; - const char *label = root->fs_info->super_copy->label; - size_t len = strnlen(label, BTRFS_LABEL_SIZE); + size_t len; int ret; + char label[BTRFS_LABEL_SIZE]; + + spin_lock(&root->fs_info->super_lock); + memcpy(label, root->fs_info->super_copy->label, BTRFS_LABEL_SIZE); + spin_unlock(&root->fs_info->super_lock); + + len = strnlen(label, BTRFS_LABEL_SIZE); if (len == BTRFS_LABEL_SIZE) { pr_warn("btrfs: label is too long, return the first %zu bytes\n", --len); } - mutex_lock(&root->fs_info->volume_mutex); ret = copy_to_user(arg, label, len); - mutex_unlock(&root->fs_info->volume_mutex); return ret ? -EFAULT : 0; } @@ -4082,18 +4086,18 @@ static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg) if (ret) return ret; - mutex_lock(&root->fs_info->volume_mutex); trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { ret = PTR_ERR(trans); goto out_unlock; } + spin_lock(&root->fs_info->super_lock); strcpy(super_block->label, label); + spin_unlock(&root->fs_info->super_lock); ret = btrfs_end_transaction(trans, root); out_unlock: - mutex_unlock(&root->fs_info->volume_mutex); mnt_drop_write_file(file); return ret; } -- cgit v1.2.3 From 77fe20dc6291e10c3594124b4505161e701a3cc4 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 6 Aug 2013 11:42:48 -0700 Subject: btrfs: abtract out range locking in clone ioctl() The range locking in btrfs_ioctl_clone is trivially broken out into it's own function. This reduces the complexity of btrfs_ioctl_clone() by a small bit and makes that locking code available to future functions in fs/btrfs/ioctl.c Signed-off-by: Mark Fasheh Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 556b3d5b18da..76c7657a7f53 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2470,6 +2470,26 @@ out: return ret; } +static inline void lock_extent_range(struct inode *inode, u64 off, u64 len) +{ + /* do any pending delalloc/csum calc on src, one way or + another, and lock file content */ + while (1) { + struct btrfs_ordered_extent *ordered; + lock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); + ordered = btrfs_lookup_first_ordered_extent(inode, + off + len - 1); + if (!ordered && + !test_range_bit(&BTRFS_I(inode)->io_tree, off, + off + len - 1, EXTENT_DELALLOC, 0, NULL)) + break; + unlock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); + if (ordered) + btrfs_put_ordered_extent(ordered); + btrfs_wait_ordered_range(inode, off, len); + } +} + static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, u64 off, u64 olen, u64 destoff) { @@ -2598,21 +2618,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, truncate_inode_pages_range(&inode->i_data, destoff, PAGE_CACHE_ALIGN(destoff + len) - 1); - /* do any pending delalloc/csum calc on src, one way or - another, and lock file content */ - while (1) { - struct btrfs_ordered_extent *ordered; - lock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1); - ordered = btrfs_lookup_first_ordered_extent(src, off + len - 1); - if (!ordered && - !test_range_bit(&BTRFS_I(src)->io_tree, off, off + len - 1, - EXTENT_DELALLOC, 0, NULL)) - break; - unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1); - if (ordered) - btrfs_put_ordered_extent(ordered); - btrfs_wait_ordered_range(src, off, len); - } + lock_extent_range(src, off, len); /* clone data */ key.objectid = btrfs_ino(src); -- cgit v1.2.3 From 32b7c687c52a0b78a7d41ce6423c2fd8f5ca92da Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 6 Aug 2013 11:42:49 -0700 Subject: btrfs_ioctl_clone: Move clone code into it's own function There's some 250+ lines here that are easily encapsulated into their own function. I don't change how anything works here, just create and document the new btrfs_clone() function from btrfs_ioctl_clone() code. Signed-off-by: Mark Fasheh Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 254 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 139 insertions(+), 115 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 76c7657a7f53..5b5148a1b0d3 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2490,136 +2490,43 @@ static inline void lock_extent_range(struct inode *inode, u64 off, u64 len) } } -static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, - u64 off, u64 olen, u64 destoff) +/** + * btrfs_clone() - clone a range from inode file to another + * + * @src: Inode to clone from + * @inode: Inode to clone to + * @off: Offset within source to start clone from + * @olen: Original length, passed by user, of range to clone + * @olen_aligned: Block-aligned value of olen, extent_same uses + * identical values here + * @destoff: Offset within @inode to start clone + */ +static int btrfs_clone(struct inode *src, struct inode *inode, + u64 off, u64 olen, u64 olen_aligned, u64 destoff) { - struct inode *inode = file_inode(file); struct btrfs_root *root = BTRFS_I(inode)->root; - struct fd src_file; - struct inode *src; - struct btrfs_trans_handle *trans; - struct btrfs_path *path; + struct btrfs_path *path = NULL; struct extent_buffer *leaf; - char *buf; + struct btrfs_trans_handle *trans; + char *buf = NULL; struct btrfs_key key; u32 nritems; int slot; int ret; - u64 len = olen; - u64 bs = root->fs_info->sb->s_blocksize; - int same_inode = 0; - - /* - * TODO: - * - split compressed inline extents. annoying: we need to - * decompress into destination's address_space (the file offset - * may change, so source mapping won't do), then recompress (or - * otherwise reinsert) a subrange. - * - allow ranges within the same file to be cloned (provided - * they don't overlap)? - */ - - /* the destination must be opened for writing */ - if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) - return -EINVAL; - - if (btrfs_root_readonly(root)) - return -EROFS; - - ret = mnt_want_write_file(file); - if (ret) - return ret; - - src_file = fdget(srcfd); - if (!src_file.file) { - ret = -EBADF; - goto out_drop_write; - } - - ret = -EXDEV; - if (src_file.file->f_path.mnt != file->f_path.mnt) - goto out_fput; - - src = file_inode(src_file.file); - - ret = -EINVAL; - if (src == inode) - same_inode = 1; - - /* the src must be open for reading */ - if (!(src_file.file->f_mode & FMODE_READ)) - goto out_fput; - - /* don't make the dst file partly checksummed */ - if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) != - (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) - goto out_fput; - - ret = -EISDIR; - if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode)) - goto out_fput; - - ret = -EXDEV; - if (src->i_sb != inode->i_sb) - goto out_fput; + u64 len = olen_aligned; ret = -ENOMEM; buf = vmalloc(btrfs_level_size(root, 0)); if (!buf) - goto out_fput; + return ret; path = btrfs_alloc_path(); if (!path) { vfree(buf); - goto out_fput; - } - path->reada = 2; - - if (!same_inode) { - if (inode < src) { - mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); - mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD); - } else { - mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT); - mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); - } - } else { - mutex_lock(&src->i_mutex); - } - - /* determine range to clone */ - ret = -EINVAL; - if (off + len > src->i_size || off + len < off) - goto out_unlock; - if (len == 0) - olen = len = src->i_size - off; - /* if we extend to eof, continue to block boundary */ - if (off + len == src->i_size) - len = ALIGN(src->i_size, bs) - off; - - /* verify the end result is block aligned */ - if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) || - !IS_ALIGNED(destoff, bs)) - goto out_unlock; - - /* verify if ranges are overlapped within the same file */ - if (same_inode) { - if (destoff + len > off && destoff < off + len) - goto out_unlock; - } - - if (destoff > inode->i_size) { - ret = btrfs_cont_expand(inode, inode->i_size, destoff); - if (ret) - goto out_unlock; + return ret; } - /* truncate page cache pages from target inode range */ - truncate_inode_pages_range(&inode->i_data, destoff, - PAGE_CACHE_ALIGN(destoff + len) - 1); - - lock_extent_range(src, off, len); - + path->reada = 2; /* clone data */ key.objectid = btrfs_ino(src); key.type = BTRFS_EXTENT_DATA_KEY; @@ -2865,15 +2772,132 @@ next: key.offset++; } ret = 0; + out: btrfs_release_path(path); + btrfs_free_path(path); + vfree(buf); + return ret; +} + +static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, + u64 off, u64 olen, u64 destoff) +{ + struct inode *inode = fdentry(file)->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct fd src_file; + struct inode *src; + int ret; + u64 len = olen; + u64 bs = root->fs_info->sb->s_blocksize; + int same_inode = 0; + + /* + * TODO: + * - split compressed inline extents. annoying: we need to + * decompress into destination's address_space (the file offset + * may change, so source mapping won't do), then recompress (or + * otherwise reinsert) a subrange. + * - allow ranges within the same file to be cloned (provided + * they don't overlap)? + */ + + /* the destination must be opened for writing */ + if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) + return -EINVAL; + + if (btrfs_root_readonly(root)) + return -EROFS; + + ret = mnt_want_write_file(file); + if (ret) + return ret; + + src_file = fdget(srcfd); + if (!src_file.file) { + ret = -EBADF; + goto out_drop_write; + } + + ret = -EXDEV; + if (src_file.file->f_path.mnt != file->f_path.mnt) + goto out_fput; + + src = file_inode(src_file.file); + + ret = -EINVAL; + if (src == inode) + same_inode = 1; + + /* the src must be open for reading */ + if (!(src_file.file->f_mode & FMODE_READ)) + goto out_fput; + + /* don't make the dst file partly checksummed */ + if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) != + (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) + goto out_fput; + + ret = -EISDIR; + if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode)) + goto out_fput; + + ret = -EXDEV; + if (src->i_sb != inode->i_sb) + goto out_fput; + + if (!same_inode) { + if (inode < src) { + mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD); + } else { + mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); + } + } else { + mutex_lock(&src->i_mutex); + } + + /* determine range to clone */ + ret = -EINVAL; + if (off + len > src->i_size || off + len < off) + goto out_unlock; + if (len == 0) + olen = len = src->i_size - off; + /* if we extend to eof, continue to block boundary */ + if (off + len == src->i_size) + len = ALIGN(src->i_size, bs) - off; + + /* verify the end result is block aligned */ + if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) || + !IS_ALIGNED(destoff, bs)) + goto out_unlock; + + /* verify if ranges are overlapped within the same file */ + if (same_inode) { + if (destoff + len > off && destoff < off + len) + goto out_unlock; + } + + if (destoff > inode->i_size) { + ret = btrfs_cont_expand(inode, inode->i_size, destoff); + if (ret) + goto out_unlock; + } + + /* truncate page cache pages from target inode range */ + truncate_inode_pages_range(&inode->i_data, destoff, + PAGE_CACHE_ALIGN(destoff + len) - 1); + + lock_extent_range(src, off, len); + + ret = btrfs_clone(src, inode, off, olen, len, destoff); + unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1); out_unlock: mutex_unlock(&src->i_mutex); if (!same_inode) mutex_unlock(&inode->i_mutex); - vfree(buf); - btrfs_free_path(path); out_fput: fdput(src_file); out_drop_write: -- cgit v1.2.3 From 416161db9b63e353a8fb79d1369779175102fca1 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 6 Aug 2013 11:42:51 -0700 Subject: btrfs: offline dedupe This patch adds an ioctl, BTRFS_IOC_FILE_EXTENT_SAME which will try to de-duplicate a list of extents across a range of files. Internally, the ioctl re-uses code from the clone ioctl. This avoids rewriting a large chunk of extent handling code. Userspace passes in an array of file, offset pairs along with a length argument. The ioctl will then (for each dedupe) do a byte-by-byte comparison of the user data before deduping the extent. Status and number of bytes deduped are returned for each operation. Signed-off-by: Mark Fasheh Reviewed-by: Zach Brown Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 279 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 279 insertions(+) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 5b5148a1b0d3..022d8364e072 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -43,6 +43,7 @@ #include #include #include +#include #include "compat.h" #include "ctree.h" #include "disk-io.h" @@ -57,6 +58,9 @@ #include "send.h" #include "dev-replace.h" +static int btrfs_clone(struct inode *src, struct inode *inode, + u64 off, u64 olen, u64 olen_aligned, u64 destoff); + /* Mask out flags that are inappropriate for the given type of inode. */ static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) { @@ -2470,6 +2474,34 @@ out: return ret; } +static struct page *extent_same_get_page(struct inode *inode, u64 off) +{ + struct page *page; + pgoff_t index; + struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; + + index = off >> PAGE_CACHE_SHIFT; + + page = grab_cache_page(inode->i_mapping, index); + if (!page) + return NULL; + + if (!PageUptodate(page)) { + if (extent_read_full_page_nolock(tree, page, btrfs_get_extent, + 0)) + return NULL; + lock_page(page); + if (!PageUptodate(page)) { + unlock_page(page); + page_cache_release(page); + return NULL; + } + } + unlock_page(page); + + return page; +} + static inline void lock_extent_range(struct inode *inode, u64 off, u64 len) { /* do any pending delalloc/csum calc on src, one way or @@ -2490,6 +2522,251 @@ static inline void lock_extent_range(struct inode *inode, u64 off, u64 len) } } +static void btrfs_double_unlock(struct inode *inode1, u64 loff1, + struct inode *inode2, u64 loff2, u64 len) +{ + unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1); + unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1); + + mutex_unlock(&inode1->i_mutex); + mutex_unlock(&inode2->i_mutex); +} + +static void btrfs_double_lock(struct inode *inode1, u64 loff1, + struct inode *inode2, u64 loff2, u64 len) +{ + if (inode1 < inode2) { + swap(inode1, inode2); + swap(loff1, loff2); + } + + mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); + lock_extent_range(inode1, loff1, len); + if (inode1 != inode2) { + mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); + lock_extent_range(inode2, loff2, len); + } +} + +static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst, + u64 dst_loff, u64 len) +{ + int ret = 0; + struct page *src_page, *dst_page; + unsigned int cmp_len = PAGE_CACHE_SIZE; + void *addr, *dst_addr; + + while (len) { + if (len < PAGE_CACHE_SIZE) + cmp_len = len; + + src_page = extent_same_get_page(src, loff); + if (!src_page) + return -EINVAL; + dst_page = extent_same_get_page(dst, dst_loff); + if (!dst_page) { + page_cache_release(src_page); + return -EINVAL; + } + addr = kmap_atomic(src_page); + dst_addr = kmap_atomic(dst_page); + + flush_dcache_page(src_page); + flush_dcache_page(dst_page); + + if (memcmp(addr, dst_addr, cmp_len)) + ret = BTRFS_SAME_DATA_DIFFERS; + + kunmap_atomic(addr); + kunmap_atomic(dst_addr); + page_cache_release(src_page); + page_cache_release(dst_page); + + if (ret) + break; + + loff += cmp_len; + dst_loff += cmp_len; + len -= cmp_len; + } + + return ret; +} + +static int extent_same_check_offsets(struct inode *inode, u64 off, u64 len) +{ + u64 bs = BTRFS_I(inode)->root->fs_info->sb->s_blocksize; + + if (off + len > inode->i_size || off + len < off) + return -EINVAL; + /* Check that we are block aligned - btrfs_clone() requires this */ + if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs)) + return -EINVAL; + + return 0; +} + +static int btrfs_extent_same(struct inode *src, u64 loff, u64 len, + struct inode *dst, u64 dst_loff) +{ + int ret; + + /* + * btrfs_clone() can't handle extents in the same file + * yet. Once that works, we can drop this check and replace it + * with a check for the same inode, but overlapping extents. + */ + if (src == dst) + return -EINVAL; + + btrfs_double_lock(src, loff, dst, dst_loff, len); + + ret = extent_same_check_offsets(src, loff, len); + if (ret) + goto out_unlock; + + ret = extent_same_check_offsets(dst, dst_loff, len); + if (ret) + goto out_unlock; + + /* don't make the dst file partly checksummed */ + if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) != + (BTRFS_I(dst)->flags & BTRFS_INODE_NODATASUM)) { + ret = -EINVAL; + goto out_unlock; + } + + ret = btrfs_cmp_data(src, loff, dst, dst_loff, len); + if (ret == 0) + ret = btrfs_clone(src, dst, loff, len, len, dst_loff); + +out_unlock: + btrfs_double_unlock(src, loff, dst, dst_loff, len); + + return ret; +} + +#define BTRFS_MAX_DEDUPE_LEN (16 * 1024 * 1024) + +static long btrfs_ioctl_file_extent_same(struct file *file, + void __user *argp) +{ + struct btrfs_ioctl_same_args *args = argp; + struct btrfs_ioctl_same_args same; + struct btrfs_ioctl_same_extent_info info; + struct inode *src = file->f_dentry->d_inode; + struct file *dst_file = NULL; + struct inode *dst; + u64 off; + u64 len; + int i; + int ret; + u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize; + bool is_admin = capable(CAP_SYS_ADMIN); + + if (!(file->f_mode & FMODE_READ)) + return -EINVAL; + + ret = mnt_want_write_file(file); + if (ret) + return ret; + + if (copy_from_user(&same, + (struct btrfs_ioctl_same_args __user *)argp, + sizeof(same))) { + ret = -EFAULT; + goto out; + } + + off = same.logical_offset; + len = same.length; + + /* + * Limit the total length we will dedupe for each operation. + * This is intended to bound the total time spent in this + * ioctl to something sane. + */ + if (len > BTRFS_MAX_DEDUPE_LEN) + len = BTRFS_MAX_DEDUPE_LEN; + + if (WARN_ON_ONCE(bs < PAGE_CACHE_SIZE)) { + /* + * Btrfs does not support blocksize < page_size. As a + * result, btrfs_cmp_data() won't correctly handle + * this situation without an update. + */ + ret = -EINVAL; + goto out; + } + + ret = -EISDIR; + if (S_ISDIR(src->i_mode)) + goto out; + + ret = -EACCES; + if (!S_ISREG(src->i_mode)) + goto out; + + ret = 0; + for (i = 0; i < same.dest_count; i++) { + if (copy_from_user(&info, &args->info[i], sizeof(info))) { + ret = -EFAULT; + goto out; + } + + info.bytes_deduped = 0; + + dst_file = fget(info.fd); + if (!dst_file) { + info.status = -EBADF; + goto next; + } + + if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) { + info.status = -EINVAL; + goto next; + } + + info.status = -EXDEV; + if (file->f_path.mnt != dst_file->f_path.mnt) + goto next; + + dst = dst_file->f_dentry->d_inode; + if (src->i_sb != dst->i_sb) + goto next; + + if (S_ISDIR(dst->i_mode)) { + info.status = -EISDIR; + goto next; + } + + if (!S_ISREG(dst->i_mode)) { + info.status = -EACCES; + goto next; + } + + info.status = btrfs_extent_same(src, off, len, dst, + info.logical_offset); + if (info.status == 0) + info.bytes_deduped += len; + +next: + if (dst_file) + fput(dst_file); + + if (__put_user_unaligned(info.status, &args->info[i].status) || + __put_user_unaligned(info.bytes_deduped, + &args->info[i].bytes_deduped)) { + ret = -EFAULT; + goto out; + } + } + +out: + mnt_drop_write_file(file); + return ret; +} + /** * btrfs_clone() - clone a range from inode file to another * @@ -4242,6 +4519,8 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_get_fslabel(file, argp); case BTRFS_IOC_SET_FSLABEL: return btrfs_ioctl_set_fslabel(file, argp); + case BTRFS_IOC_FILE_EXTENT_SAME: + return btrfs_ioctl_file_extent_same(file, argp); } return -ENOTTY; -- cgit v1.2.3 From 175a2b871f46272a2aedce5fb3222a72568b84c3 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 12 Aug 2013 15:36:44 -0400 Subject: Btrfs: don't allow a subvol to be deleted if it is the default subovl Eric pointed out that btrfs will happily allow you to delete the default subvol. This is a problem obviously since the next time you go to mount the file system it will freak out because it can't find the root. Fix this by adding a check to see if our default subvol points to the subvol we are trying to delete, and if it does not allowing it to happen. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 022d8364e072..317a984fe3c9 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1726,13 +1726,28 @@ out: static noinline int may_destroy_subvol(struct btrfs_root *root) { struct btrfs_path *path; + struct btrfs_dir_item *di; struct btrfs_key key; + u64 dir_id; int ret; path = btrfs_alloc_path(); if (!path) return -ENOMEM; + /* Make sure this root isn't set as the default subvol */ + dir_id = btrfs_super_root_dir(root->fs_info->super_copy); + di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root, path, + dir_id, "default", 7, 0); + if (di && !IS_ERR(di)) { + btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key); + if (key.objectid == root->root_key.objectid) { + ret = -ENOTEMPTY; + goto out; + } + btrfs_release_path(path); + } + key.objectid = root->root_key.objectid; key.type = BTRFS_ROOT_REF_KEY; key.offset = (u64)-1; -- cgit v1.2.3 From a696cf3529cecd261d6534fec242e39177b57e19 Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Wed, 14 Aug 2013 03:00:20 +0100 Subject: Btrfs: add missing error code to BTRFS_IOC_INO_LOOKUP handler If the path doesn't fit in the input buffer, return ENAMETOOLONG instead of returning with a success code (0) and a partially filled and right justified buffer. Also removed useless buffer pointer check outside the while loop. Signed-off-by: Filipe David Borba Manana Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 317a984fe3c9..f88eeea365f3 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2030,8 +2030,10 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, len = btrfs_inode_ref_name_len(l, iref); ptr -= len + 1; total_len += len + 1; - if (ptr < name) + if (ptr < name) { + ret = -ENAMETOOLONG; goto out; + } *(ptr + len) = '/'; read_extent_buffer(l, ptr,(unsigned long)(iref + 1), len); @@ -2044,8 +2046,6 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, key.offset = (u64)-1; dirid = key.objectid; } - if (ptr < name) - goto out; memmove(name, ptr, total_len); name[total_len]='\0'; ret = 0; -- cgit v1.2.3 From 18674c6cc10e78745f7af7faf26583ea9fe78c38 Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Wed, 14 Aug 2013 03:00:21 +0100 Subject: Btrfs: don't miss inode ref items in BTRFS_IOC_INO_LOOKUP If the inode ref key was not found and the current leaf slot was 0 (first item in the leaf) the code would always return -ENOENT. This was not correct because the desired inode ref item might be the last item in the previous leaf. Signed-off-by: Filipe David Borba Manana Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index f88eeea365f3..82fb11a361c4 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2013,19 +2013,21 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto out; + else if (ret > 0) { + ret = btrfs_previous_item(root, path, dirid, + BTRFS_INODE_REF_KEY); + if (ret < 0) + goto out; + else if (ret > 0) { + ret = -ENOENT; + goto out; + } + } l = path->nodes[0]; slot = path->slots[0]; - if (ret > 0 && slot > 0) - slot--; btrfs_item_key_to_cpu(l, &key, slot); - if (ret > 0 && (key.objectid != dirid || - key.type != BTRFS_INODE_REF_KEY)) { - ret = -ENOENT; - goto out; - } - iref = btrfs_item_ptr(l, slot, struct btrfs_inode_ref); len = btrfs_inode_ref_name_len(l, iref); ptr -= len + 1; -- cgit v1.2.3 From dd5f9615fc5c5e8d3751aab3a17b92768fb1ce70 Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Thu, 15 Aug 2013 17:11:20 +0200 Subject: Btrfs: maintain subvolume items in the UUID tree When a new subvolume or snapshot is created, a new UUID item is added to the UUID tree. Such items are removed when the subvolume is deleted. The ioctl to set the received subvolume UUID is also touched and will now also add this received UUID into the UUID tree together with the ID of the subvolume. The latter is also done when read-only snapshots are created which inherit all the send/receive information from the parent subvolume. User mode programs use the BTRFS_IOC_TREE_SEARCH ioctl to search and read in the UUID tree. Signed-off-by: Stefan Behrens Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 66 insertions(+), 10 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 82fb11a361c4..8e75249282c2 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -367,6 +367,13 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) return 0; } +int btrfs_is_empty_uuid(u8 *uuid) +{ + static char empty_uuid[BTRFS_UUID_SIZE] = {0}; + + return !memcmp(uuid, empty_uuid, BTRFS_UUID_SIZE); +} + static noinline int create_subvol(struct inode *dir, struct dentry *dentry, char *name, int namelen, @@ -400,7 +407,7 @@ static noinline int create_subvol(struct inode *dir, * of create_snapshot(). */ ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, - 7, &qgroup_reserved, false); + 8, &qgroup_reserved, false); if (ret) return ret; @@ -522,9 +529,14 @@ static noinline int create_subvol(struct inode *dir, ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, objectid, root->root_key.objectid, btrfs_ino(dir), index, name, namelen); - BUG_ON(ret); + ret = btrfs_uuid_tree_add(trans, root->fs_info->uuid_root, + root_item.uuid, BTRFS_UUID_KEY_SUBVOL, + objectid); + if (ret) + btrfs_abort_transaction(trans, root, ret); + fail: trans->block_rsv = NULL; trans->bytes_reserved = 0; @@ -577,9 +589,10 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, * 1 - root item * 2 - root ref/backref * 1 - root of snapshot + * 1 - UUID item */ ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root, - &pending_snapshot->block_rsv, 7, + &pending_snapshot->block_rsv, 8, &pending_snapshot->qgroup_reserved, false); if (ret) @@ -2235,6 +2248,27 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, goto out_end_trans; } } + + ret = btrfs_uuid_tree_rem(trans, root->fs_info->uuid_root, + dest->root_item.uuid, BTRFS_UUID_KEY_SUBVOL, + dest->root_key.objectid); + if (ret && ret != -ENOENT) { + btrfs_abort_transaction(trans, root, ret); + err = ret; + goto out_end_trans; + } + if (!btrfs_is_empty_uuid(dest->root_item.received_uuid)) { + ret = btrfs_uuid_tree_rem(trans, root->fs_info->uuid_root, + dest->root_item.received_uuid, + BTRFS_UUID_KEY_RECEIVED_SUBVOL, + dest->root_key.objectid); + if (ret && ret != -ENOENT) { + btrfs_abort_transaction(trans, root, ret); + err = ret; + goto out_end_trans; + } + } + out_end_trans: trans->block_rsv = NULL; trans->bytes_reserved = 0; @@ -2446,7 +2480,6 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg) struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; int ret = 0; char *s_uuid = NULL; - char empty_uuid[BTRFS_UUID_SIZE] = {0}; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -2455,7 +2488,7 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg) if (IS_ERR(di_args)) return PTR_ERR(di_args); - if (memcmp(empty_uuid, di_args->uuid, BTRFS_UUID_SIZE) != 0) + if (!btrfs_is_empty_uuid(di_args->uuid)) s_uuid = di_args->uuid; mutex_lock(&fs_devices->device_list_mutex); @@ -4292,6 +4325,7 @@ static long btrfs_ioctl_set_received_subvol(struct file *file, struct btrfs_trans_handle *trans; struct timespec ct = CURRENT_TIME; int ret = 0; + int received_uuid_changed; ret = mnt_want_write_file(file); if (ret < 0) @@ -4321,7 +4355,11 @@ static long btrfs_ioctl_set_received_subvol(struct file *file, goto out; } - trans = btrfs_start_transaction(root, 1); + /* + * 1 - root item + * 2 - uuid items (received uuid + subvol uuid) + */ + trans = btrfs_start_transaction(root, 3); if (IS_ERR(trans)) { ret = PTR_ERR(trans); trans = NULL; @@ -4332,6 +4370,14 @@ static long btrfs_ioctl_set_received_subvol(struct file *file, sa->rtime.sec = ct.tv_sec; sa->rtime.nsec = ct.tv_nsec; + received_uuid_changed = memcmp(root_item->received_uuid, sa->uuid, + BTRFS_UUID_SIZE); + if (received_uuid_changed && + !btrfs_is_empty_uuid(root_item->received_uuid)) + btrfs_uuid_tree_rem(trans, root->fs_info->uuid_root, + root_item->received_uuid, + BTRFS_UUID_KEY_RECEIVED_SUBVOL, + root->root_key.objectid); memcpy(root_item->received_uuid, sa->uuid, BTRFS_UUID_SIZE); btrfs_set_root_stransid(root_item, sa->stransid); btrfs_set_root_rtransid(root_item, sa->rtransid); @@ -4344,12 +4390,22 @@ static long btrfs_ioctl_set_received_subvol(struct file *file, &root->root_key, &root->root_item); if (ret < 0) { btrfs_end_transaction(trans, root); - trans = NULL; goto out; - } else { - ret = btrfs_commit_transaction(trans, root); - if (ret < 0) + } + if (received_uuid_changed && !btrfs_is_empty_uuid(sa->uuid)) { + ret = btrfs_uuid_tree_add(trans, root->fs_info->uuid_root, + sa->uuid, + BTRFS_UUID_KEY_RECEIVED_SUBVOL, + root->root_key.objectid); + if (ret < 0 && ret != -EEXIST) { + btrfs_abort_transaction(trans, root, ret); goto out; + } + } + ret = btrfs_commit_transaction(trans, root); + if (ret < 0) { + btrfs_abort_transaction(trans, root, ret); + goto out; } ret = copy_to_user(arg, sa, sizeof(*sa)); -- cgit v1.2.3 From 633085c79c84c391732caa40f5b3a3d2b299f7be Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Fri, 16 Aug 2013 15:23:33 +0100 Subject: Btrfs: reset force_compress on btrfs_file_defrag failure After we set force_compress with a new value (which was not being done while holding the inode mutex), if an error happens and we jump to the label out_ra, the force_compress property of the inode is not set to BTRFS_COMPRESS_NONE (unlike in the case where no errors happen). Signed-off-by: Filipe David Borba Manana Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 8e75249282c2..076cc7fff813 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1285,9 +1285,6 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, cluster = max_cluster; } - if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) - BTRFS_I(inode)->force_compress = compress_type; - if (i + cluster > ra_index) { ra_index = max(i, ra_index); btrfs_force_ra(inode->i_mapping, ra, file, ra_index, @@ -1296,6 +1293,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, } mutex_lock(&inode->i_mutex); + if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) + BTRFS_I(inode)->force_compress = compress_type; ret = cluster_pages_for_defrag(inode, pages, i, cluster); if (ret < 0) { mutex_unlock(&inode->i_mutex); @@ -1352,10 +1351,6 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, atomic_read(&root->fs_info->async_delalloc_pages) == 0)); } atomic_dec(&root->fs_info->async_submit_draining); - - mutex_lock(&inode->i_mutex); - BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE; - mutex_unlock(&inode->i_mutex); } if (range->compress_type == BTRFS_COMPRESS_LZO) { @@ -1365,6 +1360,11 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, ret = defrag_count; out_ra: + if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) { + mutex_lock(&inode->i_mutex); + BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE; + mutex_unlock(&inode->i_mutex); + } if (!file) kfree(ra); kfree(pages); -- cgit v1.2.3 From bbb651e469d99f0088e286fdeb54acca7bb4ad4e Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Mon, 19 Aug 2013 18:51:13 +0200 Subject: Btrfs: don't allow the replace procedure on read only filesystems If you start the replace procedure on a read only filesystem, at the end the procedure fails to write the updated dev_items to the chunk tree. The problem is that this error is not indicated except for a WARN_ON(). If the user now thinks that everything was done as expected and destroys the source device (with mkfs or with a hammer). The next mount fails with "failed to read chunk root" and the filesystem is gone. This commit adds code to fail the attempt to start the replace procedure if the filesystem is mounted read-only. Signed-off-by: Stefan Behrens Cc: # 3.10+ Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 076cc7fff813..559cb161a60a 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3670,6 +3670,9 @@ static long btrfs_ioctl_dev_replace(struct btrfs_root *root, void __user *arg) switch (p->cmd) { case BTRFS_IOCTL_DEV_REPLACE_CMD_START: + if (root->fs_info->sb->s_flags & MS_RDONLY) + return -EROFS; + if (atomic_xchg( &root->fs_info->mutually_exclusive_operation_running, 1)) { -- cgit v1.2.3 From c1c9ff7c94e83fae89a742df74db51156869bad5 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 20 Aug 2013 13:20:07 +0200 Subject: Btrfs: Remove superfluous casts from u64 to unsigned long long u64 is "unsigned long long" on all architectures now, so there's no need to cast it when formatting it using the "ll" length modifier. Signed-off-by: Geert Uytterhoeven Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 559cb161a60a..996951a8d28d 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1421,14 +1421,13 @@ static noinline int btrfs_ioctl_resize(struct file *file, ret = -EINVAL; goto out_free; } - printk(KERN_INFO "btrfs: resizing devid %llu\n", - (unsigned long long)devid); + printk(KERN_INFO "btrfs: resizing devid %llu\n", devid); } device = btrfs_find_device(root->fs_info, devid, NULL, NULL); if (!device) { printk(KERN_INFO "btrfs: resizer unable to find device %llu\n", - (unsigned long long)devid); + devid); ret = -ENODEV; goto out_free; } @@ -1436,7 +1435,7 @@ static noinline int btrfs_ioctl_resize(struct file *file, if (!device->writeable) { printk(KERN_INFO "btrfs: resizer unable to apply on " "readonly device %llu\n", - (unsigned long long)devid); + devid); ret = -EPERM; goto out_free; } @@ -1488,8 +1487,7 @@ static noinline int btrfs_ioctl_resize(struct file *file, new_size *= root->sectorsize; printk_in_rcu(KERN_INFO "btrfs: new size for %s is %llu\n", - rcu_str_deref(device->name), - (unsigned long long)new_size); + rcu_str_deref(device->name), new_size); if (new_size > old_size) { trans = btrfs_start_transaction(root, 0); -- cgit v1.2.3 From fba6aa75654394fccf2530041e9451414c28084f Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 20 Aug 2013 13:20:14 +0200 Subject: Btrfs: Make btrfs_header_fsid() return unsigned long Internally, btrfs_header_fsid() calculates an unsigned long, but casts it to a pointer, while all callers cast it to unsigned long again. Signed-off-by: Geert Uytterhoeven Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 996951a8d28d..41addd502bc9 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -436,8 +436,7 @@ static noinline int create_subvol(struct inode *dir, btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV); btrfs_set_header_owner(leaf, objectid); - write_extent_buffer(leaf, root->fs_info->fsid, - (unsigned long)btrfs_header_fsid(leaf), + write_extent_buffer(leaf, root->fs_info->fsid, btrfs_header_fsid(leaf), BTRFS_FSID_SIZE); write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid, (unsigned long)btrfs_header_chunk_tree_uuid(leaf), -- cgit v1.2.3 From b308bc2f05a86e728bd035e21a4974acd05f4d1e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 20 Aug 2013 13:20:15 +0200 Subject: Btrfs: Make btrfs_header_chunk_tree_uuid() return unsigned long Internally, btrfs_header_chunk_tree_uuid() calculates an unsigned long, but casts it to a pointer, while all callers cast it to unsigned long again. Signed-off-by: Geert Uytterhoeven Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 41addd502bc9..0ce93ac17f69 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -439,7 +439,7 @@ static noinline int create_subvol(struct inode *dir, write_extent_buffer(leaf, root->fs_info->fsid, btrfs_header_fsid(leaf), BTRFS_FSID_SIZE); write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid, - (unsigned long)btrfs_header_chunk_tree_uuid(leaf), + btrfs_header_chunk_tree_uuid(leaf), BTRFS_UUID_SIZE); btrfs_mark_buffer_dirty(leaf); -- cgit v1.2.3 From f71717502460c5cd6409b66835b17ae00af6d5f1 Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Mon, 12 Aug 2013 20:56:58 +0100 Subject: Btrfs: fix race conditions in BTRFS_IOC_FS_INFO ioctl The handler for the ioctl BTRFS_IOC_FS_INFO was reading the number of devices before acquiring the device list mutex. This could lead to inconsistent results because the update of the device list and the number of devices counter (amongst other counters related to the device list) are updated in volumes.c while holding the device list mutex - except for 2 places, one was volumes.c:btrfs_prepare_sprout() and the other was volumes.c:device_list_add(). For example, if we have 2 devices, with IDs 1 and 2 and then add a new device, with ID 3, and while adding the device is in progress an BTRFS_IOC_FS_INFO ioctl arrives, it could return a number of devices of 2 and a max dev id of 3. This would be incorrect. Also, this ioctl handler was reading the fsid while it can be updated concurrently. This can happen when while a new device is being added and the current filesystem is in seeding mode. Example: $ mkfs.btrfs -f /dev/sdb1 $ mkfs.btrfs -f /dev/sdb2 $ btrfstune -S 1 /dev/sdb1 $ mount /dev/sdb1 /mnt/test $ btrfs device add /dev/sdb2 /mnt/test If during the last step a BTRFS_IOC_FS_INFO ioctl was requested, it could read an fsid that was never valid (some bits part of the old fsid and others part of the new fsid). Also, it could read a number of devices that doesn't match the number of devices in the list and the max device id, as explained before. Signed-off-by: Filipe David Borba Manana Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 0ce93ac17f69..ddb4bc1252ad 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2453,10 +2453,10 @@ static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg) if (!fi_args) return -ENOMEM; + mutex_lock(&fs_devices->device_list_mutex); fi_args->num_devices = fs_devices->num_devices; memcpy(&fi_args->fsid, root->fs_info->fsid, sizeof(fi_args->fsid)); - mutex_lock(&fs_devices->device_list_mutex); list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { if (device->devid > fi_args->max_id) fi_args->max_id = device->devid; -- cgit v1.2.3 From e57138b3e96e45a63124492e736612378096290f Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Wed, 21 Aug 2013 11:44:48 +0800 Subject: btrfs: return btrfs error code for dev excl ops err now threads can return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS as defined in btrfs.h for the dev excl operation error in the FS, which means with this kernel would stop logging (almost an user error) into the /var/log/messages v2: accepts Josef' comment Signed-off-by: Anand Jain Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index ddb4bc1252ad..1a5b9462dd9a 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1394,9 +1394,8 @@ static noinline int btrfs_ioctl_resize(struct file *file, if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, 1)) { - pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); mnt_drop_write_file(file); - return -EINVAL; + return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; } mutex_lock(&root->fs_info->volume_mutex); @@ -2379,8 +2378,7 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg) if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, 1)) { - pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); - return -EINVAL; + return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; } mutex_lock(&root->fs_info->volume_mutex); @@ -3673,8 +3671,7 @@ static long btrfs_ioctl_dev_replace(struct btrfs_root *root, void __user *arg) if (atomic_xchg( &root->fs_info->mutually_exclusive_operation_running, 1)) { - pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); - ret = -EINPROGRESS; + ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; } else { ret = btrfs_dev_replace_start(root, p); atomic_set( @@ -3918,8 +3915,7 @@ again: } else { /* this is (1) */ mutex_unlock(&fs_info->balance_mutex); - pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); - ret = -EINVAL; + ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; goto out; } -- cgit v1.2.3 From f0de181c9b48a397c5a2fbe63dcdd2a26a872695 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 17 Sep 2013 10:55:51 -0400 Subject: Btrfs: kill delay_iput arg to the wait_ordered functions This is a left over of how we used to wait for ordered extents, which was to grab the inode and then run filemap flush on it. However if we have an ordered extent then we already are holding a ref on the inode, and we just use btrfs_start_ordered_extent anyway, so there is no reason to have an extra ref on the inode to start work on the ordered extent. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 1a5b9462dd9a..fd6557e7118a 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -574,7 +574,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, if (ret) return ret; - btrfs_wait_ordered_extents(root, 0); + btrfs_wait_ordered_extents(root); pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); if (!pending_snapshot) -- cgit v1.2.3 From 1cecf579d161527f610ef0da852dd6122e434a49 Mon Sep 17 00:00:00 2001 From: chandan Date: Fri, 13 Sep 2013 19:34:10 +0530 Subject: Btrfs: btrfs_ioctl_default_subvol: Revert back to toplevel subvolume when arg is 0 This patch makes it possible to set BTRFS_FS_TREE_OBJECTID as the default subvolume by passing a subvolume id of 0. Signed-off-by: chandan Reviewed-by: David Sterba Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index fd6557e7118a..fba259ab9994 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3310,7 +3310,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) } if (!objectid) - objectid = root->root_key.objectid; + objectid = BTRFS_FS_TREE_OBJECTID; location.objectid = objectid; location.type = BTRFS_ROOT_ITEM_KEY; -- cgit v1.2.3 From cbf8b8ca3ec799c11a4cbb29d812d84adf2990b0 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 17 Sep 2013 15:43:54 -0700 Subject: btrfs: change extent-same to copy entire argument struct btrfs_ioctl_file_extent_same() uses __put_user_unaligned() to copy some data back to it's argument struct. Unfortunately, not all architectures provide __put_user_unaligned(), so compiles break on them if btrfs is selected. Instead, just copy the whole struct in / out at the start and end of operations, respectively. Signed-off-by: Mark Fasheh Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 76 +++++++++++++++++++++++++++++++++----------------------- 1 file changed, 45 insertions(+), 31 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index fba259ab9994..9d46f60cb943 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2696,9 +2696,9 @@ out_unlock: static long btrfs_ioctl_file_extent_same(struct file *file, void __user *argp) { - struct btrfs_ioctl_same_args *args = argp; - struct btrfs_ioctl_same_args same; - struct btrfs_ioctl_same_extent_info info; + struct btrfs_ioctl_same_args tmp; + struct btrfs_ioctl_same_args *same; + struct btrfs_ioctl_same_extent_info *info; struct inode *src = file->f_dentry->d_inode; struct file *dst_file = NULL; struct inode *dst; @@ -2706,6 +2706,7 @@ static long btrfs_ioctl_file_extent_same(struct file *file, u64 len; int i; int ret; + unsigned long size; u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize; bool is_admin = capable(CAP_SYS_ADMIN); @@ -2716,15 +2717,30 @@ static long btrfs_ioctl_file_extent_same(struct file *file, if (ret) return ret; - if (copy_from_user(&same, + if (copy_from_user(&tmp, (struct btrfs_ioctl_same_args __user *)argp, - sizeof(same))) { + sizeof(tmp))) { ret = -EFAULT; goto out; } - off = same.logical_offset; - len = same.length; + size = sizeof(tmp) + + tmp.dest_count * sizeof(struct btrfs_ioctl_same_extent_info); + + same = kmalloc(size, GFP_NOFS); + if (!same) { + ret = -EFAULT; + goto out; + } + + if (copy_from_user(same, + (struct btrfs_ioctl_same_args __user *)argp, size)) { + ret = -EFAULT; + goto out; + } + + off = same->logical_offset; + len = same->length; /* * Limit the total length we will dedupe for each operation. @@ -2752,27 +2768,28 @@ static long btrfs_ioctl_file_extent_same(struct file *file, if (!S_ISREG(src->i_mode)) goto out; - ret = 0; - for (i = 0; i < same.dest_count; i++) { - if (copy_from_user(&info, &args->info[i], sizeof(info))) { - ret = -EFAULT; - goto out; - } + /* pre-format output fields to sane values */ + for (i = 0; i < same->dest_count; i++) { + same->info[i].bytes_deduped = 0ULL; + same->info[i].status = 0; + } - info.bytes_deduped = 0; + ret = 0; + for (i = 0; i < same->dest_count; i++) { + info = &same->info[i]; - dst_file = fget(info.fd); + dst_file = fget(info->fd); if (!dst_file) { - info.status = -EBADF; + info->status = -EBADF; goto next; } if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) { - info.status = -EINVAL; + info->status = -EINVAL; goto next; } - info.status = -EXDEV; + info->status = -EXDEV; if (file->f_path.mnt != dst_file->f_path.mnt) goto next; @@ -2781,32 +2798,29 @@ static long btrfs_ioctl_file_extent_same(struct file *file, goto next; if (S_ISDIR(dst->i_mode)) { - info.status = -EISDIR; + info->status = -EISDIR; goto next; } if (!S_ISREG(dst->i_mode)) { - info.status = -EACCES; + info->status = -EACCES; goto next; } - info.status = btrfs_extent_same(src, off, len, dst, - info.logical_offset); - if (info.status == 0) - info.bytes_deduped += len; + info->status = btrfs_extent_same(src, off, len, dst, + info->logical_offset); + if (info->status == 0) + info->bytes_deduped += len; next: if (dst_file) fput(dst_file); - - if (__put_user_unaligned(info.status, &args->info[i].status) || - __put_user_unaligned(info.bytes_deduped, - &args->info[i].bytes_deduped)) { - ret = -EFAULT; - goto out; - } } + ret = copy_to_user(argp, same, size); + if (ret) + ret = -EFAULT; + out: mnt_drop_write_file(file); return ret; -- cgit v1.2.3