From 22c44fe65adacd20a174f3f54686509ee94ef7be Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 30 Nov 2011 10:45:38 -0500 Subject: Btrfs: deal with enospc from dirtying inodes properly Now that we're properly keeping track of delayed inode space we've been getting a lot of warnings out of btrfs_dirty_inode() when running xfstest 83. This is because a bunch of people call mark_inode_dirty, which is void so we can't return ENOSPC. This needs to be fixed in a few areas 1) file_update_time - this updates the mtime and such when writing to a file, which will call mark_inode_dirty. So copy file_update_time into btrfs so we can call btrfs_dirty_inode directly and return an error if we get one appropriately. 2) fix symlinks to use btrfs_setattr for ->setattr. For some reason we weren't setting ->setattr for symlinks, even though we should have been. This catches one of the cases where we were getting errors in mark_inode_dirty. 3) Fix btrfs_setattr and btrfs_setsize to call btrfs_dirty_inode directly instead of mark_inode_dirty. This lets us return errors properly for truncate and chown/anything related to setattr. 4) Add a new btrfs_fs_dirty_inode which will just call btrfs_dirty_inode and print an error if we have one. The only remaining user we can't control for this is touch_atime(), but we don't really want to keep people from walking down the tree if we don't have space to save the atime update, so just complain but don't worry about it. With this patch xfstests 83 complains a handful of times instead of hundreds of times. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/file.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index f2e928289600..cc7492c823f3 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1387,7 +1387,11 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, goto out; } - file_update_time(file); + err = btrfs_update_time(file); + if (err) { + mutex_unlock(&inode->i_mutex); + goto out; + } BTRFS_I(inode)->sequence++; start_pos = round_down(pos, root->sectorsize); -- cgit v1.2.3 From 142349f541d0bb6bc3e0d4563268105aada42b0b Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Fri, 16 Dec 2011 12:32:57 -0500 Subject: btrfs: lower the dirty balance poll interval Tests show that the original large intervals can easily make the dirty limit exceeded on 100 concurrent dd's. So adapt to as large as the next check point selected by the dirty throttling algorithm. Signed-off-by: Wu Fengguang Signed-off-by: Chris Mason --- fs/btrfs/file.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index dafdfa059bf6..52305a885c3f 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1167,6 +1167,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / (sizeof(struct page *))); + nrptrs = min(nrptrs, current->nr_dirtied_pause - current->nr_dirtied); + nrptrs = max(nrptrs, 8); pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); if (!pages) return -ENOMEM; -- cgit v1.2.3 From 32c7f202a4801252a0f3578807b75a961f792870 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Mon, 8 Aug 2011 15:19:47 -0600 Subject: btrfs: fix dirtied pages accounting on sub-page writes When doing 1KB sequential writes to the same page, balance_dirty_pages_ratelimited_nr() should be called once instead of 4 times, the latter makes the dirtier tasks be throttled much too heavy. Fix it with proper de-accounting on clear_page_dirty_for_io(). CC: Chris Mason Signed-off-by: Wu Fengguang --- fs/btrfs/file.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 97fbe939c050..bfb620ead295 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1136,7 +1136,8 @@ again: GFP_NOFS); } for (i = 0; i < num_pages; i++) { - clear_page_dirty_for_io(pages[i]); + if (clear_page_dirty_for_io(pages[i])) + account_page_redirty(pages[i]); set_page_extent_mapped(pages[i]); WARN_ON(!PageLocked(pages[i])); } -- cgit v1.2.3 From 66d7e7f09f77456fe68683247d77721032a00ee5 Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Mon, 12 Sep 2011 15:26:38 +0200 Subject: Btrfs: mark delayed refs as for cow Add a for_cow parameter to add_delayed_*_ref and pass the appropriate value from every call site. The for_cow parameter will later on be used to determine if a ref will change anything with respect to qgroups. Delayed refs coming from relocation are always counted as for_cow, as they don't change subvol quota. Also pass in the fs_info for later use. btrfs_find_all_roots() will use this as an optimization, as changes that are for_cow will not change anything with respect to which root points to a certain leaf. Thus, we don't need to add the current sequence number to those delayed refs. Signed-off-by: Arne Jansen Signed-off-by: Jan Schmidt --- fs/btrfs/file.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index f2e928289600..d2b60ed6c33f 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -678,7 +678,7 @@ next_slot: disk_bytenr, num_bytes, 0, root->root_key.objectid, new_key.objectid, - start - extent_offset); + start - extent_offset, 0); BUG_ON(ret); *hint_byte = disk_bytenr; } @@ -753,7 +753,7 @@ next_slot: disk_bytenr, num_bytes, 0, root->root_key.objectid, key.objectid, key.offset - - extent_offset); + extent_offset, 0); BUG_ON(ret); inode_sub_bytes(inode, extent_end - key.offset); @@ -962,7 +962,7 @@ again: ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, root->root_key.objectid, - ino, orig_offset); + ino, orig_offset, 0); BUG_ON(ret); if (split == start) { @@ -989,7 +989,7 @@ again: del_nr++; ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 0, root->root_key.objectid, - ino, orig_offset); + ino, orig_offset, 0); BUG_ON(ret); } other_start = 0; @@ -1006,7 +1006,7 @@ again: del_nr++; ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 0, root->root_key.objectid, - ino, orig_offset); + ino, orig_offset, 0); BUG_ON(ret); } if (del_nr == 0) { -- cgit v1.2.3 From e3a41a5ba9c2ab988b9f1442925109dca2382fd9 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Tue, 10 Jan 2012 15:07:55 -0800 Subject: btrfs: pass __GFP_WRITE for buffered write page allocations Tell the page allocator that pages allocated for a buffered write are expected to become dirty soon. Signed-off-by: Johannes Weiner Reviewed-by: Rik van Riel Acked-by: Mel Gorman Cc: Minchan Kim Cc: Michal Hocko Cc: KAMEZAWA Hiroyuki Cc: Christoph Hellwig Cc: Wu Fengguang Cc: Dave Chinner Cc: Jan Kara Cc: Shaohua Li Cc: Chris Mason Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/btrfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 97fbe939c050..20375e6691c3 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1081,7 +1081,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file, again: for (i = 0; i < num_pages; i++) { pages[i] = find_or_create_page(inode->i_mapping, index + i, - mask); + mask | __GFP_WRITE); if (!pages[i]) { faili = i - 1; err = -ENOMEM; -- cgit v1.2.3 From 45a8090e626ab470c91142954431a93846030b0d Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 12 Jan 2012 19:10:12 -0500 Subject: Btrfs: don't call btrfs_throttle in file write Btrfs_throttle will make us wait if there is a currently committing transaction until we can open new transactions, which is ridiculous since we don't actually start any transactions within the file write path anyway, so all this does is introduce big latencies if we have a sync/fsync heavy workload going on while somebody else is trying to do work. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/file.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index fc97b00bd871..0f61e11a2998 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1273,7 +1273,6 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, dirty_pages); if (dirty_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1) btrfs_btree_balance_dirty(root, 1); - btrfs_throttle(root); pos += copied; num_written += copied; -- cgit v1.2.3 From d98456fcafa6f3fd1985f9b7429aaa3531c6bfa0 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 31 Jan 2012 20:27:41 -0500 Subject: Btrfs: don't reserve data with extents locked in btrfs_fallocate btrfs_fallocate tries to allocate space only if ranges in the file don't already exist. But the enospc checks it does are not allowed with extents locked. Signed-off-by: Chris Mason --- fs/btrfs/file.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 0f61e11a2998..0621a3a7d5d1 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1603,6 +1603,14 @@ static long btrfs_fallocate(struct file *file, int mode, if (mode & ~FALLOC_FL_KEEP_SIZE) return -EOPNOTSUPP; + /* + * Make sure we have enough space before we do the + * allocation. + */ + ret = btrfs_check_data_free_space(inode, len); + if (ret) + return ret; + /* * wait for ordered IO before we have any locks. We'll loop again * below with the locks held. @@ -1666,27 +1674,12 @@ static long btrfs_fallocate(struct file *file, int mode, if (em->block_start == EXTENT_MAP_HOLE || (cur_offset >= inode->i_size && !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { - - /* - * Make sure we have enough space before we do the - * allocation. - */ - ret = btrfs_check_data_free_space(inode, last_byte - - cur_offset); - if (ret) { - free_extent_map(em); - break; - } - ret = btrfs_prealloc_file_range(inode, mode, cur_offset, last_byte - cur_offset, 1 << inode->i_blkbits, offset + len, &alloc_hint); - /* Let go of our reservation. */ - btrfs_free_reserved_data_space(inode, last_byte - - cur_offset); if (ret < 0) { free_extent_map(em); break; @@ -1714,6 +1707,8 @@ static long btrfs_fallocate(struct file *file, int mode, &cached_state, GFP_NOFS); out: mutex_unlock(&inode->i_mutex); + /* Let go of our reservation. */ + btrfs_free_reserved_data_space(inode, len); return ret; } -- cgit v1.2.3 From 6af021d8fc3bcce790e7fbb391e39c5920fa3f71 Mon Sep 17 00:00:00 2001 From: Jeff Liu Date: Thu, 9 Feb 2012 14:25:50 +0800 Subject: Btrfs: return the internal error unchanged if btrfs_get_extent_fiemap() call failed for SEEK_DATA/SEEK_HOLE inquiry Given that ENXIO only means "offset beyond EOF" for either SEEK_DATA or SEEK_HOLE inquiry in a desired file range, so we should return the internal error unchanged if btrfs_get_extent_fiemap() call failed, rather than ENXIO. Cc: Dave Chinner Signed-off-by: Jie Liu --- fs/btrfs/file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 0621a3a7d5d1..3a520899b024 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1755,7 +1755,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int origin) start - root->sectorsize, root->sectorsize, 0); if (IS_ERR(em)) { - ret = -ENXIO; + ret = PTR_ERR(em); goto out; } last_end = em->start + em->len; @@ -1767,7 +1767,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int origin) while (1) { em = btrfs_get_extent_fiemap(inode, NULL, 0, start, len, 0); if (IS_ERR(em)) { - ret = -ENXIO; + ret = PTR_ERR(em); break; } -- cgit v1.2.3