From cd354ad613a393424f85333ceed6b15e07fb94ae Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 20 Oct 2011 15:45:37 -0400 Subject: Btrfs: don't wait as long for more batches during SSD log commit When we're doing log commits, we try to wait for more writers to come in and make the commit bigger. This helps improve performance on rotating disks, but on SSDs it adds latencies. Signed-off-by: Chris Mason --- fs/btrfs/tree-log.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 786639fca067..310ab22cfe58 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2013,10 +2013,10 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, /* wait for previous tree log sync to complete */ if (atomic_read(&root->log_commit[(index1 + 1) % 2])) wait_log_commit(trans, root, root->log_transid - 1); - while (1) { unsigned long batch = root->log_batch; - if (root->log_multiple_pids) { + /* when we're on an ssd, just kick the log commit out */ + if (!btrfs_test_opt(root, SSD) && root->log_multiple_pids) { mutex_unlock(&root->log_mutex); schedule_timeout_uninterruptible(1); mutex_lock(&root->log_mutex); -- cgit v1.2.3 From e688b7252f784c2479d559f9f70ca8354752c5e7 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 31 Oct 2011 20:52:39 -0400 Subject: Btrfs: fix extent pinning bugs in the tree log The tree log had two important bugs that could cause corruptions after a crash. Sometimes we were allowing tree log blocks to be reused after the tree log was committed but before the transaction commit was done. This allowed a future metadata write to overwrite the tree log data. It is fixed by adding a new variant of freeing reserved extents that always pins them. Credit goes to Stefan Behrens and Arne Jansen for many many hours spent tracking this bug down. During tree log replay, we do a pass through the tree log and pin all the extents we find. 
This makes sure the replay code won't go in and use any of those blocks for new allocations during replay. The problem is the free space cache isn't honoring these pinned extents. So the allocator can end up handing them out, leading to all kinds of problems during replay. The fix here is to force any free space cache to load while we pin the extents, and then to make sure we remove the pinned extents from the free space rbtree. Signed-off-by: Chris Mason Reported-by: Stefan Behrens --- fs/btrfs/tree-log.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 310ab22cfe58..8ca1b6b83bd1 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -276,8 +276,9 @@ static int process_one_buffer(struct btrfs_root *log, struct walk_control *wc, u64 gen) { if (wc->pin) - btrfs_pin_extent(log->fs_info->extent_root, - eb->start, eb->len, 0); + btrfs_pin_extent_for_log_replay(wc->trans, + log->fs_info->extent_root, + eb->start, eb->len); if (btrfs_buffer_uptodate(eb, gen)) { if (wc->write) @@ -1760,7 +1761,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); - ret = btrfs_free_reserved_extent(root, + ret = btrfs_free_and_pin_reserved_extent(root, bytenr, blocksize); BUG_ON(ret); } @@ -1828,7 +1829,7 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, btrfs_tree_unlock(next); WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); - ret = btrfs_free_reserved_extent(root, + ret = btrfs_free_and_pin_reserved_extent(root, path->nodes[*level]->start, path->nodes[*level]->len); BUG_ON(ret); @@ -1897,7 +1898,7 @@ static int walk_log_tree(struct btrfs_trans_handle *trans, WARN_ON(log->root_key.objectid != BTRFS_TREE_LOG_OBJECTID); - ret = btrfs_free_reserved_extent(log, next->start, + ret = btrfs_free_and_pin_reserved_extent(log, next->start, next->len); BUG_ON(ret); } -- cgit v1.2.3 From 
6c41761fc6efe1503103a1afe03a6635c0b5d4ec Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 13 Apr 2011 15:41:04 +0200 Subject: btrfs: separate superblock items out of fs_info fs_info is now ~9kb, more than fits into one page. This will cause mount failure when memory is too fragmented. Top space consumers are super block structures super_copy and super_for_commit, ~2.8kb each. Allocate them dynamically. fs_info will be ~3.5kb. (measured on x86_64) Add a wrapper for freeing fs_info and all of its dynamically allocated members. Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 8ca1b6b83bd1..f4d81c06d48f 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2118,9 +2118,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, BUG_ON(ret); btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); - btrfs_set_super_log_root(&root->fs_info->super_for_commit, + btrfs_set_super_log_root(root->fs_info->super_for_commit, log_root_tree->node->start); - btrfs_set_super_log_root_level(&root->fs_info->super_for_commit, + btrfs_set_super_log_root_level(root->fs_info->super_for_commit, btrfs_header_level(log_root_tree->node)); log_root_tree->log_batch = 0; -- cgit v1.2.3