Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r--   fs/btrfs/disk-io.c   87
1 file changed, 38 insertions(+), 49 deletions(-)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 05dc3c17cb62..3f0b6d1936e8 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -125,8 +125,8 @@ struct async_submit_bio {
  * Different roots are used for different purposes and may nest inside each
  * other and they require separate keysets. As lockdep keys should be
  * static, assign keysets according to the purpose of the root as indicated
- * by btrfs_root->objectid. This ensures that all special purpose roots
- * have separate keysets.
+ * by btrfs_root->root_key.objectid. This ensures that all special purpose
+ * roots have separate keysets.
  *
  * Lock-nesting across peer nodes is always done with the immediate parent
  * node locked thus preventing deadlock. As lockdep doesn't know this, use
@@ -1148,7 +1148,6 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
     root->state = 0;
     root->orphan_cleanup_state = 0;
 
-    root->objectid = objectid;
     root->last_trans = 0;
     root->highest_objectid = 0;
     root->nr_delalloc_inodes = 0;
@@ -1665,9 +1664,8 @@ static int cleaner_kthread(void *arg)
     struct btrfs_root *root = arg;
     struct btrfs_fs_info *fs_info = root->fs_info;
     int again;
-    struct btrfs_trans_handle *trans;
 
-    do {
+    while (1) {
         again = 0;
 
         /* Make the cleaner go to sleep early. */
@@ -1716,42 +1714,16 @@ static int cleaner_kthread(void *arg)
          */
         btrfs_delete_unused_bgs(fs_info);
 sleep:
+        if (kthread_should_park())
+            kthread_parkme();
+        if (kthread_should_stop())
+            return 0;
         if (!again) {
             set_current_state(TASK_INTERRUPTIBLE);
-            if (!kthread_should_stop())
-                schedule();
+            schedule();
             __set_current_state(TASK_RUNNING);
         }
-    } while (!kthread_should_stop());
-
-    /*
-     * Transaction kthread is stopped before us and wakes us up.
-     * However we might have started a new transaction and COWed some
-     * tree blocks when deleting unused block groups for example. So
-     * make sure we commit the transaction we started to have a clean
-     * shutdown when evicting the btree inode - if it has dirty pages
-     * when we do the final iput() on it, eviction will trigger a
-     * writeback for it which will fail with null pointer dereferences
-     * since work queues and other resources were already released and
-     * destroyed by the time the iput/eviction/writeback is made.
-     */
-    trans = btrfs_attach_transaction(root);
-    if (IS_ERR(trans)) {
-        if (PTR_ERR(trans) != -ENOENT)
-            btrfs_err(fs_info,
-                      "cleaner transaction attach returned %ld",
-                      PTR_ERR(trans));
-    } else {
-        int ret;
-
-        ret = btrfs_commit_transaction(trans);
-        if (ret)
-            btrfs_err(fs_info,
-                      "cleaner open transaction commit returned %d",
-                      ret);
     }
-
-    return 0;
 }
 
 static int transaction_kthread(void *arg)
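The rewritten cleaner loop above drops the old do/while plus final transaction commit in favor of kthread parking. A minimal sketch of the park/stop protocol it now relies on, using hypothetical names (my_worker, do_some_work) rather than btrfs code:

#include <linux/kthread.h>
#include <linux/sched.h>

static void do_some_work(void *arg);    /* hypothetical payload */

/* Check for a park request before a stop request: a parked thread
 * blocks in kthread_parkme() until kthread_unpark() or kthread_stop(),
 * so callers can quiesce the thread without freeing its task_struct. */
static int my_worker(void *arg)
{
    while (1) {
        do_some_work(arg);

        if (kthread_should_park())
            kthread_parkme();
        if (kthread_should_stop())
            return 0;

        set_current_state(TASK_INTERRUPTIBLE);
        schedule();
        __set_current_state(TASK_RUNNING);
    }
}

Checking kthread_should_park() before sleeping is what lets close_ctree() (later in this diff) park the cleaner early in shutdown and still stop it for good afterwards.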
@@ -2156,9 +2128,8 @@ static void btrfs_init_dev_replace_locks(struct btrfs_fs_info *fs_info)
 {
     mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount);
     rwlock_init(&fs_info->dev_replace.lock);
-    atomic_set(&fs_info->dev_replace.read_locks, 0);
     atomic_set(&fs_info->dev_replace.blocking_readers, 0);
-    init_waitqueue_head(&fs_info->replace_wait);
+    init_waitqueue_head(&fs_info->dev_replace.replace_wait);
     init_waitqueue_head(&fs_info->dev_replace.read_lock_wq);
 }
 
@@ -2648,7 +2619,8 @@ int open_ctree(struct super_block *sb,
         goto fail_dirty_metadata_bytes;
     }
 
-    ret = percpu_counter_init(&fs_info->bio_counter, 0, GFP_KERNEL);
+    ret = percpu_counter_init(&fs_info->dev_replace.bio_counter, 0,
+                  GFP_KERNEL);
     if (ret) {
         err = ret;
         goto fail_delalloc_bytes;
@@ -3309,7 +3281,7 @@ fail_iput:
 
     iput(fs_info->btree_inode);
 fail_bio_counter:
-    percpu_counter_destroy(&fs_info->bio_counter);
+    percpu_counter_destroy(&fs_info->dev_replace.bio_counter);
 fail_delalloc_bytes:
     percpu_counter_destroy(&fs_info->delalloc_bytes);
 fail_dirty_metadata_bytes:
@@ -3932,6 +3904,13 @@ void close_ctree(struct btrfs_fs_info *fs_info)
     int ret;
 
     set_bit(BTRFS_FS_CLOSING_START, &fs_info->flags);
+    /*
+     * We don't want the cleaner to start new transactions, add more delayed
+     * iputs, etc. while we're closing. We can't use kthread_stop() yet
+     * because that frees the task_struct, and the transaction kthread might
+     * still try to wake up the cleaner.
+     */
+    kthread_park(fs_info->cleaner_kthread);
 
     /* wait for the qgroup rescan worker to stop */
     btrfs_qgroup_wait_for_completion(fs_info, false);
@@ -3959,9 +3938,8 @@ void close_ctree(struct btrfs_fs_info *fs_info)
 
     if (!sb_rdonly(fs_info->sb)) {
         /*
-         * If the cleaner thread is stopped and there are
-         * block groups queued for removal, the deletion will be
-         * skipped when we quit the cleaner thread.
+         * The cleaner kthread is stopped, so do one final pass over
+         * unused block groups.
          */
         btrfs_delete_unused_bgs(fs_info);
 
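The hunks above move bio_counter and replace_wait from btrfs_fs_info into struct btrfs_dev_replace, hence the renamed fields in open_ctree() and its error paths. The unwinding matters because a percpu counter allocates per-CPU storage and its init can fail; a minimal sketch of that lifecycle under hypothetical names (my_ctx and friends):

#include <linux/percpu_counter.h>

struct my_ctx {
    struct percpu_counter bio_counter;  /* e.g. in-flight bio count */
};

static int my_ctx_init(struct my_ctx *ctx)
{
    /* Allocates per-CPU storage and may fail, which is why
     * open_ctree() frees earlier counters on each error label. */
    return percpu_counter_init(&ctx->bio_counter, 0, GFP_KERNEL);
}

static void my_ctx_exit(struct my_ctx *ctx)
{
    /* Destroy exactly once, after the last user is gone, mirroring
     * the single percpu_counter_destroy() call in close_ctree(). */
    percpu_counter_destroy(&ctx->bio_counter);
}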
@@ -3977,6 +3955,7 @@ void close_ctree(struct btrfs_fs_info *fs_info)
     kthread_stop(fs_info->transaction_kthread);
     kthread_stop(fs_info->cleaner_kthread);
 
+    ASSERT(list_empty(&fs_info->delayed_iputs));
     set_bit(BTRFS_FS_CLOSING_DONE, &fs_info->flags);
 
     btrfs_free_qgroup_config(fs_info);
@@ -4018,7 +3997,7 @@ void close_ctree(struct btrfs_fs_info *fs_info)
 
     percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
     percpu_counter_destroy(&fs_info->delalloc_bytes);
-    percpu_counter_destroy(&fs_info->bio_counter);
+    percpu_counter_destroy(&fs_info->dev_replace.bio_counter);
     cleanup_srcu_struct(&fs_info->subvol_srcu);
 
     btrfs_free_stripe_hash_table(fs_info);
@@ -4204,7 +4183,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
         return ret;
     }
 
-    while ((node = rb_first(&delayed_refs->href_root)) != NULL) {
+    while ((node = rb_first_cached(&delayed_refs->href_root)) != NULL) {
         struct btrfs_delayed_ref_head *head;
         struct rb_node *n;
         bool pin_bytes = false;
@@ -4222,11 +4201,11 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
             continue;
         }
         spin_lock(&head->lock);
-        while ((n = rb_first(&head->ref_tree)) != NULL) {
+        while ((n = rb_first_cached(&head->ref_tree)) != NULL) {
             ref = rb_entry(n, struct btrfs_delayed_ref_node,
                        ref_node);
             ref->in_tree = 0;
-            rb_erase(&ref->ref_node, &head->ref_tree);
+            rb_erase_cached(&ref->ref_node, &head->ref_tree);
             RB_CLEAR_NODE(&ref->ref_node);
             if (!list_empty(&ref->add_list))
                 list_del(&ref->add_list);
@@ -4240,7 +4219,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
         if (head->processing == 0)
             delayed_refs->num_heads_ready--;
         atomic_dec(&delayed_refs->num_entries);
-        rb_erase(&head->href_node, &delayed_refs->href_root);
+        rb_erase_cached(&head->href_node, &delayed_refs->href_root);
         RB_CLEAR_NODE(&head->href_node);
         spin_unlock(&head->lock);
         spin_unlock(&delayed_refs->lock);
@@ -4359,13 +4338,23 @@ static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
     unpin = pinned_extents;
 again:
     while (1) {
+        /*
+         * The btrfs_finish_extent_commit() may get the same range as
+         * ours between find_first_extent_bit and clear_extent_dirty.
+         * Hence, hold the unused_bg_unpin_mutex to avoid double unpin
+         * the same extent range.
+         */
+        mutex_lock(&fs_info->unused_bg_unpin_mutex);
         ret = find_first_extent_bit(unpin, 0, &start, &end,
                         EXTENT_DIRTY, NULL);
-        if (ret)
+        if (ret) {
+            mutex_unlock(&fs_info->unused_bg_unpin_mutex);
             break;
+        }
 
         clear_extent_dirty(unpin, start, end);
         btrfs_error_unpin_extent_range(fs_info, start, end);
+        mutex_unlock(&fs_info->unused_bg_unpin_mutex);
         cond_resched();
     }
 
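btrfs_destroy_delayed_refs() above now iterates rb_root_cached trees with rb_first_cached() and rb_erase_cached(). A cached rbtree keeps a pointer to its leftmost node, so fetching the smallest element is a pointer read instead of a walk down the left spine. A sketch of the insertion side of that pattern, with a hypothetical entry type:

#include <linux/rbtree.h>
#include <linux/types.h>

struct my_entry {
    struct rb_node node;
    u64 key;
};

/* Track whether the new node lands leftmost so the cached pointer
 * stays valid; rb_insert_color_cached() updates it for us. */
static void my_insert(struct rb_root_cached *root, struct my_entry *new)
{
    struct rb_node **link = &root->rb_root.rb_node;
    struct rb_node *parent = NULL;
    bool leftmost = true;

    while (*link) {
        struct my_entry *cur;

        parent = *link;
        cur = rb_entry(parent, struct my_entry, node);
        if (new->key < cur->key) {
            link = &parent->rb_left;
        } else {
            link = &parent->rb_right;
            leftmost = false;   /* not the smallest key */
        }
    }
    rb_link_node(&new->node, parent, link);
    rb_insert_color_cached(&new->node, root, leftmost);
}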
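The final hunk takes unused_bg_unpin_mutex across the find_first_extent_bit() + clear_extent_dirty() pair so that btrfs_finish_extent_commit() cannot see the same dirty range and unpin it a second time. The shape of that fix, reduced to hypothetical helpers (my_find_first_dirty and friends):

#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/types.h>

int my_find_first_dirty(u64 *start, u64 *end);  /* 0 if a range was found */
void my_clear_dirty(u64 start, u64 end);
void my_unpin_range(u64 start, u64 end);

static DEFINE_MUTEX(unpin_mutex);

static void unpin_all(void)
{
    u64 start, end;

    while (1) {
        /* Hold the lock across find + clear: if another thread could
         * run between them, both would find the same range and unpin
         * it twice. */
        mutex_lock(&unpin_mutex);
        if (my_find_first_dirty(&start, &end)) {
            mutex_unlock(&unpin_mutex);
            break;  /* nothing dirty left */
        }
        my_clear_dirty(start, end);     /* claim the range */
        my_unpin_range(start, end);
        mutex_unlock(&unpin_mutex);
        cond_resched();
    }
}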