From 8b7128429235d9bd72cfd5ed20c77c4f3118f744 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 11 Jun 2008 16:50:36 -0400 Subject: Btrfs: Add async worker threads for pre and post IO checksumming Btrfs has been using workqueues to spread the checksumming load across other CPUs in the system. But, workqueues only schedule work on the same CPU that queued the work, giving them a limited benefit for systems with higher CPU counts. This code adds a generic facility to schedule work with pools of kthreads, and changes the bio submission code to queue bios up. The queueing is important to make sure large numbers of procs on the system don't turn streaming workloads into random workloads by sending IO down concurrently. The end result of all of this is much higher performance (and CPU usage) when doing checksumming on large machines. Two worker pools are created, one for writes and one for endio processing. The two could deadlock if we tried to service both from a single pool. Signed-off-by: Chris Mason --- fs/btrfs/async-thread.h | 78 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 fs/btrfs/async-thread.h (limited to 'fs/btrfs/async-thread.h') diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h new file mode 100644 index 000000000000..52fc9da0f9e7 --- /dev/null +++ b/fs/btrfs/async-thread.h @@ -0,0 +1,78 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#ifndef __BTRFS_ASYNC_THREAD_ +#define __BTRFS_ASYNC_THREAD_ + +struct btrfs_worker_thread; + +/* + * This is similar to a workqueue, but it is meant to spread the operations + * across all available cpus instead of just the CPU that was used to + * queue the work. There is also some batching introduced to try and + * cut down on context switches. + * + * By default threads are added on demand up to 2 * the number of cpus. + * Changing struct btrfs_workers->max_workers is one way to prevent + * demand creation of kthreads. + * + * the basic model of these worker threads is to embed a btrfs_work + * structure in your own data struct, and use container_of in a + * work function to get back to your data struct. + */ +struct btrfs_work { + /* + * only func should be set to the function you want called + * your work struct is passed as the only arg + */ + void (*func)(struct btrfs_work *work); + + /* + * flags should be set to zero. It is used to make sure the + * struct is only inserted once into the list. + */ + unsigned long flags; + + /* don't touch these */ + struct btrfs_worker_thread *worker; + struct list_head list; +}; + +struct btrfs_workers { + /* current number of running workers */ + int num_workers; + + /* max number of workers allowed. 
changed by btrfs_start_workers */ + int max_workers; + + /* list with all the work threads */ + struct list_head worker_list; + + /* the last worker thread to have something queued */ + struct btrfs_worker_thread *last; + + /* lock for finding the next worker thread to queue on */ + spinlock_t lock; +}; + +int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work); +int btrfs_start_workers(struct btrfs_workers *workers, int num_workers); +int btrfs_stop_workers(struct btrfs_workers *workers); +void btrfs_init_workers(struct btrfs_workers *workers, int max); +int btrfs_requeue_work(struct btrfs_work *work); +#endif -- cgit v1.2.3 From 35d8ba66294ff2a53c17337a1aa1ff6739492f41 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 11 Jun 2008 20:21:24 -0400 Subject: Btrfs: Worker thread optimizations This changes the worker thread pool to maintain a list of idle threads, avoiding a complex search for a good thread to wake up. Threads have two states: idle - we try to reuse the last thread used in hopes of improving the batching ratios busy - each time a new work item is added to a busy task, the task is rotated to the end of the line. Signed-off-by: Chris Mason --- fs/btrfs/async-thread.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/async-thread.h') diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h index 52fc9da0f9e7..3436ff897597 100644 --- a/fs/btrfs/async-thread.h +++ b/fs/btrfs/async-thread.h @@ -60,11 +60,12 @@ struct btrfs_workers { /* max number of workers allowed. 
changed by btrfs_start_workers */ int max_workers; + /* once a worker has this many requests or fewer, it is idle */ + int idle_thresh; + /* list with all the work threads */ struct list_head worker_list; - - /* the last worker thread to have something queued */ - struct btrfs_worker_thread *last; + struct list_head idle_list; /* lock for finding the next worker thread to queue on */ spinlock_t lock; -- cgit v1.2.3 From 5443be45f5cb57d02fd895a0bcaf7e7d9890b1df Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 15 Aug 2008 15:34:16 -0400 Subject: Btrfs: Give all the worker threads descriptive names Signed-off-by: Chris Mason --- fs/btrfs/async-thread.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/async-thread.h') diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h index 3436ff897597..43e44d115dd1 100644 --- a/fs/btrfs/async-thread.h +++ b/fs/btrfs/async-thread.h @@ -69,11 +69,14 @@ struct btrfs_workers { /* lock for finding the next worker thread to queue on */ spinlock_t lock; + + /* extra name for this worker */ + char *name; }; int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work); int btrfs_start_workers(struct btrfs_workers *workers, int num_workers); int btrfs_stop_workers(struct btrfs_workers *workers); -void btrfs_init_workers(struct btrfs_workers *workers, int max); +void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max); int btrfs_requeue_work(struct btrfs_work *work); #endif -- cgit v1.2.3 From d352ac68148b69937d39ca5d48bcc4478e118dbf Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 29 Sep 2008 15:18:18 -0400 Subject: Btrfs: add and improve comments This improves the comments at the top of many functions. It didn't dive into the guts of functions because I was trying to avoid merging problems with the new allocator and back reference work. 
extent-tree.c and volumes.c were both skipped, and there is definitely more work to do in cleaning and commenting the code. Signed-off-by: Chris Mason --- fs/btrfs/async-thread.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/async-thread.h') diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h index 43e44d115dd1..4ec9a2ee0f9d 100644 --- a/fs/btrfs/async-thread.h +++ b/fs/btrfs/async-thread.h @@ -63,14 +63,17 @@ struct btrfs_workers { /* once a worker has this many requests or fewer, it is idle */ int idle_thresh; - /* list with all the work threads */ + /* list with all the work threads. The workers on the idle thread + * may be actively servicing jobs, but they haven't yet hit the + * idle thresh limit above. + */ struct list_head worker_list; struct list_head idle_list; /* lock for finding the next worker thread to queue on */ spinlock_t lock; - /* extra name for this worker */ + /* extra name for this worker, used for current->name */ char *name; }; -- cgit v1.2.3 From 4a69a41009c4ac691f7d9c289f5f37fabeddce46 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 6 Nov 2008 22:03:00 -0500 Subject: Btrfs: Add ordered async work queues Btrfs uses kernel threads to create async work queues for cpu intensive operations such as checksumming and decompression. These work well, but they make it difficult to keep IO order intact. A single writepages call from pdflush or fsync will turn into a number of bios, and each bio is checksummed in parallel. Once the checksum is computed, the bio is sent down to the disk, and since we don't control the order in which the parallel operations happen, they might go down to the disk in almost any order. The code deals with this somewhat by having deep work queues for a single kernel thread, making it very likely that a single thread will process all the bios for a single inode. This patch introduces an explicitly ordered work queue. 
As work structs are placed into the queue they are put onto the tail of a list. They have three callbacks: ->func (cpu intensive processing here) ->ordered_func (order sensitive processing here) ->ordered_free (free the work struct, all processing is done) The work struct has three callbacks. The func callback does the cpu intensive work, and when it completes the work struct is marked as done. Every time a work struct completes, the list is checked to see if the head is marked as done. If so, the ordered_func callback is used to do the order sensitive processing and the ordered_free callback is used to do any cleanup. Then we loop back and check the head of the list again. This patch also changes the checksumming code to use the ordered workqueues. On a 4 drive array, it increases streaming writes from 280MB/s to 350MB/s. Signed-off-by: Chris Mason --- fs/btrfs/async-thread.h | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/async-thread.h') diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h index 4ec9a2ee0f9d..31be4ed8b63e 100644 --- a/fs/btrfs/async-thread.h +++ b/fs/btrfs/async-thread.h @@ -37,10 +37,16 @@ struct btrfs_worker_thread; */ struct btrfs_work { /* - * only func should be set to the function you want called + * func should be set to the function you want called * your work struct is passed as the only arg + * + * ordered_func must be set for work sent to an ordered work queue, + * and it is called to complete a given work item in the same + * order they were sent to the queue. */ void (*func)(struct btrfs_work *work); + void (*ordered_func)(struct btrfs_work *work); + void (*ordered_free)(struct btrfs_work *work); /* * flags should be set to zero. 
It is used to make sure the @@ -51,6 +57,7 @@ struct btrfs_work { /* don't touch these */ struct btrfs_worker_thread *worker; struct list_head list; + struct list_head order_list; }; struct btrfs_workers { @@ -63,6 +70,9 @@ struct btrfs_workers { /* once a worker has this many requests or fewer, it is idle */ int idle_thresh; + /* force completions in the order they were queued */ + int ordered; + /* list with all the work threads. The workers on the idle thread * may be actively servicing jobs, but they haven't yet hit the * idle thresh limit above. @@ -70,6 +80,12 @@ struct btrfs_workers { struct list_head worker_list; struct list_head idle_list; + /* + * when operating in ordered mode, this maintains the list + * of work items waiting for completion + */ + struct list_head order_list; + /* lock for finding the next worker thread to queue on */ spinlock_t lock; -- cgit v1.2.3