From 59c3d45e487315e6e05a3f2310b61109f8e503e7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 8 Apr 2014 09:15:35 -0600 Subject: block: remove 'q' parameter from kblockd_schedule_*_work() The queue parameter is never used, just get rid of it. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1e1fa3f93d5f..2425945d36ab 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1359,8 +1359,8 @@ static inline void put_dev_sector(Sector p) } struct work_struct; -int kblockd_schedule_work(struct request_queue *q, struct work_struct *work); -int kblockd_schedule_delayed_work(struct request_queue *q, struct delayed_work *dwork, unsigned long delay); +int kblockd_schedule_work(struct work_struct *work); +int kblockd_schedule_delayed_work(struct delayed_work *dwork, unsigned long delay); #ifdef CONFIG_BLK_CGROUP /* -- cgit v1.2.3 From 8ab14595b6dffecea264dcca2d6d9eea7c59273a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 8 Apr 2014 09:17:40 -0600 Subject: block: add kblockd_schedule_delayed_work_on() Same function as kblockd_schedule_delayed_work(), but allow the caller to pass in a CPU that the work should be executed on. This just directly extends and maps into the workqueue API, and will be used to make the blk-mq mappings more strict. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2425945d36ab..5a31307c5ded 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1361,6 +1361,7 @@ static inline void put_dev_sector(Sector p) struct work_struct; int kblockd_schedule_work(struct work_struct *work); int kblockd_schedule_delayed_work(struct delayed_work *dwork, unsigned long delay); +int kblockd_schedule_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay); #ifdef CONFIG_BLK_CGROUP /* -- cgit v1.2.3 From e4043dcf30811f5db15181168e2aac172514302a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 9 Apr 2014 10:18:23 -0600 Subject: blk-mq: ensure that hardware queues are always run on the mapped CPUs Instead of providing soft mappings with no guarantees on hardware queues always being run on the right CPU, switch to a hard mapping guarantee that ensure that we always run the hardware queue on (one of, if more) the mapped CPU. Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 0120451545d8..b6ee48740458 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -19,6 +19,7 @@ struct blk_mq_hw_ctx { unsigned long state; /* BLK_MQ_S_* flags */ struct delayed_work delayed_work; + cpumask_var_t cpumask; unsigned long flags; /* BLK_MQ_F_* flags */ -- cgit v1.2.3 From b4f42e2831ff9b9fa19252265d7c8985d47eefb9 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 10 Apr 2014 09:46:28 -0600 Subject: block: remove struct request buffer member This was used in the olden days, back when onions were proper yellow. Basically it mapped to the current buffer to be transferred. With highmem being added more than a decade ago, most drivers map pages out of a bio, and rq->buffer isn't pointing at anything valid. Convert old style drivers to just use bio_data(). For the discard payload use case, just reference the page in the bio. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 86a8df13a5fe..eb5e94803892 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -178,7 +178,6 @@ struct request { unsigned short ioprio; void *special; /* opaque pointer available for LLD use */ - char *buffer; /* kaddr of the current segment if available */ int tag; int errors; -- cgit v1.2.3 From e9b267d91f6ddbc694cb40aa962b0b2cec03971d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 15 Apr 2014 13:59:10 -0600 Subject: blk-mq: add ->init_request and ->exit_request methods The current blk_mq_init_commands/blk_mq_free_commands interface has a two problems: 1) Because only the constructor is passed to blk_mq_init_commands there is no easy way to clean up when a comman initialization failed. The current code simply leaks the allocations done in the constructor. 2) There is no good place to call blk_mq_free_commands: before blk_cleanup_queue there is no guarantee that all outstanding commands have completed, so we can't free them yet. After blk_cleanup_queue the queue has usually been freed. This can be worked around by grabbing an unconditional reference before calling blk_cleanup_queue and dropping it after blk_mq_free_commands is done, although that's not exatly pretty and driver writers are guaranteed to get it wrong sooner or later. Both issues are easily fixed by making the request constructor and destructor normal blk_mq_ops methods. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index b6ee48740458..29c1a6e83814 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -67,6 +67,10 @@ typedef struct blk_mq_hw_ctx *(alloc_hctx_fn)(struct blk_mq_reg *,unsigned int); typedef void (free_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int); typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int); typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int); +typedef int (init_request_fn)(void *, struct blk_mq_hw_ctx *, + struct request *, unsigned int); +typedef void (exit_request_fn)(void *, struct blk_mq_hw_ctx *, + struct request *, unsigned int); struct blk_mq_ops { /* @@ -99,6 +103,14 @@ struct blk_mq_ops { */ init_hctx_fn *init_hctx; exit_hctx_fn *exit_hctx; + + /* + * Called for every command allocated by the block layer to allow + * the driver to set up driver specific data. + * Ditto for exit/teardown. + */ + init_request_fn *init_request; + exit_request_fn *exit_request; }; enum { @@ -118,8 +130,6 @@ enum { struct request_queue *blk_mq_init_queue(struct blk_mq_reg *, void *); int blk_mq_register_disk(struct gendisk *); void blk_mq_unregister_disk(struct gendisk *); -int blk_mq_init_commands(struct request_queue *, int (*init)(void *data, struct blk_mq_hw_ctx *, struct request *, unsigned int), void *data); -void blk_mq_free_commands(struct request_queue *, void (*free)(void *data, struct blk_mq_hw_ctx *, struct request *, unsigned int), void *data); void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); -- cgit v1.2.3 From 24d2f90309b23f2cfe016b2aebc5f0d6e01c57fd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 15 Apr 2014 14:14:00 -0600 Subject: blk-mq: split out tag initialization, support shared tags Add a new blk_mq_tag_set structure that gets set up before we initialize the queue. A single blk_mq_tag_set structure can be shared by multiple queues. Signed-off-by: Christoph Hellwig Modular export of blk_mq_{alloc,free}_tagset added by me. Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 29c1a6e83814..a4ea0ce83b07 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -33,8 +33,6 @@ struct blk_mq_hw_ctx { unsigned int nr_ctx_map; unsigned long *ctx_map; - struct request **rqs; - struct list_head page_list; struct blk_mq_tags *tags; unsigned long queued; @@ -42,7 +40,6 @@ struct blk_mq_hw_ctx { #define BLK_MQ_MAX_DISPATCH_ORDER 10 unsigned long dispatched[BLK_MQ_MAX_DISPATCH_ORDER]; - unsigned int queue_depth; unsigned int numa_node; unsigned int cmd_size; /* per-request extra data */ @@ -50,7 +47,7 @@ struct blk_mq_hw_ctx { struct kobject kobj; }; -struct blk_mq_reg { +struct blk_mq_tag_set { struct blk_mq_ops *ops; unsigned int nr_hw_queues; unsigned int queue_depth; @@ -59,18 +56,22 @@ struct blk_mq_reg { int numa_node; unsigned int timeout; unsigned int flags; /* BLK_MQ_F_* */ + void *driver_data; + + struct blk_mq_tags **tags; }; typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, struct request *); typedef struct blk_mq_hw_ctx *(map_queue_fn)(struct request_queue *, const int); -typedef struct blk_mq_hw_ctx *(alloc_hctx_fn)(struct blk_mq_reg *,unsigned int); +typedef struct blk_mq_hw_ctx *(alloc_hctx_fn)(struct blk_mq_tag_set *, + unsigned int); typedef void (free_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int); typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int); typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int); -typedef int (init_request_fn)(void *, struct blk_mq_hw_ctx *, - struct request *, unsigned int); -typedef void (exit_request_fn)(void *, struct blk_mq_hw_ctx *, - struct request *, unsigned int); +typedef int (init_request_fn)(void *, struct request *, unsigned int, + unsigned int, unsigned int); +typedef void (exit_request_fn)(void *, struct request *, unsigned int, + unsigned int); struct blk_mq_ops { /* @@ -127,10 +128,13 @@ enum { BLK_MQ_MAX_DEPTH = 2048, }; -struct request_queue *blk_mq_init_queue(struct blk_mq_reg *, void *); +struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *); int blk_mq_register_disk(struct gendisk *); void blk_mq_unregister_disk(struct gendisk *); +int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set); +void blk_mq_free_tag_set(struct blk_mq_tag_set *set); + void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); void blk_mq_insert_request(struct request *, bool, bool, bool); @@ -139,10 +143,10 @@ void blk_mq_free_request(struct request *rq); bool blk_mq_can_queue(struct blk_mq_hw_ctx *); struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp); struct request *blk_mq_alloc_reserved_request(struct request_queue *q, int rw, gfp_t gfp); -struct request *blk_mq_rq_from_tag(struct request_queue *q, unsigned int tag); +struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag); struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index); -struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_reg *, unsigned int); +struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int); void blk_mq_free_single_hw_queue(struct blk_mq_hw_ctx *, unsigned int); bool blk_mq_end_io_partial(struct request *rq, int error, @@ -173,12 +177,6 @@ static inline void *blk_mq_rq_to_pdu(struct request *rq) return (void *) rq + sizeof(*rq); } -static inline struct request *blk_mq_tag_to_rq(struct blk_mq_hw_ctx *hctx, - unsigned int tag) -{ - return hctx->rqs[tag]; -} - #define queue_for_each_hw_ctx(q, hctx, i) \ for ((i) = 0; (i) < (q)->nr_hw_queues && \ ({ hctx = (q)->queue_hw_ctx[i]; 1; }); (i)++) -- cgit v1.2.3 From fb3ccb5da71273e7f0d50b50bc879e50cedd60e7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 14 Apr 2014 10:30:12 +0200 Subject: block: all blk-mq requests are tagged Instead of setting the REQ_QUEUED flag on each of them just take it into account in the only macro checking it. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index eb5e94803892..95bb551273ab 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1101,7 +1101,8 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk) /* * tag stuff */ -#define blk_rq_tagged(rq) ((rq)->cmd_flags & REQ_QUEUED) +#define blk_rq_tagged(rq) \ + ((rq)->mq_ctx || ((rq)->cmd_flags & REQ_QUEUED)) extern int blk_queue_start_tag(struct request_queue *, struct request *); extern struct request *blk_queue_find_tag(struct request_queue *, int); extern void blk_queue_end_tag(struct request_queue *, struct request *); -- cgit v1.2.3 From 63151a449ebaef062ffac5b302206565ff5ef62e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Apr 2014 09:44:52 +0200 Subject: blk-mq: allow drivers to hook into I/O completion Split out the bottom half of blk_mq_end_io so that drivers can perform work when they know a request has been completed, but before it has been freed. This also obsoletes blk_mq_end_io_partial as drivers can now pass any value to blk_update_request directly. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index a4ea0ce83b07..a81b474b794f 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -149,13 +149,8 @@ struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_ind struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int); void blk_mq_free_single_hw_queue(struct blk_mq_hw_ctx *, unsigned int); -bool blk_mq_end_io_partial(struct request *rq, int error, - unsigned int nr_bytes); -static inline void blk_mq_end_io(struct request *rq, int error) -{ - bool done = !blk_mq_end_io_partial(rq, error, blk_rq_bytes(rq)); - BUG_ON(!done); -} +void blk_mq_end_io(struct request *rq, int error); +void __blk_mq_end_io(struct request *rq, int error); void blk_mq_complete_request(struct request *rq); -- cgit v1.2.3 From 1b4a325858f695a9b5041313602d34b36f463724 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Apr 2014 09:44:54 +0200 Subject: blk-mq: add async parameter to blk_mq_start_stopped_hw_queues Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index a81b474b794f..9ecfab96d8c9 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -157,7 +157,7 @@ void blk_mq_complete_request(struct request *rq); void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx); void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx); void blk_mq_stop_hw_queues(struct request_queue *q); -void blk_mq_start_stopped_hw_queues(struct request_queue *q); +void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async); /* * Driver command data is immediately after the request. So subtract request -- cgit v1.2.3 From 70f4db639c5b2479e08657392cbf3ba3cceea11c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Apr 2014 10:48:08 -0600 Subject: blk-mq: add blk_mq_delay_queue Add a blk-mq equivalent to blk_delay_queue so that the scsi layer can ask to be kicked again after a delay. Signed-off-by: Christoph Hellwig Modified by me to kill the unnecessary preempt disable/enable in the delayed workqueue handler. Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 9ecfab96d8c9..ae868e77bc2f 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -18,7 +18,8 @@ struct blk_mq_hw_ctx { } ____cacheline_aligned_in_smp; unsigned long state; /* BLK_MQ_S_* flags */ - struct delayed_work delayed_work; + struct delayed_work run_work; + struct delayed_work delay_work; cpumask_var_t cpumask; unsigned long flags; /* BLK_MQ_F_* flags */ @@ -158,6 +159,7 @@ void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx); void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx); void blk_mq_stop_hw_queues(struct request_queue *q); void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async); +void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); /* * Driver command data is immediately after the request. So subtract request -- cgit v1.2.3 From 2f268556567ebeb3538f99b9bdad177581439dcb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Apr 2014 09:44:56 +0200 Subject: blk-mq: add blk_mq_start_hw_queues Add a helper to unconditionally kick contexts of a queue. This will be needed by the SCSI layer to provide fair queueing between multiple devices on a single host. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index ae868e77bc2f..391377e53367 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -158,6 +158,7 @@ void blk_mq_complete_request(struct request *rq); void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx); void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx); void blk_mq_stop_hw_queues(struct request_queue *q); +void blk_mq_start_hw_queues(struct request_queue *q); void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async); void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); -- cgit v1.2.3 From ed0791b2f83cec4e77d88c4e9baabcebf9254a78 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Apr 2014 09:44:57 +0200 Subject: blk-mq: add blk_mq_requeue_request This allows to requeue a request that has been accepted by ->queue_rq earlier. This is needed by the SCSI layer in various error conditions. The existing internal blk_mq_requeue_request is renamed to __blk_mq_requeue_request as it is a lower level building block for this funtionality. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 391377e53367..ab469d525894 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -153,6 +153,8 @@ void blk_mq_free_single_hw_queue(struct blk_mq_hw_ctx *, unsigned int); void blk_mq_end_io(struct request *rq, int error); void __blk_mq_end_io(struct request *rq, int error); +void blk_mq_requeue_request(struct request *rq); + void blk_mq_complete_request(struct request *rq); void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx); -- cgit v1.2.3 From f88a164b72bd51fe4c89e06ac9939f2afe39c7ed Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Apr 2014 09:44:58 +0200 Subject: blk-mq: rename mq_flush_work struct request member We will use this work_struct to requeue scsi commands from the completion handler as well, so give it a more generic name. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 95bb551273ab..71288083a46f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -98,7 +98,7 @@ struct request { struct list_head queuelist; union { struct call_single_data csd; - struct work_struct mq_flush_work; + struct work_struct requeue_work; unsigned long fifo_time; }; -- cgit v1.2.3 From 12120077b2612a243d158605640cd39266906667 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Apr 2014 09:44:59 +0200 Subject: block: export blk_finish_request This allows to mirror the blk-mq code flow for more a more readable I/O completion handler in SCSI. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 71288083a46f..20b26d4e53a2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -936,6 +936,7 @@ extern struct request *blk_fetch_request(struct request_queue *q); */ extern bool blk_update_request(struct request *rq, int error, unsigned int nr_bytes); +extern void blk_finish_request(struct request *rq, int error); extern bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes); extern void blk_end_request_all(struct request *rq, int error); -- cgit v1.2.3 From a6c39cb4f71e61aff19d07e2d0b26bb6e3548fae Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Tue, 22 Apr 2014 15:09:05 -0600 Subject: fs/bio: remove bs paramater in biovec_create_pool bs is no longer used in biovec_create_pool since 9f060e2231ca96 ("block: Convert integrity to bvec_alloc_bs()") Signed-off-by: Fabian Frederick Cc: Jens Axboe Signed-off-by: Jens Axboe --- include/linux/bio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index bba550826921..5a645769f020 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -333,7 +333,7 @@ static inline struct bio *bio_next_split(struct bio *bio, int sectors, extern struct bio_set *bioset_create(unsigned int, unsigned int); extern void bioset_free(struct bio_set *); -extern mempool_t *biovec_create_pool(struct bio_set *bs, int pool_entries); +extern mempool_t *biovec_create_pool(int pool_entries); extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *); extern void bio_put(struct bio *); -- cgit v1.2.3 From 38535201633077cbaf8b32886b5e3005b36c9024 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 25 Apr 2014 02:32:53 -0700 Subject: blk-mq: respect rq_affinity The blk-mq code is using it's own version of the I/O completion affinity tunables, which causes a few issues: - the rq_affinity sysfs file doesn't work for blk-mq devices, even if it still is present, thus breaking existing tuning setups. - the rq_affinity = 1 mode, which is the defauly for legacy request based drivers isn't implemented at all. - blk-mq drivers don't implement any completion affinity with the default flag settings. This patches removes the blk-mq ipi_redirect flag and sysfs file, as well as the internal BLK_MQ_F_SHOULD_IPI flag and replaces it with code that respects the queue-wide rq_affinity flags and also implements the rq_affinity = 1 mode. This means I/O completion affinity can now only be tuned block-queue wide instead of per context, which seems more sensible to me anyway. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index ab469d525894..3b561d651a02 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -122,7 +122,6 @@ enum { BLK_MQ_F_SHOULD_MERGE = 1 << 0, BLK_MQ_F_SHOULD_SORT = 1 << 1, - BLK_MQ_F_SHOULD_IPI = 1 << 2, BLK_MQ_S_STOPPED = 0, -- cgit v1.2.3 From 506e931f92defdc60c1dc4aa2ff4a19a5dcd8618 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 7 May 2014 10:26:44 -0600 Subject: blk-mq: add basic round-robin of what CPU to queue workqueue work on Right now we just pick the first CPU in the mask, but that can easily overload that one. Add some basic batching and round-robin all the entries in the mask instead. Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 3b561d651a02..5bd677e2dcb7 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -21,6 +21,8 @@ struct blk_mq_hw_ctx { struct delayed_work run_work; struct delayed_work delay_work; cpumask_var_t cpumask; + int next_cpu; + int next_cpu_batch; unsigned long flags; /* BLK_MQ_F_* flags */ @@ -126,6 +128,8 @@ enum { BLK_MQ_S_STOPPED = 0, BLK_MQ_MAX_DEPTH = 2048, + + BLK_MQ_CPU_WORK_BATCH = 8, }; struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *); -- cgit v1.2.3 From af76e555e5e29e08eb8ac1f7878e23dbf0d6741f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 6 May 2014 12:12:45 +0200 Subject: blk-mq: initialize struct request fields individually This allows us to avoid a non-atomic memset over ->atomic_flags as well as killing lots of duplicate initializations. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 20b26d4e53a2..94b27210641b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -90,9 +90,10 @@ enum rq_cmd_type_bits { #define BLK_MAX_CDB 16 /* - * try to put the fields that are referenced together in the same cacheline. - * if you modify this structure, be sure to check block/blk-core.c:blk_rq_init() - * as well! + * Try to put the fields that are referenced together in the same cacheline. + * + * If you modify this structure, make sure to update blk_rq_init() and + * especially blk_mq_rq_ctx_init() to take care of the added fields. */ struct request { struct list_head queuelist; -- cgit v1.2.3 From 4bb659b156996f2993dc16fad71fec9ee070153c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 9 May 2014 09:36:49 -0600 Subject: blk-mq: implement new and more efficient tagging scheme blk-mq currently uses percpu_ida for tag allocation. But that only works well if the ratio between tag space and number of CPUs is sufficiently high. For most devices and systems, that is not the case. The end result if that we either only utilize the tag space partially, or we end up attempting to fully exhaust it and run into lots of lock contention with stealing between CPUs. This is not optimal. This new tagging scheme is a hybrid bitmap allocator. It uses two tricks to both be SMP friendly and allow full exhaustion of the space: 1) We cache the last allocated (or freed) tag on a per blk-mq software context basis. This allows us to limit the space we have to search. The key element here is not caching it in the shared tag structure, otherwise we end up dirtying more shared cache lines on each allocate/free operation. 2) The tag space is split into cache line sized groups, and each context will start off randomly in that space. Even up to full utilization of the space, this divides the tag users efficiently into cache line groups, avoiding dirtying the same one both between allocators and between allocator and freeer. This scheme shows drastically better behaviour, both on small tag spaces but on large ones as well. It has been tested extensively to show better performance for all the cases blk-mq cares about. Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 5bd677e2dcb7..f83d15f6e1c1 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -31,10 +31,12 @@ struct blk_mq_hw_ctx { void *driver_data; - unsigned int nr_ctx; - struct blk_mq_ctx **ctxs; unsigned int nr_ctx_map; unsigned long *ctx_map; + unsigned int nr_ctx; + struct blk_mq_ctx **ctxs; + + unsigned int wait_index; struct blk_mq_tags *tags; -- cgit v1.2.3 From 0d2602ca30e410e84e8bdf05c84ed5688e0a5a44 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 13 May 2014 15:10:52 -0600 Subject: blk-mq: improve support for shared tags maps This adds support for active queue tracking, meaning that the blk-mq tagging maintains a count of active users of a tag set. This allows us to maintain a notion of fairness between users, so that we can distribute the tag depth evenly without starving some users while allowing others to try unfair deep queues. If sharing of a tag set is detected, each hardware queue will track the depth of its own queue. And if this exceeds the total depth divided by the number of active queues, the user is actively throttled down. The active queue count is done lazily to avoid bouncing that data between submitter and completer. Each hardware queue gets marked active when it allocates its first tag, and gets marked inactive when 1) the last tag is cleared, and 2) the queue timeout grace period has passed. Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 7 +++++++ include/linux/blk_types.h | 2 ++ include/linux/blkdev.h | 3 +++ 3 files changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index f83d15f6e1c1..379f88d5c44d 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -48,6 +48,8 @@ struct blk_mq_hw_ctx { unsigned int numa_node; unsigned int cmd_size; /* per-request extra data */ + atomic_t nr_active; + struct blk_mq_cpu_notifier cpu_notifier; struct kobject kobj; }; @@ -64,6 +66,9 @@ struct blk_mq_tag_set { void *driver_data; struct blk_mq_tags **tags; + + struct mutex tag_list_lock; + struct list_head tag_list; }; typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, struct request *); @@ -126,8 +131,10 @@ enum { BLK_MQ_F_SHOULD_MERGE = 1 << 0, BLK_MQ_F_SHOULD_SORT = 1 << 1, + BLK_MQ_F_TAG_SHARED = 1 << 2, BLK_MQ_S_STOPPED = 0, + BLK_MQ_S_TAG_ACTIVE = 1, BLK_MQ_MAX_DEPTH = 2048, diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index aa0eaa2d0bd8..d8e4cea23a25 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -190,6 +190,7 @@ enum rq_flag_bits { __REQ_PM, /* runtime pm request */ __REQ_END, /* last of chain of requests */ __REQ_HASHED, /* on IO scheduler merge hash */ + __REQ_MQ_INFLIGHT, /* track inflight for MQ */ __REQ_NR_BITS, /* stops here */ }; @@ -243,5 +244,6 @@ enum rq_flag_bits { #define REQ_PM (1ULL << __REQ_PM) #define REQ_END (1ULL << __REQ_END) #define REQ_HASHED (1ULL << __REQ_HASHED) +#define REQ_MQ_INFLIGHT (1ULL << __REQ_MQ_INFLIGHT) #endif /* __LINUX_BLK_TYPES_H */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 94b27210641b..6bc011a09e82 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -481,6 +481,9 @@ struct request_queue { wait_queue_head_t mq_freeze_wq; struct percpu_counter mq_usage_counter; struct list_head all_q_node; + + struct blk_mq_tag_set *tag_set; + struct list_head tag_set_list; }; #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ -- cgit v1.2.3 From 1429d7c9467e1e3de0b0ff91d7e4d67c1a92f8a3 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 19 May 2014 09:23:55 -0600 Subject: blk-mq: switch ctx pending map to the sparser blk_align_bitmap Each hardware queue has a bitmap of software queues with pending requests. When new IO is queued on a software queue, the bit is set, and when IO is pruned on a hardware queue run, the bit is cleared. This causes a lot of traffic. Switch this from the regular BITS_PER_LONG bitmap to a sparser layout, similarly to what was done for blk-mq tagging. 20% performance increase was observed for single threaded IO, and about 15% performanc increase on multiple threads driving the same device. Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index f83d15f6e1c1..952e558ee598 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -11,6 +11,12 @@ struct blk_mq_cpu_notifier { void (*notify)(void *data, unsigned long action, unsigned int cpu); }; +struct blk_mq_ctxmap { + unsigned int map_size; + unsigned int bits_per_word; + struct blk_align_bitmap *map; +}; + struct blk_mq_hw_ctx { struct { spinlock_t lock; @@ -31,8 +37,8 @@ struct blk_mq_hw_ctx { void *driver_data; - unsigned int nr_ctx_map; - unsigned long *ctx_map; + struct blk_mq_ctxmap ctx_map; + unsigned int nr_ctx; struct blk_mq_ctx **ctxs; -- cgit v1.2.3 From e3a2b3f931f59d5284abd13faf8bded726884ffd Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 20 May 2014 11:49:02 -0600 Subject: blk-mq: allow changing of queue depth through sysfs For request_fn based devices, the block layer exports a 'nr_requests' file through sysfs to allow adjusting of queue depth on the fly. Currently this returns -EINVAL for blk-mq, since it's not wired up. Wire this up for blk-mq, so that it now also always dynamic adjustments of the allowed queue depth for any given block device managed by blk-mq. Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index a06ca7b5ea05..f45424453338 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -63,7 +63,7 @@ struct blk_mq_hw_ctx { struct blk_mq_tag_set { struct blk_mq_ops *ops; unsigned int nr_hw_queues; - unsigned int queue_depth; + unsigned int queue_depth; /* max hw supported */ unsigned int reserved_tags; unsigned int cmd_size; /* per-request extra data */ int numa_node; -- cgit v1.2.3 From e814e71ba4a6e1d7509b0f4b1928365ea650cace Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 21 May 2014 13:59:08 -0600 Subject: blk-mq: allow the hctx cpu hotplug notifier to return errors Prepare this for the next patch which adds more smarts in the plugging logic, so that we can save some memory. Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index f45424453338..4d2800567aad 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -8,7 +8,7 @@ struct blk_mq_tags; struct blk_mq_cpu_notifier { struct list_head list; void *data; - void (*notify)(void *data, unsigned long action, unsigned int cpu); + int (*notify)(void *data, unsigned long action, unsigned int cpu); }; struct blk_mq_ctxmap { -- cgit v1.2.3 From edf866b3805c5651bf7d035b72dc0190cb6ff4a7 Mon Sep 17 00:00:00 2001 From: Sam Bradshaw Date: Fri, 23 May 2014 13:30:16 -0600 Subject: blk-mq: export blk_mq_tag_busy_iter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Export the blk-mq in-flight tag iterator for driver consumption. This is particularly useful in exception paths or SRSI where in-flight IOs need to be cancelled and/or reissued. The NVMe driver conversion will use this. Signed-off-by: Sam Bradshaw Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 4d2800567aad..f76bb18350af 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -181,6 +181,7 @@ void blk_mq_stop_hw_queues(struct request_queue *q); void blk_mq_start_hw_queues(struct request_queue *q); void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async); void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); +void blk_mq_tag_busy_iter(struct blk_mq_tags *tags, void (*fn)(void *data, unsigned long *), void *data); /* * Driver command data is immediately after the request. So subtract request -- cgit v1.2.3 From f14bbe77a96bb979dc539d8308ee18a9363a544f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 27 May 2014 12:06:53 -0600 Subject: blk-mq: pass in suggested NUMA node to ->alloc_hctx() Drivers currently have to figure this out on their own, and they are missing information to do it properly. The ones that did attempt to do it, do it wrong. So just pass in the suggested node directly to the alloc function. Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index f76bb18350af..afeb93496907 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -80,7 +80,7 @@ struct blk_mq_tag_set { typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, struct request *); typedef struct blk_mq_hw_ctx *(map_queue_fn)(struct request_queue *, const int); typedef struct blk_mq_hw_ctx *(alloc_hctx_fn)(struct blk_mq_tag_set *, - unsigned int); + unsigned int, int); typedef void (free_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int); typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int); typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int); @@ -165,7 +165,7 @@ struct request *blk_mq_alloc_reserved_request(struct request_queue *q, int rw, g struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag); struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index); -struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int); +struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int, int); void blk_mq_free_single_hw_queue(struct blk_mq_hw_ctx *, unsigned int); void blk_mq_end_io(struct request *rq, int error); -- cgit v1.2.3 From 95f096849932fe5eaa7bfec887530cf556744a76 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 27 May 2014 17:46:48 -0600 Subject: blk-mq: allow non-softirq completions Right now we export two ways of completing a request: 1) blk_mq_complete_request(). This uses an IPI (if needed) and completes through q->softirq_done_fn(). It also works with timeouts. 2) blk_mq_end_io(). This completes inline, and ignores any timeout state of the request. Let blk_mq_complete_request() handle non-softirq_done_fn completions as well, by just completing inline. If a driver has enough completion ports to place completions correctly, it need not define a mq_ops->complete() and we can avoid an indirect function call by doing the completion inline. Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index afeb93496907..1dfeb1529a61 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -173,6 +173,10 @@ void __blk_mq_end_io(struct request *rq, int error); void blk_mq_requeue_request(struct request *rq); +/* + * Complete request through potential IPI for right placement. Driver must + * have defined a mq_ops->complete() hook for this. + */ void blk_mq_complete_request(struct request *rq); void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx); -- cgit v1.2.3 From 7738dac4f697ffbd0ed4c4aeb69a714ef9d876da Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 28 May 2014 08:06:34 -0600 Subject: blk-mq: remove stale comment for blk_mq_complete_request() It works for both IPI and local completions as of commit 95f096849932. Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 1dfeb1529a61..5b171fbe95c5 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -172,11 +172,6 @@ void blk_mq_end_io(struct request *rq, int error); void __blk_mq_end_io(struct request *rq, int error); void blk_mq_requeue_request(struct request *rq); - -/* - * Complete request through potential IPI for right placement. Driver must - * have defined a mq_ops->complete() hook for this. - */ void blk_mq_complete_request(struct request *rq); void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx); -- cgit v1.2.3 From 6fca6a611c27f1f0d90fbe1cc3c229dbf8c09e48 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 28 May 2014 08:08:02 -0600 Subject: blk-mq: add helper to insert requests from irq context Both the cache flush state machine and the SCSI midlayer want to submit requests from irq context, and the current per-request requeue_work unfortunately causes corruption due to sharing with the csd field for flushes. Replace them with a per-request_queue list of requests to be requeued. Based on an earlier test by Ming Lei. Signed-off-by: Christoph Hellwig Reported-by: Ming Lei Tested-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 ++ include/linux/blkdev.h | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 5b171fbe95c5..b9a74a386dbc 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -172,6 +172,8 @@ void blk_mq_end_io(struct request *rq, int error); void __blk_mq_end_io(struct request *rq, int error); void blk_mq_requeue_request(struct request *rq); +void blk_mq_add_to_requeue_list(struct request *rq, bool at_head); +void blk_mq_kick_requeue_list(struct request_queue *q); void blk_mq_complete_request(struct request *rq); void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6bc011a09e82..913f1c2d3be0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -99,7 +99,6 @@ struct request { struct list_head queuelist; union { struct call_single_data csd; - struct work_struct requeue_work; unsigned long fifo_time; }; @@ -463,6 +462,10 @@ struct request_queue { struct request *flush_rq; spinlock_t mq_flush_lock; + struct list_head requeue_list; + spinlock_t requeue_lock; + struct work_struct requeue_work; + struct mutex sysfs_lock; int bypass_depth; -- cgit v1.2.3 From 4ce01dd1a07d9cf3eaf44fbf4ea9a61b11badccc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 27 May 2014 20:59:46 +0200 Subject: blk-mq: merge blk_mq_alloc_reserved_request into blk_mq_alloc_request Instead of having two almost identical copies of the same code just let the callers pass in the reserved flag directly. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index b9a74a386dbc..2bd82f399128 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -160,8 +160,8 @@ void blk_mq_insert_request(struct request *, bool, bool, bool); void blk_mq_run_queues(struct request_queue *q, bool async); void blk_mq_free_request(struct request *rq); bool blk_mq_can_queue(struct blk_mq_hw_ctx *); -struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp); -struct request *blk_mq_alloc_reserved_request(struct request_queue *q, int rw, gfp_t gfp); +struct request *blk_mq_alloc_request(struct request_queue *q, int rw, + gfp_t gfp, bool reserved); struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag); struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index); -- cgit v1.2.3 From cdef54dd85ad66e77262ea57796a3e81683dd5d6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 28 May 2014 18:11:06 +0200 Subject: blk-mq: remove alloc_hctx and free_hctx methods There is no need for drivers to control hardware context allocation now that we do the context to node mapping in common code. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 2bd82f399128..91dfb75ce39f 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -79,9 +79,6 @@ struct blk_mq_tag_set { typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, struct request *); typedef struct blk_mq_hw_ctx *(map_queue_fn)(struct request_queue *, const int); -typedef struct blk_mq_hw_ctx *(alloc_hctx_fn)(struct blk_mq_tag_set *, - unsigned int, int); -typedef void (free_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int); typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int); typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int); typedef int (init_request_fn)(void *, struct request *, unsigned int, @@ -107,12 +104,6 @@ struct blk_mq_ops { softirq_done_fn *complete; - /* - * Override for hctx allocations (should probably go) - */ - alloc_hctx_fn *alloc_hctx; - free_hctx_fn *free_hctx; - /* * Called when the block layer side of a hardware queue has been * set up, allowing the driver to allocate/init matching structures. @@ -166,7 +157,6 @@ struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag); struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index); struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int, int); -void blk_mq_free_single_hw_queue(struct blk_mq_hw_ctx *, unsigned int); void blk_mq_end_io(struct request *rq, int error); void __blk_mq_end_io(struct request *rq, int error); -- cgit v1.2.3 From 4d92a9beb39d80a7d8ff7c04ae12a10290105ae5 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 29 May 2014 08:09:00 -0600 Subject: block: remove 'magic' from struct blk_plug I don't think we've ever caught any bugs with this, and there's the list poisoning for the plug lists to catch uninitialized cases. So remove the magic member and save 8 bytes in the struct. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 913f1c2d3be0..098304576d51 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1060,7 +1060,6 @@ static inline void blk_post_runtime_resume(struct request_queue *q, int err) {} * schedule() where blk_schedule_flush_plug() is called. */ struct blk_plug { - unsigned long magic; /* detect uninitialized use-cases */ struct list_head list; /* requests */ struct list_head mq_list; /* blk-mq requests */ struct list_head cb_list; /* md requires an unplug callback */ -- cgit v1.2.3 From 05f1dd5315217398fc8d122bdee80f96a9f21274 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 29 May 2014 09:53:32 -0600 Subject: block: add queue flag for disabling SG merging If devices are not SG starved, we waste a lot of time potentially collapsing SG segments. Enough that 1.5% of the CPU time goes to this, at only 400K IOPS. Add a queue flag, QUEUE_FLAG_NO_SG_MERGE, which just returns the number of vectors in a bio instead of looping over all segments and checking for collapsible ones. Add a BLK_MQ_F_SG_MERGE flag so that drivers can opt-in on the sg merging, if they so desire. Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 1 + include/linux/blkdev.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 91dfb75ce39f..95de239444d2 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -129,6 +129,7 @@ enum { BLK_MQ_F_SHOULD_MERGE = 1 << 0, BLK_MQ_F_SHOULD_SORT = 1 << 1, BLK_MQ_F_TAG_SHARED = 1 << 2, + BLK_MQ_F_SG_MERGE = 1 << 3, BLK_MQ_S_STOPPED = 0, BLK_MQ_S_TAG_ACTIVE = 1, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 098304576d51..695b9fd41efe 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -510,6 +510,7 @@ struct request_queue { #define QUEUE_FLAG_SAME_FORCE 18 /* force complete on same CPU */ #define QUEUE_FLAG_DEAD 19 /* queue tear-down finished */ #define QUEUE_FLAG_INIT_DONE 20 /* queue is initialized */ +#define QUEUE_FLAG_NO_SG_MERGE 21 /* don't attempt to merge SG segments*/ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_STACKABLE) | \ -- cgit v1.2.3 From 2230237500821aedfcf2bba2a79d9cbca389233c Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Fri, 30 May 2014 08:06:42 -0600 Subject: blk-mq: blk_mq_tag_to_rq should handle flush request flush request is special, which borrows the tag from the parent request. Hence blk_mq_tag_to_rq needs special handling to return the flush request from the tag. Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 95de239444d2..ad3adb73cc70 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -154,7 +154,7 @@ void blk_mq_free_request(struct request *rq); bool blk_mq_can_queue(struct blk_mq_hw_ctx *); struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp, bool reserved); -struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag); +struct request *blk_mq_tag_to_rq(struct blk_mq_hw_ctx *hctx, unsigned int tag); struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index); struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int, int); -- cgit v1.2.3 From 67aec14ce87fe25bdfff7dbf468556333df11c4e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 30 May 2014 08:25:36 -0600 Subject: blk-mq: make the sysfs mq/ layout reflect current mappings Currently blk-mq registers all the hardware queues in sysfs, regardless of whether it uses them (e.g. they have CPU mappings) or not. The unused hardware queues lack the cpux/ directories, and the other sysfs entries (like active, pending, etc) are all zeroes. Change this so that sysfs correctly reflects the current mappings of the hardware queues. Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index ad3adb73cc70..c15128833100 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -130,6 +130,7 @@ enum { BLK_MQ_F_SHOULD_SORT = 1 << 1, BLK_MQ_F_TAG_SHARED = 1 << 2, BLK_MQ_F_SG_MERGE = 1 << 3, + BLK_MQ_F_SYSFS_UP = 1 << 4, BLK_MQ_S_STOPPED = 0, BLK_MQ_S_TAG_ACTIVE = 1, -- cgit v1.2.3