From e1defc4ff0cf57aca6c5e3ff99fa503f5943c1f1 Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen"
Date: Fri, 22 May 2009 17:17:49 -0400
Subject: block: Do away with the notion of hardsect_size

Until now we have had a 1:1 mapping between storage device physical
block size and the logical block size used when addressing the device.
With SATA 4KB drives coming out that will no longer be the case.  The
sector size will be 4KB but the logical block size will remain
512 bytes.  Hence we need to distinguish between the physical block
size and the logical ditto.

This patch renames hardsect_size to logical_block_size.

Signed-off-by: Martin K. Petersen
Signed-off-by: Jens Axboe
---
 include/linux/device-mapper.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux/device-mapper.h')

diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index ded2d7c42668..49c2362977fd 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -149,7 +149,7 @@ struct io_restrictions {
        unsigned long bounce_pfn;
        unsigned long seg_boundary_mask;
        unsigned max_hw_sectors;
        unsigned max_sectors;
        unsigned max_segment_size;
-       unsigned short hardsect_size;
+       unsigned short logical_block_size;
        unsigned short max_hw_segments;
        unsigned short max_phys_segments;
        unsigned char no_cluster; /* inverted so that 0 is default */
--
cgit v1.2.3


From f9ab94cee313746573b2d693bc2afb807ebb0998 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka
Date: Mon, 22 Jun 2009 10:12:20 +0100
Subject: dm: introduce num_flush_requests

Introduce num_flush_requests, which a target sets to indicate how many
flush instructions (empty barriers) it wants to receive.  These are
sent by __clone_and_map_empty_barrier with map_info->flush_request
going from 0 to (num_flush_requests - 1).

Old targets without flush support won't receive any flush requests.

Signed-off-by: Mikulas Patocka
Signed-off-by: Alasdair G Kergon
---
 include/linux/device-mapper.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux/device-mapper.h')

diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 49c2362977fd..fc36a4d07723 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -21,6 +21,7 @@ typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t;
 union map_info {
        void *ptr;
        unsigned long long ll;
+       unsigned flush_request;
 };

 /*
@@ -167,6 +168,16 @@ struct dm_target {
        /* Always a power of 2 */
        sector_t split_io;

+       /*
+        * A number of zero-length barrier requests that will be submitted
+        * to the target for the purpose of flushing cache.
+        *
+        * The request number will be placed in union map_info->flush_request.
+        * It is the responsibility of the target driver to remap these
+        * requests to the real underlying devices.
+        */
+       unsigned num_flush_requests;
+
        /*
         * These are automatically filled in by
         * dm_table_get_device.
--
cgit v1.2.3


From 5ab97588fb266187b88d1ad893251c94388f18ba Mon Sep 17 00:00:00 2001
From: Mike Snitzer
Date: Mon, 22 Jun 2009 10:12:32 +0100
Subject: dm table: replace struct io_restrictions with struct queue_limits

Use blk_stack_limits() to stack block limits (including topology)
rather than duplicate the equivalent within Device Mapper.
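
For illustration only (a sketch, not code from this patch; the helper
name stack_one_device is invented), folding one underlying device's
limits into a table-wide struct queue_limits might look like this:

    static void stack_one_device(struct queue_limits *table_limits,
                                 struct block_device *bdev,
                                 sector_t offset)
    {
            struct request_queue *q = bdev_get_queue(bdev);
            char b[BDEVNAME_SIZE];

            /*
             * Fold the bottom device's limits (including topology)
             * into the table-wide limits.  A non-zero return means
             * the device's data area is misaligned at this offset.
             */
            if (blk_stack_limits(table_limits, &q->limits, offset))
                    printk(KERN_WARNING "%s: device is misaligned\n",
                           bdevname(bdev, b));
    }

Doing this once per underlying device replaces the open-coded min/max
logic that struct io_restrictions required.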
Signed-off-by: Mike Snitzer
Signed-off-by: Alasdair G Kergon
---
 include/linux/device-mapper.h | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

(limited to 'include/linux/device-mapper.h')

diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index fc36a4d07723..236880c1dc3f 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -144,18 +144,6 @@ struct target_type {
        struct list_head list;
 };

-struct io_restrictions {
-       unsigned long bounce_pfn;
-       unsigned long seg_boundary_mask;
-       unsigned max_hw_sectors;
-       unsigned max_sectors;
-       unsigned max_segment_size;
-       unsigned short logical_block_size;
-       unsigned short max_hw_segments;
-       unsigned short max_phys_segments;
-       unsigned char no_cluster; /* inverted so that 0 is default */
-};
-
 struct dm_target {
        struct dm_table *table;
        struct target_type *type;
@@ -164,7 +152,7 @@ struct dm_target {
        sector_t begin;
        sector_t len;

-       /* FIXME: turn this into a mask, and merge with io_restrictions */
+       /* FIXME: turn this into a mask, and merge with queue_limits */
        /* Always a power of 2 */
        sector_t split_io;
@@ -182,7 +170,7 @@ struct dm_target {
         * These are automatically filled in by
         * dm_table_get_device.
         */
-       struct io_restrictions limits;
+       struct queue_limits limits;

        /* target specific data */
        void *private;
--
cgit v1.2.3


From af4874e03ed82f050d5872d8c39ce64bf16b5c38 Mon Sep 17 00:00:00 2001
From: Mike Snitzer
Date: Mon, 22 Jun 2009 10:12:33 +0100
Subject: dm targets: introduce iterate_devices fn

Add .iterate_devices to 'struct target_type' to allow a function to be
called for all devices in a DM target.  Implement it for all targets
except those in dm-snap.c (origin and snapshot).

(The raid1 version number jumps to 1.12 because we originally reserved
1.1 to 1.11 for 'block_on_error' but ended up using 'handle_errors'
instead.)

Signed-off-by: Mike Snitzer
Signed-off-by: Alasdair G Kergon
Cc: martin.petersen@oracle.com
---
 include/linux/device-mapper.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux/device-mapper.h')

diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 236880c1dc3f..deac3b4e5e18 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -11,6 +11,7 @@
 #include <linux/bio.h>
 #include <linux/blkdev.h>

+struct dm_dev;
 struct dm_target;
 struct dm_table;
 struct mapped_device;
@@ -81,6 +82,15 @@ typedef int (*dm_ioctl_fn) (struct dm_target *ti, unsigned int cmd,
 typedef int (*dm_merge_fn) (struct dm_target *ti, struct bvec_merge_data *bvm,
                            struct bio_vec *biovec, int max_size);

+typedef int (*iterate_devices_callout_fn) (struct dm_target *ti,
+                                          struct dm_dev *dev,
+                                          sector_t physical_start,
+                                          void *data);
+
+typedef int (*dm_iterate_devices_fn) (struct dm_target *ti,
+                                     iterate_devices_callout_fn fn,
+                                     void *data);
+
 /*
  * Returns:
  * 0: The target can handle the next I/O immediately.
@@ -139,6 +149,7 @@ struct target_type {
        dm_ioctl_fn ioctl;
        dm_merge_fn merge;
        dm_busy_fn busy;
+       dm_iterate_devices_fn iterate_devices;

        /* For internal device-mapper use. */
        struct list_head list;
--
cgit v1.2.3
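
To illustrate the new hook (an invented example, not part of the patch
above), a single-device target in the style of dm-linear could
implement it by invoking the callout once per underlying device:

    struct linear_c {
            struct dm_dev *dev;
            sector_t start;         /* offset into the underlying device */
    };

    static int linear_iterate_devices(struct dm_target *ti,
                                      iterate_devices_callout_fn fn,
                                      void *data)
    {
            struct linear_c *lc = ti->private;

            /* One underlying device, so a single callout. */
            return fn(ti, lc->dev, lc->start, data);
    }

The target then sets .iterate_devices = linear_iterate_devices in its
target_type; a multi-device target would loop over its devices and
stop at the first non-zero return.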
From 754c5fc7ebb417b23601a6222a6005cc2e7f2913 Mon Sep 17 00:00:00 2001
From: Mike Snitzer
Date: Mon, 22 Jun 2009 10:12:34 +0100
Subject: dm: calculate queue limits during resume not load

Currently, device-mapper maintains a separate instance of 'struct
queue_limits' for each table of each device.  When the configuration of
a device is to be changed, first its table is loaded and this structure
is populated, then the device is 'resumed' and the calculated
queue_limits are applied.

This places restrictions on how userspace may process related devices,
where it is often advantageous to 'load' tables for several devices at
once before 'resuming' them together.  As the new queue_limits only
take effect after the 'resume', if they are changing and one device
uses another, the latter must be 'resumed' before the former may be
'loaded'.

This patch moves the calculation of these queue_limits out of the
'load' operation into 'resume'.  Since we are no longer pre-calculating
this struct, we no longer need to maintain copies within our dm
structs.

dm_set_device_limits() now passes the 'start' of the device's data area
(aka pe_start) as the 'offset' to blk_stack_limits().

init_valid_queue_limits() is replaced by blk_set_default_limits().

Signed-off-by: Mike Snitzer
Cc: martin.petersen@oracle.com
Signed-off-by: Alasdair G Kergon
---
 include/linux/device-mapper.h | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

(limited to 'include/linux/device-mapper.h')

diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index deac3b4e5e18..e6bf3b8c7bf2 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -103,7 +103,8 @@ void dm_error(const char *message);
 /*
  * Combine device limits.
  */
-void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev);
+int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
+                        sector_t start, void *data);

 struct dm_dev {
        struct block_device *bdev;
@@ -163,7 +164,6 @@ struct dm_target {
        sector_t begin;
        sector_t len;

-       /* FIXME: turn this into a mask, and merge with queue_limits */
        /* Always a power of 2 */
        sector_t split_io;
@@ -177,12 +177,6 @@ struct dm_target {
         */
        unsigned num_flush_requests;

-       /*
-        * These are automatically filled in by
-        * dm_table_get_device.
-        */
-       struct queue_limits limits;
-
        /* target specific data */
        void *private;
--
cgit v1.2.3


From cec47e3d4a861e1d942b3a580d0bbef2700d2bb2 Mon Sep 17 00:00:00 2001
From: Kiyoshi Ueda
Date: Mon, 22 Jun 2009 10:12:35 +0100
Subject: dm: prepare for request-based option

This patch adds core functions for request-based dm.

When struct mapped device (md) is initialized, md->queue has an I/O
scheduler and the following functions are used for request-based dm as
the queue functions:

    make_request_fn: dm_make_request()
    prep_rq_fn:      dm_prep_fn()
    request_fn:      dm_request_fn()
    softirq_done_fn: dm_softirq_done()
    lld_busy_fn:     dm_lld_busy()

Actual initializations are done in another patch (PATCH 2).

Below is a brief summary of how request-based dm behaves, including:
  - making a request from a bio
  - cloning, mapping and dispatching a request
  - completing a request and its bios
  - suspending the md
  - resuming the md

bio to request
==============
md->queue->make_request_fn() (dm_make_request()) calls __make_request()
for a bio submitted to the md.  Then, the bio is kept in the queue as a
new request, or merged into another request in the queue if possible.

Cloning and Mapping
===================
Cloning and mapping are done in md->queue->request_fn()
(dm_request_fn()), when requests are dispatched after they are sorted
by the I/O scheduler.  dm_request_fn() checks the busy state of the
underlying devices using the target's busy() function and, if they are
busy, stops dispatching requests to keep them on the dm device's
queue, as the sketch below illustrates.
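
A conceptual sketch of that dispatch loop follows (simplified; the
committed dm_request_fn() also handles queue plugging and the suspend
marker, which this sketch omits):

    static void dm_request_fn(struct request_queue *q)
    {
            struct mapped_device *md = q->queuedata;
            struct dm_table *map = dm_get_table(md);
            struct dm_target *ti;
            struct request *rq;

            while ((rq = blk_peek_request(q)) != NULL) {
                    ti = dm_table_find_target(map, blk_rq_pos(rq));

                    /*
                     * Underlying device busy: leave the request on
                     * md's queue, where new bios can still be merged
                     * into it, and retry later.
                     */
                    if (ti->type->busy && ti->type->busy(ti))
                            break;

                    blk_start_request(rq);          /* dequeue */
                    map_request(ti, rq, md);        /* clone, map_rq(), dispatch */
            }

            dm_table_put(map);
    }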
Keeping requests on the dm device's queue this way helps I/O merging,
since no merging is done for a request once it is dispatched to the
underlying devices.

Actual cloning and mapping are done in dm_prep_fn() and map_request(),
called from dm_request_fn().  dm_prep_fn() clones not only the request
but also the bios of the request, so that dm can hold back bio
completion in error cases and prevent the bio submitter from noticing
the error.  (See the "Completion" section below for details.)

After the cloning, the clone is mapped by the target's map_rq()
function and inserted into the underlying device's queue using
blk_insert_cloned_request().

Completion
==========
Request completion can be hooked by rq->end_io(), but then all bios in
the request will have been completed, even in error cases, and the bio
submitter will already have noticed the error.  To prevent bio
completion in error cases, request-based dm clones both the bio and
the request, and hooks both bio->bi_end_io() and rq->end_io():

    bio->bi_end_io(): end_clone_bio()
    rq->end_io():     end_clone_request()

A summary of the request completion flow:

    blk_end_request() for a clone request
      => blk_update_request()
         => bio->bi_end_io() == end_clone_bio() for each clone bio
            => Free the clone bio
            => Success: Complete the original bio
                        (blk_update_request())
               Error:   Don't complete the original bio
      => blk_finish_request()
         => rq->end_io() == end_clone_request()
            => blk_complete_request()
               => dm_softirq_done()
                  => Free the clone request
                  => Success: Complete the original request
                              (blk_end_request())
                     Error:   Requeue the original request

end_clone_bio() completes the original request by the size of the
original bio in successful cases.  Even if all bios in the original
request are completed by that point, the original request must not be
completed yet, to preserve the ordering of request completion for the
stacking.  So end_clone_bio() uses blk_update_request() instead of
blk_end_request().

In error cases, end_clone_bio() doesn't complete the original bio.  It
just frees the clone bio and hands the error handling over to
end_clone_request().

end_clone_request(), which is called with the queue lock held,
completes the clone request and the original request in a softirq
context (dm_softirq_done()), where the queue lock is not held, to
avoid a deadlock on submission of another request during the
completion:
  - the submitted request may be mapped to the same device;
  - request submission requires the queue lock, but that lock is
    already held by the completing context, which has no way of
    knowing it.

The clone request has no clone bios left by the time dm_softirq_done()
is called, so target drivers can't resubmit it, even in error cases.
Instead, they can ask dm core to requeue and remap the original
request in such cases.

suspend
=======
Request-based dm suspends the md by stopping md->queue.  For noflush
suspend, it just stops md->queue.  For flush suspend, it inserts a
marker request at the tail of md->queue and dispatches all requests in
md->queue until the marker comes to the front of md->queue.  Then it
stops dispatching requests and waits for all dispatched requests to
complete.  After that, it completes the marker request, stops md->queue
and wakes up the waiter on the suspend queue, md->wait.

resume
======
Starts md->queue.
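
For illustration only, a hypothetical pass-through target built on
this behaviour might look as follows.  The pt_ names and the context
struct are invented for the example; the map_rq and rq_end_io target
hooks are assumed from elsewhere in this patch series:

    struct pt_c {
            struct dm_dev *dev;     /* the single underlying device */
    };

    static int pt_map_rq(struct dm_target *ti, struct request *clone,
                         union map_info *map_context)
    {
            struct pt_c *pc = ti->private;

            /*
             * Point the clone at the underlying queue; dm core then
             * dispatches it with blk_insert_cloned_request().
             */
            clone->q = bdev_get_queue(pc->dev->bdev);
            clone->rq_disk = pc->dev->bdev->bd_disk;

            return DM_MAPIO_REMAPPED;
    }

    static int pt_rq_end_io(struct dm_target *ti, struct request *clone,
                            int error, union map_info *map_context)
    {
            /*
             * The clone has no bios left at this point, so it cannot
             * be resubmitted; ask dm core to requeue the original
             * request instead.
             */
            if (error)
                    return DM_ENDIO_REQUEUE;

            return 0;
    }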
Signed-off-by: Kiyoshi Ueda
Signed-off-by: Jun'ichi Nomura
Signed-off-by: Alasdair G Kergon
---
 include/linux/device-mapper.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux/device-mapper.h')

diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index e6bf3b8c7bf2..0d6310657f32 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -234,6 +234,7 @@ struct gendisk *dm_disk(struct mapped_device *md);
 int dm_suspended(struct mapped_device *md);
 int dm_noflush_suspending(struct dm_target *ti);
 union map_info *dm_get_mapinfo(struct bio *bio);
+union map_info *dm_get_rq_mapinfo(struct request *rq);

 /*
  * Geometry functions.
@@ -396,4 +397,12 @@ static inline unsigned long to_bytes(sector_t n)
        return (n << SECTOR_SHIFT);
 }

+/*-----------------------------------------------------------------
+ * Helper for block layer and dm core operations
+ *---------------------------------------------------------------*/
+void dm_dispatch_request(struct request *rq);
+void dm_requeue_unmapped_request(struct request *rq);
+void dm_kill_unmapped_request(struct request *rq, int error);
+int dm_underlying_device_busy(struct request_queue *q);
+
 #endif /* _LINUX_DEVICE_MAPPER_H */
--
cgit v1.2.3
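
As a closing illustration, here is roughly how dm core can tie the
helpers declared above to a target's map_rq() return value.  This is a
sketch reconstructed from the behaviour described in the commit
message, not the committed map_request():

    static void map_request(struct dm_target *ti, struct request *clone,
                            struct mapped_device *md)
    {
            union map_info *info = dm_get_rq_mapinfo(clone);
            int r = ti->type->map_rq(ti, clone, info);

            switch (r) {
            case DM_MAPIO_SUBMITTED:
                    /* The target has taken ownership of the clone. */
                    break;
            case DM_MAPIO_REMAPPED:
                    /* blk_insert_cloned_request() on clone->q. */
                    dm_dispatch_request(clone);
                    break;
            case DM_MAPIO_REQUEUE:
                    /* Put the original request back on md's queue. */
                    dm_requeue_unmapped_request(clone);
                    break;
            default:
                    /* Fail the original request. */
                    dm_kill_unmapped_request(clone, r);
                    break;
            }
    }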