Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/bcache/io.c        |   2
-rw-r--r--  drivers/md/bcache/request.c   |   2
-rw-r--r--  drivers/md/bitmap.c           |   7
-rw-r--r--  drivers/md/dm-cache-target.c  |   6
-rw-r--r--  drivers/md/dm-crypt.c         |  12
-rw-r--r--  drivers/md/dm-mpath.c         |   4
-rw-r--r--  drivers/md/dm-raid1.c         |   2
-rw-r--r--  drivers/md/dm-snap.c          |   1
-rw-r--r--  drivers/md/dm-table.c         |  41
-rw-r--r--  drivers/md/dm-thin.c          |   9
-rw-r--r--  drivers/md/dm-verity.c        |   2
-rw-r--r--  drivers/md/dm.c               | 211
-rw-r--r--  drivers/md/dm.h               |   5
-rw-r--r--  drivers/md/md.c               |  32
-rw-r--r--  drivers/md/raid0.c            |   9
-rw-r--r--  drivers/md/raid10.c           |   1
-rw-r--r--  drivers/md/raid5.c            | 270
-rw-r--r--  drivers/md/raid5.h            |   5
18 files changed, 299 insertions, 322 deletions
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index fa028fa82df4..cb64e64a4789 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -55,7 +55,7 @@ static void bch_bio_submit_split_done(struct closure *cl)
s->bio->bi_end_io = s->bi_end_io;
s->bio->bi_private = s->bi_private;
- bio_endio_nodec(s->bio, 0);
+ bio_endio(s->bio, 0);
closure_debug_destroy(&s->cl);
mempool_free(s, s->p->bio_split_hook);
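
The bcache hunks (and the dm-verity one further down) track a block-layer change that removed the bio->bi_remaining accounting, so bio_endio_nodec() disappears and plain bio_endio() is used after restoring the original completion hooks. A minimal userspace sketch of that save/restore idiom follows; 'struct bio' here is a stand-in for illustration, not the kernel's definition:

#include <stdio.h>

struct bio {
	void (*bi_end_io)(struct bio *, int);
	void *bi_private;
};

struct hook {
	void (*saved_end_io)(struct bio *, int);
	void *saved_private;
};

static void bio_endio(struct bio *bio, int error)
{
	bio->bi_end_io(bio, error);	/* the kernel version also walks bio chains */
}

static void orig_end_io(struct bio *bio, int error)
{
	printf("original completion, error=%d\n", error);
}

static void hooked_end_io(struct bio *bio, int error)
{
	struct hook *h = bio->bi_private;

	/* Restore the caller's hooks, then complete normally. With
	 * bi_remaining gone from the block layer there is nothing left
	 * to re-increment first, so plain bio_endio() replaces
	 * bio_endio_nodec(). */
	bio->bi_end_io = h->saved_end_io;
	bio->bi_private = h->saved_private;
	bio_endio(bio, error);
}

int main(void)
{
	struct bio bio = { orig_end_io, NULL };
	struct hook h = { bio.bi_end_io, bio.bi_private };

	bio.bi_end_io = hooked_end_io;
	bio.bi_private = &h;
	bio_endio(&bio, 0);	/* runs the hook, which restores and completes */
	return 0;
}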
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index ab43faddb447..1616f668a4cb 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -619,7 +619,7 @@ static void do_bio_hook(struct search *s, struct bio *orig_bio)
bio->bi_end_io = request_endio;
bio->bi_private = &s->cl;
- atomic_set(&bio->bi_cnt, 3);
+ bio_cnt_set(bio, 3);
}
static void search_free(struct closure *cl)
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 2bc56e2a3526..135a0907e9de 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -177,11 +177,16 @@ static struct md_rdev *next_active_rdev(struct md_rdev *rdev, struct mddev *mdde
* nr_pending is 0 and In_sync is clear, the entries we return will
* still be in the same position on the list when we re-enter
* list_for_each_entry_continue_rcu.
+ *
+ * Note that if entered with 'rdev == NULL' to start at the
+ * beginning, we temporarily assign 'rdev' to an address which
+ * isn't really an rdev, but which can be used by
+ * list_for_each_entry_continue_rcu() to find the first entry.
*/
rcu_read_lock();
if (rdev == NULL)
/* start at the beginning */
- rdev = list_entry_rcu(&mddev->disks, struct md_rdev, same_set);
+ rdev = list_entry(&mddev->disks, struct md_rdev, same_set);
else {
/* release the previous rdev and start from there. */
rdev_dec_pending(rdev, mddev);
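
The list_entry_rcu() to list_entry() swap matters because 'rdev' here is deliberately bogus: &mddev->disks is the list head itself, and list_entry() is plain container_of() arithmetic, whereas list_entry_rcu() would rcu_dereference() an address that was never published as an RCU pointer. A standalone sketch of the trick, with the list modeled loosely on include/linux/list.h:

#include <stdio.h>
#include <stddef.h>

struct list_head { struct list_head *next, *prev; };

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))
#define list_entry(ptr, type, member) container_of(ptr, type, member)

struct md_rdev {
	int nr;
	struct list_head same_set;
};

int main(void)
{
	struct md_rdev a = { 1 }, b = { 2 };
	struct list_head disks = { &a.same_set, &b.same_set };

	a.same_set.next = &b.same_set; a.same_set.prev = &disks;
	b.same_set.next = &disks;      b.same_set.prev = &a.same_set;

	/* The trick: treat the list head itself as a fake rdev. Only its
	 * 'same_set' member (which aliases 'disks') may be touched, so a
	 * "continue" style iteration starts at the first real entry. */
	struct md_rdev *rdev = list_entry(&disks, struct md_rdev, same_set);

	for (struct list_head *p = rdev->same_set.next; p != &disks; p = p->next)
		printf("rdev %d\n", list_entry(p, struct md_rdev, same_set)->nr);
	return 0;
}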
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 7755af351867..41b2594a80c6 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -86,12 +86,6 @@ static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio)
{
bio->bi_end_io = h->bi_end_io;
bio->bi_private = h->bi_private;
-
- /*
- * Must bump bi_remaining to allow bio to complete with
- * restored bi_end_io.
- */
- atomic_inc(&bio->bi_remaining);
}
/*----------------------------------------------------------------*/
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 9eeea196328a..5503e43e5f28 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -925,10 +925,11 @@ static int crypt_convert(struct crypt_config *cc,
switch (r) {
/* async */
- case -EINPROGRESS:
case -EBUSY:
wait_for_completion(&ctx->restart);
reinit_completion(&ctx->restart);
+ /* fall through */
+ case -EINPROGRESS:
ctx->req = NULL;
ctx->cc_sector++;
continue;
@@ -1345,8 +1346,10 @@ static void kcryptd_async_done(struct crypto_async_request *async_req,
struct dm_crypt_io *io = container_of(ctx, struct dm_crypt_io, ctx);
struct crypt_config *cc = io->cc;
- if (error == -EINPROGRESS)
+ if (error == -EINPROGRESS) {
+ complete(&ctx->restart);
return;
+ }
if (!error && cc->iv_gen_ops && cc->iv_gen_ops->post)
error = cc->iv_gen_ops->post(cc, iv_of_dmreq(cc, dmreq), dmreq);
@@ -1357,15 +1360,12 @@ static void kcryptd_async_done(struct crypto_async_request *async_req,
crypt_free_req(cc, req_of_dmreq(cc, dmreq), io->base_bio);
if (!atomic_dec_and_test(&ctx->cc_pending))
- goto done;
+ return;
if (bio_data_dir(io->base_bio) == READ)
kcryptd_crypt_read_done(io);
else
kcryptd_crypt_write_io_submit(io, 1);
-done:
- if (!completion_done(&ctx->restart))
- complete(&ctx->restart);
}
static void kcryptd_crypt(struct work_struct *work)
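
The reordered switch plus the complete() in kcryptd_async_done() close a deadlock: a submitter stuck in wait_for_completion() after -EBUSY is now woken as soon as the backend accepts the backlogged request (reported by invoking the callback with -EINPROGRESS), and then falls through to the same bookkeeping as a directly accepted request. A single-threaded userspace model of that handshake; the real backend invokes the callback asynchronously, so the busy-wait below stands in for a sleeping wait:

#include <stdio.h>
#include <errno.h>

struct completion { int done; };
static void complete(struct completion *c) { c->done = 1; }
static void wait_for_completion(struct completion *c)
{ while (!c->done) ; /* model only: signalled before we get here */ }
static void reinit_completion(struct completion *c) { c->done = 0; }

static struct completion restart;

/* Model of kcryptd_async_done() after this fix: signal the waiter as
 * soon as the backlogged request is accepted (-EINPROGRESS). */
static void async_done(int error)
{
	if (error == -EINPROGRESS) {
		complete(&restart);
		return;
	}
	/* ... normal completion bookkeeping ... */
}

/* Fake backend: the queue is full, so it backlogs the request and later
 * (here: immediately) reports acceptance through the callback. */
static int enqueue(void)
{
	async_done(-EINPROGRESS);
	return -EBUSY;
}

int main(void)
{
	int in_flight = 0;

	switch (enqueue()) {
	case -EBUSY:			/* backlogged: wait for acceptance */
		wait_for_completion(&restart);
		reinit_completion(&restart);
		/* fall through */
	case -EINPROGRESS:		/* request is now in flight */
		in_flight++;
		break;
	}
	printf("in flight: %d\n", in_flight);
	return 0;
}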
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 63953477a07c..eff7bdd7731d 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -429,9 +429,11 @@ static int __multipath_map(struct dm_target *ti, struct request *clone,
/* blk-mq request-based interface */
*__clone = blk_get_request(bdev_get_queue(bdev),
rq_data_dir(rq), GFP_ATOMIC);
- if (IS_ERR(*__clone))
+ if (IS_ERR(*__clone)) {
/* ENOMEM, requeue */
+ clear_mapinfo(m, map_context);
return r;
+ }
(*__clone)->bio = (*__clone)->biotail = NULL;
(*__clone)->rq_disk = bdev->bd_disk;
(*__clone)->cmd_flags |= REQ_FAILFAST_TRANSPORT;
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 089d62751f7f..743fa9bbae9e 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -1254,8 +1254,6 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
dm_bio_restore(bd, bio);
bio_record->details.bi_bdev = NULL;
- atomic_inc(&bio->bi_remaining);
-
queue_bio(ms, bio, rw);
return DM_ENDIO_INCOMPLETE;
}
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index f83a0f3fc365..7c82d3ccce87 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1478,7 +1478,6 @@ out:
if (full_bio) {
full_bio->bi_end_io = pe->full_bio_end_io;
full_bio->bi_private = pe->full_bio_private;
- atomic_inc(&full_bio->bi_remaining);
}
increment_pending_exceptions_done_count();
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index d9b00b8565c6..a5f94125ad01 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -820,6 +820,12 @@ void dm_consume_args(struct dm_arg_set *as, unsigned num_args)
}
EXPORT_SYMBOL(dm_consume_args);
+static bool __table_type_request_based(unsigned table_type)
+{
+ return (table_type == DM_TYPE_REQUEST_BASED ||
+ table_type == DM_TYPE_MQ_REQUEST_BASED);
+}
+
static int dm_table_set_type(struct dm_table *t)
{
unsigned i;
@@ -852,8 +858,7 @@ static int dm_table_set_type(struct dm_table *t)
* Determine the type from the live device.
* Default to bio-based if device is new.
*/
- if (live_md_type == DM_TYPE_REQUEST_BASED ||
- live_md_type == DM_TYPE_MQ_REQUEST_BASED)
+ if (__table_type_request_based(live_md_type))
request_based = 1;
else
bio_based = 1;
@@ -903,7 +908,7 @@ static int dm_table_set_type(struct dm_table *t)
}
t->type = DM_TYPE_MQ_REQUEST_BASED;
- } else if (hybrid && list_empty(devices) && live_md_type != DM_TYPE_NONE) {
+ } else if (list_empty(devices) && __table_type_request_based(live_md_type)) {
/* inherit live MD type */
t->type = live_md_type;
@@ -925,10 +930,7 @@ struct target_type *dm_table_get_immutable_target_type(struct dm_table *t)
bool dm_table_request_based(struct dm_table *t)
{
- unsigned table_type = dm_table_get_type(t);
-
- return (table_type == DM_TYPE_REQUEST_BASED ||
- table_type == DM_TYPE_MQ_REQUEST_BASED);
+ return __table_type_request_based(dm_table_get_type(t));
}
bool dm_table_mq_request_based(struct dm_table *t)
@@ -940,21 +942,28 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *
{
unsigned type = dm_table_get_type(t);
unsigned per_bio_data_size = 0;
- struct dm_target *tgt;
unsigned i;
- if (unlikely(type == DM_TYPE_NONE)) {
+ switch (type) {
+ case DM_TYPE_BIO_BASED:
+ for (i = 0; i < t->num_targets; i++) {
+ struct dm_target *tgt = t->targets + i;
+
+ per_bio_data_size = max(per_bio_data_size,
+ tgt->per_bio_data_size);
+ }
+ t->mempools = dm_alloc_bio_mempools(t->integrity_supported,
+ per_bio_data_size);
+ break;
+ case DM_TYPE_REQUEST_BASED:
+ case DM_TYPE_MQ_REQUEST_BASED:
+ t->mempools = dm_alloc_rq_mempools(md, type);
+ break;
+ default:
DMWARN("no table type is set, can't allocate mempools");
return -EINVAL;
}
- if (type == DM_TYPE_BIO_BASED)
- for (i = 0; i < t->num_targets; i++) {
- tgt = t->targets + i;
- per_bio_data_size = max(per_bio_data_size, tgt->per_bio_data_size);
- }
-
- t->mempools = dm_alloc_md_mempools(md, type, t->integrity_supported, per_bio_data_size);
if (!t->mempools)
return -ENOMEM;
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 921aafd12aee..e852602c0091 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -793,10 +793,9 @@ static void inc_remap_and_issue_cell(struct thin_c *tc,
static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m)
{
- if (m->bio) {
+ if (m->bio)
m->bio->bi_end_io = m->saved_bi_end_io;
- atomic_inc(&m->bio->bi_remaining);
- }
+
cell_error(m->tc->pool, m->cell);
list_del(&m->list);
mempool_free(m, m->tc->pool->mapping_pool);
@@ -810,10 +809,8 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m)
int r;
bio = m->bio;
- if (bio) {
+ if (bio)
bio->bi_end_io = m->saved_bi_end_io;
- atomic_inc(&bio->bi_remaining);
- }
if (m->err) {
cell_error(pool, m->cell);
diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c
index 66616db33e6f..bb9c6a00e4b0 100644
--- a/drivers/md/dm-verity.c
+++ b/drivers/md/dm-verity.c
@@ -459,7 +459,7 @@ static void verity_finish_io(struct dm_verity_io *io, int error)
bio->bi_end_io = io->orig_bi_end_io;
bio->bi_private = io->orig_bi_private;
- bio_endio_nodec(bio, error);
+ bio_endio(bio, error);
}
static void verity_work(struct work_struct *w)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index a930b72314ac..4d6f089a0e9e 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -990,57 +990,6 @@ static void clone_endio(struct bio *bio, int error)
dec_pending(io, error);
}
-/*
- * Partial completion handling for request-based dm
- */
-static void end_clone_bio(struct bio *clone, int error)
-{
- struct dm_rq_clone_bio_info *info =
- container_of(clone, struct dm_rq_clone_bio_info, clone);
- struct dm_rq_target_io *tio = info->tio;
- struct bio *bio = info->orig;
- unsigned int nr_bytes = info->orig->bi_iter.bi_size;
-
- bio_put(clone);
-
- if (tio->error)
- /*
- * An error has already been detected on the request.
- * Once error occurred, just let clone->end_io() handle
- * the remainder.
- */
- return;
- else if (error) {
- /*
- * Don't notice the error to the upper layer yet.
- * The error handling decision is made by the target driver,
- * when the request is completed.
- */
- tio->error = error;
- return;
- }
-
- /*
- * I/O for the bio successfully completed.
- * Notice the data completion to the upper layer.
- */
-
- /*
- * bios are processed from the head of the list.
- * So the completing bio should always be rq->bio.
- * If it's not, something wrong is happening.
- */
- if (tio->orig->bio != bio)
- DMERR("bio completion is going in the middle of the request");
-
- /*
- * Update the original request.
- * Do not use blk_end_request() here, because it may complete
- * the original request before the clone, and break the ordering.
- */
- blk_update_request(tio->orig, 0, nr_bytes);
-}
-
static struct dm_rq_target_io *tio_from_request(struct request *rq)
{
return (rq->q->mq_ops ? blk_mq_rq_to_pdu(rq) : rq->special);
@@ -1082,15 +1031,11 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
dm_put(md);
}
-static void free_rq_clone(struct request *clone, bool must_be_mapped)
+static void free_rq_clone(struct request *clone)
{
struct dm_rq_target_io *tio = clone->end_io_data;
struct mapped_device *md = tio->md;
- WARN_ON_ONCE(must_be_mapped && !clone->q);
-
- blk_rq_unprep_clone(clone);
-
if (md->type == DM_TYPE_MQ_REQUEST_BASED)
/* stacked on blk-mq queue(s) */
tio->ti->type->release_clone_rq(clone);
@@ -1132,7 +1077,7 @@ static void dm_end_request(struct request *clone, int error)
rq->sense_len = clone->sense_len;
}
- free_rq_clone(clone, true);
+ free_rq_clone(clone);
if (!rq->q->mq_ops)
blk_end_request_all(rq, error);
else
@@ -1151,7 +1096,7 @@ static void dm_unprep_request(struct request *rq)
}
if (clone)
- free_rq_clone(clone, false);
+ free_rq_clone(clone);
}
/*
@@ -1164,6 +1109,7 @@ static void old_requeue_request(struct request *rq)
spin_lock_irqsave(q->queue_lock, flags);
blk_requeue_request(q, rq);
+ blk_run_queue_async(q);
spin_unlock_irqrestore(q->queue_lock, flags);
}
@@ -1724,8 +1670,7 @@ static int dm_merge_bvec(struct request_queue *q,
struct mapped_device *md = q->queuedata;
struct dm_table *map = dm_get_live_table_fast(md);
struct dm_target *ti;
- sector_t max_sectors;
- int max_size = 0;
+ sector_t max_sectors, max_size = 0;
if (unlikely(!map))
goto out;
@@ -1740,8 +1685,16 @@ static int dm_merge_bvec(struct request_queue *q,
max_sectors = min(max_io_len(bvm->bi_sector, ti),
(sector_t) queue_max_sectors(q));
max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size;
- if (unlikely(max_size < 0)) /* this shouldn't _ever_ happen */
- max_size = 0;
+
+ /*
+ * FIXME: this stop-gap fix _must_ be cleaned up (by passing a sector_t
+ * to the targets' merge function since it holds sectors not bytes).
+ * Just doing this as an interim fix for stable@ because the more
+ * comprehensive cleanup of switching to sector_t will impact every
+ * DM target that implements a ->merge hook.
+ */
+ if (max_size > INT_MAX)
+ max_size = INT_MAX;
/*
* merge_bvec_fn() returns number of bytes
@@ -1749,7 +1702,7 @@ static int dm_merge_bvec(struct request_queue *q,
* max is precomputed maximal io size
*/
if (max_size && ti->type->merge)
- max_size = ti->type->merge(ti, bvm, biovec, max_size);
+ max_size = ti->type->merge(ti, bvm, biovec, (int) max_size);
/*
* If the target doesn't support merge method and some of the devices
* provided their merge_bvec method (we know this by looking for the
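
The INT_MAX clamp replaces a check that could not work: max_sectors is a sector_t, so (max_sectors << SECTOR_SHIFT) - bvm->bi_size is computed in 64 bits, and the old code truncated the result into a signed int before testing it for negativity; a huge value can truncate to positive garbage the test never sees. A small demonstration of the arithmetic, assuming a 64-bit sector_t as on the configurations this targets:

#include <stdio.h>
#include <stdint.h>
#include <limits.h>

typedef uint64_t sector_t;
#define SECTOR_SHIFT 9

int main(void)
{
	sector_t max_sectors = 16 * 1024 * 1024;	/* 16Mi sectors = 8 GiB */
	unsigned int bi_size = 4096;

	/* Old code truncated the 64-bit result into 'int max_size' and then
	 * tested 'max_size < 0'; 8 GiB truncates to a value the test may
	 * never catch. Keeping sector_t width and clamping is exact. */
	sector_t max_size = (max_sectors << SECTOR_SHIFT) - bi_size;
	if (max_size > INT_MAX)		/* well-defined clamp for the int API */
		max_size = INT_MAX;
	printf("max_size = %d\n", (int)max_size);
	return 0;
}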
@@ -1821,39 +1774,13 @@ static void dm_dispatch_clone_request(struct request *clone, struct request *rq)
dm_complete_request(rq, r);
}
-static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
- void *data)
+static void setup_clone(struct request *clone, struct request *rq,
+ struct dm_rq_target_io *tio)
{
- struct dm_rq_target_io *tio = data;
- struct dm_rq_clone_bio_info *info =
- container_of(bio, struct dm_rq_clone_bio_info, clone);
-
- info->orig = bio_orig;
- info->tio = tio;
- bio->bi_end_io = end_clone_bio;
-
- return 0;
-}
-
-static int setup_clone(struct request *clone, struct request *rq,
- struct dm_rq_target_io *tio, gfp_t gfp_mask)
-{
- int r;
-
- r = blk_rq_prep_clone(clone, rq, tio->md->bs, gfp_mask,
- dm_rq_bio_constructor, tio);
- if (r)
- return r;
-
- clone->cmd = rq->cmd;
- clone->cmd_len = rq->cmd_len;
- clone->sense = rq->sense;
+ blk_rq_prep_clone(clone, rq);
clone->end_io = end_clone_request;
clone->end_io_data = tio;
-
tio->clone = clone;
-
- return 0;
}
static struct request *clone_rq(struct request *rq, struct mapped_device *md,
@@ -1874,12 +1801,7 @@ static struct request *clone_rq(struct request *rq, struct mapped_device *md,
clone = tio->clone;
blk_rq_init(NULL, clone);
- if (setup_clone(clone, rq, tio, gfp_mask)) {
- /* -ENOMEM */
- if (alloc_clone)
- free_clone_request(md, clone);
- return NULL;
- }
+ setup_clone(clone, rq, tio);
return clone;
}
@@ -1971,13 +1893,9 @@ static int map_request(struct dm_rq_target_io *tio, struct request *rq,
dm_kill_unmapped_request(rq, r);
return r;
}
- if (IS_ERR(clone))
- return DM_MAPIO_REQUEUE;
- if (setup_clone(clone, rq, tio, GFP_ATOMIC)) {
- /* -ENOMEM */
- ti->type->release_clone_rq(clone);
- return DM_MAPIO_REQUEUE;
- }
+ if (r != DM_MAPIO_REMAPPED)
+ return r;
+ setup_clone(clone, rq, tio);
}
switch (r) {
@@ -2431,8 +2349,6 @@ static void __bind_mempools(struct mapped_device *md, struct dm_table *t)
goto out;
}
- BUG_ON(!p || md->io_pool || md->rq_pool || md->bs);
-
md->io_pool = p->io_pool;
p->io_pool = NULL;
md->rq_pool = p->rq_pool;
@@ -2753,13 +2669,15 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
if (dm_table_get_type(map) == DM_TYPE_REQUEST_BASED) {
/* clone request is allocated at the end of the pdu */
tio->clone = (void *)blk_mq_rq_to_pdu(rq) + sizeof(struct dm_rq_target_io);
- if (!clone_rq(rq, md, tio, GFP_ATOMIC))
- return BLK_MQ_RQ_QUEUE_BUSY;
+ (void) clone_rq(rq, md, tio, GFP_ATOMIC);
queue_kthread_work(&md->kworker, &tio->work);
} else {
/* Direct call is fine since .queue_rq allows allocations */
- if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE)
- dm_requeue_unmapped_original_request(md, rq);
+ if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) {
+ /* Undo dm_start_request() before requeuing */
+ rq_completed(md, rq_data_dir(rq), false);
+ return BLK_MQ_RQ_QUEUE_BUSY;
+ }
}
return BLK_MQ_RQ_QUEUE_OK;
@@ -3536,48 +3454,23 @@ int dm_noflush_suspending(struct dm_target *ti)
}
EXPORT_SYMBOL_GPL(dm_noflush_suspending);
-struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned type,
- unsigned integrity, unsigned per_bio_data_size)
+struct dm_md_mempools *dm_alloc_bio_mempools(unsigned integrity,
+ unsigned per_bio_data_size)
{
- struct dm_md_mempools *pools = kzalloc(sizeof(*pools), GFP_KERNEL);
- struct kmem_cache *cachep = NULL;
- unsigned int pool_size = 0;
+ struct dm_md_mempools *pools;
+ unsigned int pool_size = dm_get_reserved_bio_based_ios();
unsigned int front_pad;
+ pools = kzalloc(sizeof(*pools), GFP_KERNEL);
if (!pools)
return NULL;
- type = filter_md_type(type, md);
-
- switch (type) {
- case DM_TYPE_BIO_BASED:
- cachep = _io_cache;
- pool_size = dm_get_reserved_bio_based_ios();
- front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone);
- break;
- case DM_TYPE_REQUEST_BASED:
- cachep = _rq_tio_cache;
- pool_size = dm_get_reserved_rq_based_ios();
- pools->rq_pool = mempool_create_slab_pool(pool_size, _rq_cache);
- if (!pools->rq_pool)
- goto out;
- /* fall through to setup remaining rq-based pools */
- case DM_TYPE_MQ_REQUEST_BASED:
- if (!pool_size)
- pool_size = dm_get_reserved_rq_based_ios();
- front_pad = offsetof(struct dm_rq_clone_bio_info, clone);
- /* per_bio_data_size is not used. See __bind_mempools(). */
- WARN_ON(per_bio_data_size != 0);
- break;
- default:
- BUG();
- }
+ front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) +
+ offsetof(struct dm_target_io, clone);
- if (cachep) {
- pools->io_pool = mempool_create_slab_pool(pool_size, cachep);
- if (!pools->io_pool)
- goto out;
- }
+ pools->io_pool = mempool_create_slab_pool(pool_size, _io_cache);
+ if (!pools->io_pool)
+ goto out;
pools->bs = bioset_create_nobvec(pool_size, front_pad);
if (!pools->bs)
@@ -3587,10 +3480,34 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned t
goto out;
return pools;
-
out:
dm_free_md_mempools(pools);
+ return NULL;
+}
+
+struct dm_md_mempools *dm_alloc_rq_mempools(struct mapped_device *md,
+ unsigned type)
+{
+ unsigned int pool_size = dm_get_reserved_rq_based_ios();
+ struct dm_md_mempools *pools;
+
+ pools = kzalloc(sizeof(*pools), GFP_KERNEL);
+ if (!pools)
+ return NULL;
+ if (filter_md_type(type, md) == DM_TYPE_REQUEST_BASED) {
+ pools->rq_pool = mempool_create_slab_pool(pool_size, _rq_cache);
+ if (!pools->rq_pool)
+ goto out;
+ }
+
+ pools->io_pool = mempool_create_slab_pool(pool_size, _rq_tio_cache);
+ if (!pools->io_pool)
+ goto out;
+
+ return pools;
+out:
+ dm_free_md_mempools(pools);
return NULL;
}
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 6123c2bf9150..e6e66d087b26 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -222,8 +222,9 @@ void dm_kcopyd_exit(void);
/*
* Mempool operations
*/
-struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned type,
- unsigned integrity, unsigned per_bio_data_size);
+struct dm_md_mempools *dm_alloc_bio_mempools(unsigned integrity,
+ unsigned per_bio_data_size);
+struct dm_md_mempools *dm_alloc_rq_mempools(struct mapped_device *md, unsigned type);
void dm_free_md_mempools(struct dm_md_mempools *pools);
/*
diff --git a/drivers/md/md.c b/drivers/md/md.c
index d4f31e195e26..4dbed4a67aaf 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -3834,7 +3834,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
err = -EBUSY;
}
spin_unlock(&mddev->lock);
- return err;
+ return err ?: len;
}
err = mddev_lock(mddev);
if (err)
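
The 'err ?: len' uses the GNU C conditional with omitted middle operand: a ?: b evaluates a once and yields it if nonzero, else b. Without it this early-return path reported 0 on success, which the caller sees as a zero-byte write. A tiny illustration:

#include <stdio.h>

static long store(int err, long len)
{
	return err ?: len;	/* GNU C: "a ?: b" == "a ? a : b", a evaluated once */
}

int main(void)
{
	printf("%ld %ld\n", store(0, 17), store(-16, 17));	/* 17 -16 */
	return 0;
}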
@@ -4211,34 +4211,36 @@ action_store(struct mddev *mddev, const char *page, size_t len)
if (!mddev->pers || !mddev->pers->sync_request)
return -EINVAL;
- if (cmd_match(page, "frozen"))
- set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- else
- clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
- flush_workqueue(md_misc_wq);
- if (mddev->sync_thread) {
- set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- if (mddev_lock(mddev) == 0) {
+ if (cmd_match(page, "frozen"))
+ set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+ else
+ clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+ if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
+ mddev_lock(mddev) == 0) {
+ flush_workqueue(md_misc_wq);
+ if (mddev->sync_thread) {
+ set_bit(MD_RECOVERY_INTR, &mddev->recovery);
md_reap_sync_thread(mddev);
- mddev_unlock(mddev);
}
+ mddev_unlock(mddev);
}
} else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
return -EBUSY;
else if (cmd_match(page, "resync"))
- set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+ clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
else if (cmd_match(page, "recover")) {
+ clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
- set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
} else if (cmd_match(page, "reshape")) {
int err;
if (mddev->pers->start_reshape == NULL)
return -EINVAL;
err = mddev_lock(mddev);
if (!err) {
+ clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
err = mddev->pers->start_reshape(mddev);
mddev_unlock(mddev);
}
@@ -4250,6 +4252,7 @@ action_store(struct mddev *mddev, const char *page, size_t len)
set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
else if (!cmd_match(page, "repair"))
return -EINVAL;
+ clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
}
@@ -4818,12 +4821,12 @@ static void md_free(struct kobject *ko)
if (mddev->sysfs_state)
sysfs_put(mddev->sysfs_state);
+ if (mddev->queue)
+ blk_cleanup_queue(mddev->queue);
if (mddev->gendisk) {
del_gendisk(mddev->gendisk);
put_disk(mddev->gendisk);
}
- if (mddev->queue)
- blk_cleanup_queue(mddev->queue);
kfree(mddev);
}
@@ -8259,6 +8262,7 @@ void md_reap_sync_thread(struct mddev *mddev)
if (mddev_is_clustered(mddev))
md_cluster_ops->metadata_update_finish(mddev);
clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
+ clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 2cb59a641cd2..efb654eb5399 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -188,8 +188,9 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
}
dev[j] = rdev1;
- disk_stack_limits(mddev->gendisk, rdev1->bdev,
- rdev1->data_offset << 9);
+ if (mddev->queue)
+ disk_stack_limits(mddev->gendisk, rdev1->bdev,
+ rdev1->data_offset << 9);
if (rdev1->bdev->bd_disk->queue->merge_bvec_fn)
conf->has_merge_bvec = 1;
@@ -523,6 +524,9 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
? (sector & (chunk_sects-1))
: sector_div(sector, chunk_sects));
+ /* Restore due to sector_div */
+ sector = bio->bi_iter.bi_sector;
+
if (sectors < bio_sectors(bio)) {
split = bio_split(bio, sectors, GFP_NOIO, fs_bio_set);
bio_chain(split, bio);
@@ -530,7 +534,6 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
split = bio;
}
- sector = bio->bi_iter.bi_sector;
zone = find_zone(mddev->private, &sector);
tmp_dev = map_sector(mddev, zone, sector, &sector);
split->bi_bdev = tmp_dev->bdev;
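
The moved assignment matters for two reasons: sector_div() divides its first argument in place (returning the remainder), so 'sector' holds the quotient after 'sectors' is computed; and bio_split() advances the parent bio's iterator, so bi_iter.bi_sector must be re-read before the split, not after. A userspace model of the in-place division, with the macro modeled on the 64-bit kernel version:

#include <stdio.h>
#include <stdint.h>

typedef uint64_t sector_t;

/* Model of the kernel's sector_div(): divides in place, returns the
 * remainder (GNU statement-expression, as in the kernel). */
#define sector_div(n, base) ({			\
	sector_t _rem = (n) % (base);		\
	(n) /= (base);				\
	_rem;					\
})

int main(void)
{
	sector_t sector = 1000003, saved = sector;
	unsigned chunk_sects = 128;

	sector_t sectors = chunk_sects - sector_div(sector, chunk_sects);
	/* 'sector' now holds the quotient and must be restored before use */
	printf("quotient %llu, sectors to chunk end %llu\n",
	       (unsigned long long)sector, (unsigned long long)sectors);
	sector = saved;		/* the fix: reload before mapping the split bio */
	return 0;
}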
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index e793ab6b3570..f55c3f35b746 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -4156,6 +4156,7 @@ static int raid10_start_reshape(struct mddev *mddev)
clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
+ clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 77dfd720aaa0..b6793d2e051f 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -749,6 +749,7 @@ static void unlock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2)
static bool stripe_can_batch(struct stripe_head *sh)
{
return test_bit(STRIPE_BATCH_READY, &sh->state) &&
+ !test_bit(STRIPE_BITMAP_PENDING, &sh->state) &&
is_full_stripe_write(sh);
}
@@ -837,6 +838,15 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh
< IO_THRESHOLD)
md_wakeup_thread(conf->mddev->thread);
+ if (test_and_clear_bit(STRIPE_BIT_DELAY, &sh->state)) {
+ int seq = sh->bm_seq;
+ if (test_bit(STRIPE_BIT_DELAY, &sh->batch_head->state) &&
+ sh->batch_head->bm_seq > seq)
+ seq = sh->batch_head->bm_seq;
+ set_bit(STRIPE_BIT_DELAY, &sh->batch_head->state);
+ sh->batch_head->bm_seq = seq;
+ }
+
atomic_inc(&sh->count);
unlock_out:
unlock_two_stripes(head, sh);
@@ -1078,9 +1088,6 @@ again:
pr_debug("skip op %ld on disc %d for sector %llu\n",
bi->bi_rw, i, (unsigned long long)sh->sector);
clear_bit(R5_LOCKED, &sh->dev[i].flags);
- if (sh->batch_head)
- set_bit(STRIPE_BATCH_ERR,
- &sh->batch_head->state);
set_bit(STRIPE_HANDLE, &sh->state);
}
@@ -1825,7 +1832,7 @@ again:
} else
init_async_submit(&submit, 0, tx, NULL, NULL,
to_addr_conv(sh, percpu, j));
- async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit);
+ tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit);
if (!last_stripe) {
j++;
sh = list_first_entry(&sh->batch_list, struct stripe_head,
@@ -1971,17 +1978,30 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
put_cpu();
}
+static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp)
+{
+ struct stripe_head *sh;
+
+ sh = kmem_cache_zalloc(sc, gfp);
+ if (sh) {
+ spin_lock_init(&sh->stripe_lock);
+ spin_lock_init(&sh->batch_lock);
+ INIT_LIST_HEAD(&sh->batch_list);
+ INIT_LIST_HEAD(&sh->lru);
+ atomic_set(&sh->count, 1);
+ }
+ return sh;
+}
static int grow_one_stripe(struct r5conf *conf, gfp_t gfp)
{
struct stripe_head *sh;
- sh = kmem_cache_zalloc(conf->slab_cache, gfp);
+
+ sh = alloc_stripe(conf->slab_cache, gfp);
if (!sh)
return 0;
sh->raid_conf = conf;
- spin_lock_init(&sh->stripe_lock);
-
if (grow_buffers(sh, gfp)) {
shrink_buffers(sh);
kmem_cache_free(conf->slab_cache, sh);
@@ -1990,13 +2010,8 @@ static int grow_one_stripe(struct r5conf *conf, gfp_t gfp)
sh->hash_lock_index =
conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS;
/* we just created an active stripe so... */
- atomic_set(&sh->count, 1);
atomic_inc(&conf->active_stripes);
- INIT_LIST_HEAD(&sh->lru);
- spin_lock_init(&sh->batch_lock);
- INIT_LIST_HEAD(&sh->batch_list);
- sh->batch_head = NULL;
release_stripe(sh);
conf->max_nr_stripes++;
return 1;
@@ -2060,6 +2075,35 @@ static struct flex_array *scribble_alloc(int num, int cnt, gfp_t flags)
return ret;
}
+static int resize_chunks(struct r5conf *conf, int new_disks, int new_sectors)
+{
+ unsigned long cpu;
+ int err = 0;
+
+ mddev_suspend(conf->mddev);
+ get_online_cpus();
+ for_each_present_cpu(cpu) {
+ struct raid5_percpu *percpu;
+ struct flex_array *scribble;
+
+ percpu = per_cpu_ptr(conf->percpu, cpu);
+ scribble = scribble_alloc(new_disks,
+ new_sectors / STRIPE_SECTORS,
+ GFP_NOIO);
+
+ if (scribble) {
+ flex_array_free(percpu->scribble);
+ percpu->scribble = scribble;
+ } else {
+ err = -ENOMEM;
+ break;
+ }
+ }
+ put_online_cpus();
+ mddev_resume(conf->mddev);
+ return err;
+}
+
static int resize_stripes(struct r5conf *conf, int newsize)
{
/* Make all the stripes able to hold 'newsize' devices.
@@ -2088,7 +2132,6 @@ static int resize_stripes(struct r5conf *conf, int newsize)
struct stripe_head *osh, *nsh;
LIST_HEAD(newstripes);
struct disk_info *ndisks;
- unsigned long cpu;
int err;
struct kmem_cache *sc;
int i;
@@ -2109,13 +2152,11 @@ static int resize_stripes(struct r5conf *conf, int newsize)
return -ENOMEM;
for (i = conf->max_nr_stripes; i; i--) {
- nsh = kmem_cache_zalloc(sc, GFP_KERNEL);
+ nsh = alloc_stripe(sc, GFP_KERNEL);
if (!nsh)
break;
nsh->raid_conf = conf;
- spin_lock_init(&nsh->stripe_lock);
-
list_add(&nsh->lru, &newstripes);
}
if (i) {
@@ -2142,13 +2183,11 @@ static int resize_stripes(struct r5conf *conf, int newsize)
lock_device_hash_lock(conf, hash));
osh = get_free_stripe(conf, hash);
unlock_device_hash_lock(conf, hash);
- atomic_set(&nsh->count, 1);
+
for(i=0; i<conf->pool_size; i++) {
nsh->dev[i].page = osh->dev[i].page;
nsh->dev[i].orig_page = osh->dev[i].page;
}
- for( ; i<newsize; i++)
- nsh->dev[i].page = NULL;
nsh->hash_lock_index = hash;
kmem_cache_free(conf->slab_cache, osh);
cnt++;
@@ -2174,25 +2213,6 @@ static int resize_stripes(struct r5conf *conf, int newsize)
} else
err = -ENOMEM;
- get_online_cpus();
- for_each_present_cpu(cpu) {
- struct raid5_percpu *percpu;
- struct flex_array *scribble;
-
- percpu = per_cpu_ptr(conf->percpu, cpu);
- scribble = scribble_alloc(newsize, conf->chunk_sectors /
- STRIPE_SECTORS, GFP_NOIO);
-
- if (scribble) {
- flex_array_free(percpu->scribble);
- percpu->scribble = scribble;
- } else {
- err = -ENOMEM;
- break;
- }
- }
- put_online_cpus();
-
/* Step 4, return new stripes to service */
while(!list_empty(&newstripes)) {
nsh = list_entry(newstripes.next, struct stripe_head, lru);
@@ -2212,7 +2232,8 @@ static int resize_stripes(struct r5conf *conf, int newsize)
conf->slab_cache = sc;
conf->active_name = 1-conf->active_name;
- conf->pool_size = newsize;
+ if (!err)
+ conf->pool_size = newsize;
return err;
}
@@ -2434,7 +2455,7 @@ static void raid5_end_write_request(struct bio *bi, int error)
}
rdev_dec_pending(rdev, conf->mddev);
- if (sh->batch_head && !uptodate)
+ if (sh->batch_head && !uptodate && !replacement)
set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state);
if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags))
@@ -2976,14 +2997,32 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx,
pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n",
(unsigned long long)(*bip)->bi_iter.bi_sector,
(unsigned long long)sh->sector, dd_idx);
- spin_unlock_irq(&sh->stripe_lock);
if (conf->mddev->bitmap && firstwrite) {
+ /* Cannot hold spinlock over bitmap_startwrite,
+ * but must ensure this isn't added to a batch until
+ * we have added to the bitmap and set bm_seq.
+ * So set STRIPE_BITMAP_PENDING to prevent
+ * batching.
+ * If multiple add_stripe_bio() calls race here they
+ * must all set STRIPE_BITMAP_PENDING. So only the first one
+ * to complete "bitmap_startwrite" gets to set
+ * STRIPE_BIT_DELAY. This is important as once a stripe
+ * is added to a batch, STRIPE_BIT_DELAY cannot be changed
+ * any more.
+ */
+ set_bit(STRIPE_BITMAP_PENDING, &sh->state);
+ spin_unlock_irq(&sh->stripe_lock);
bitmap_startwrite(conf->mddev->bitmap, sh->sector,
STRIPE_SECTORS, 0);
- sh->bm_seq = conf->seq_flush+1;
- set_bit(STRIPE_BIT_DELAY, &sh->state);
+ spin_lock_irq(&sh->stripe_lock);
+ clear_bit(STRIPE_BITMAP_PENDING, &sh->state);
+ if (!sh->batch_head) {
+ sh->bm_seq = conf->seq_flush+1;
+ set_bit(STRIPE_BIT_DELAY, &sh->state);
+ }
}
+ spin_unlock_irq(&sh->stripe_lock);
if (stripe_can_batch(sh))
stripe_add_to_batch_list(conf, sh);
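
The reworked section is the classic drop-the-lock-around-a-blocking-call pattern: bitmap_startwrite() can sleep, so it cannot run under stripe_lock, and the new STRIPE_BITMAP_PENDING bit (checked by stripe_can_batch() above) keeps the stripe out of any batch across the unlocked window. A simplified single-caller model of the shape; names are illustrative, and the real code additionally tolerates several racing add_stripe_bio() callers:

#include <pthread.h>
#include <stdio.h>

enum { BITMAP_PENDING = 1, BIT_DELAY = 2 };

struct stripe {
	pthread_mutex_t lock;
	unsigned state;
	int batch_head;		/* nonzero once batched */
};

static void bitmap_startwrite(void) { /* may sleep in the kernel */ }

static void add_write(struct stripe *sh)
{
	pthread_mutex_lock(&sh->lock);
	sh->state |= BITMAP_PENDING;	/* no batching while this is set */
	pthread_mutex_unlock(&sh->lock);

	bitmap_startwrite();		/* blocking: must run unlocked */

	pthread_mutex_lock(&sh->lock);
	sh->state &= ~BITMAP_PENDING;
	if (!sh->batch_head)		/* not batched meanwhile: safe to set */
		sh->state |= BIT_DELAY;
	pthread_mutex_unlock(&sh->lock);
}

int main(void)
{
	struct stripe sh = { PTHREAD_MUTEX_INITIALIZER, 0, 0 };

	add_write(&sh);
	printf("state %u\n", sh.state);
	return 0;
}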
@@ -3278,7 +3317,9 @@ static int need_this_block(struct stripe_head *sh, struct stripe_head_state *s,
/* reconstruct-write isn't being forced */
return 0;
for (i = 0; i < s->failed; i++) {
- if (!test_bit(R5_UPTODATE, &fdev[i]->flags) &&
+ if (s->failed_num[i] != sh->pd_idx &&
+ s->failed_num[i] != sh->qd_idx &&
+ !test_bit(R5_UPTODATE, &fdev[i]->flags) &&
!test_bit(R5_OVERWRITE, &fdev[i]->flags))
return 1;
}
@@ -3298,6 +3339,7 @@ static int fetch_block(struct stripe_head *sh, struct stripe_head_state *s,
*/
BUG_ON(test_bit(R5_Wantcompute, &dev->flags));
BUG_ON(test_bit(R5_Wantread, &dev->flags));
+ BUG_ON(sh->batch_head);
if ((s->uptodate == disks - 1) &&
(s->failed && (disk_idx == s->failed_num[0] ||
disk_idx == s->failed_num[1]))) {
@@ -3366,7 +3408,6 @@ static void handle_stripe_fill(struct stripe_head *sh,
{
int i;
- BUG_ON(sh->batch_head);
/* look for blocks to read/compute, skip this if a compute
* is already in flight, or if the stripe contents are in the
* midst of changing due to a write
@@ -3379,6 +3420,8 @@ static void handle_stripe_fill(struct stripe_head *sh,
set_bit(STRIPE_HANDLE, &sh->state);
}
+static void break_stripe_batch_list(struct stripe_head *head_sh,
+ unsigned long handle_flags);
/* handle_stripe_clean_event
* any written block on an uptodate or failed drive can be returned.
* Note that if we 'wrote' to a failed drive, it will be UPTODATE, but
@@ -3392,7 +3435,6 @@ static void handle_stripe_clean_event(struct r5conf *conf,
int discard_pending = 0;
struct stripe_head *head_sh = sh;
bool do_endio = false;
- int wakeup_nr = 0;
for (i = disks; i--; )
if (sh->dev[i].written) {
@@ -3481,44 +3523,8 @@ unhash:
if (atomic_dec_and_test(&conf->pending_full_writes))
md_wakeup_thread(conf->mddev->thread);
- if (!head_sh->batch_head || !do_endio)
- return;
- for (i = 0; i < head_sh->disks; i++) {
- if (test_and_clear_bit(R5_Overlap, &head_sh->dev[i].flags))
- wakeup_nr++;
- }
- while (!list_empty(&head_sh->batch_list)) {
- int i;
- sh = list_first_entry(&head_sh->batch_list,
- struct stripe_head, batch_list);
- list_del_init(&sh->batch_list);
-
- set_mask_bits(&sh->state, ~STRIPE_EXPAND_SYNC_FLAG,
- head_sh->state & ~((1 << STRIPE_ACTIVE) |
- (1 << STRIPE_PREREAD_ACTIVE) |
- STRIPE_EXPAND_SYNC_FLAG));
- sh->check_state = head_sh->check_state;
- sh->reconstruct_state = head_sh->reconstruct_state;
- for (i = 0; i < sh->disks; i++) {
- if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
- wakeup_nr++;
- sh->dev[i].flags = head_sh->dev[i].flags;
- }
-
- spin_lock_irq(&sh->stripe_lock);
- sh->batch_head = NULL;
- spin_unlock_irq(&sh->stripe_lock);
- if (sh->state & STRIPE_EXPAND_SYNC_FLAG)
- set_bit(STRIPE_HANDLE, &sh->state);
- release_stripe(sh);
- }
-
- spin_lock_irq(&head_sh->stripe_lock);
- head_sh->batch_head = NULL;
- spin_unlock_irq(&head_sh->stripe_lock);
- wake_up_nr(&conf->wait_for_overlap, wakeup_nr);
- if (head_sh->state & STRIPE_EXPAND_SYNC_FLAG)
- set_bit(STRIPE_HANDLE, &head_sh->state);
+ if (head_sh->batch_head && do_endio)
+ break_stripe_batch_list(head_sh, STRIPE_EXPAND_SYNC_FLAGS);
}
static void handle_stripe_dirtying(struct r5conf *conf,
@@ -4159,9 +4165,13 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
static int clear_batch_ready(struct stripe_head *sh)
{
+ /* Return '1' if this is a member of a batch, or
+ * '0' if it is a lone stripe or a head which can now be
+ * handled.
+ */
struct stripe_head *tmp;
if (!test_and_clear_bit(STRIPE_BATCH_READY, &sh->state))
- return 0;
+ return (sh->batch_head && sh->batch_head != sh);
spin_lock(&sh->stripe_lock);
if (!sh->batch_head) {
spin_unlock(&sh->stripe_lock);
@@ -4189,46 +4199,65 @@ static int clear_batch_ready(struct stripe_head *sh)
return 0;
}
-static void check_break_stripe_batch_list(struct stripe_head *sh)
+static void break_stripe_batch_list(struct stripe_head *head_sh,
+ unsigned long handle_flags)
{
- struct stripe_head *head_sh, *next;
+ struct stripe_head *sh, *next;
int i;
+ int do_wakeup = 0;
- if (!test_and_clear_bit(STRIPE_BATCH_ERR, &sh->state))
- return;
+ list_for_each_entry_safe(sh, next, &head_sh->batch_list, batch_list) {
- head_sh = sh;
- do {
- sh = list_first_entry(&sh->batch_list,
- struct stripe_head, batch_list);
- BUG_ON(sh == head_sh);
- } while (!test_bit(STRIPE_DEGRADED, &sh->state));
-
- while (sh != head_sh) {
- next = list_first_entry(&sh->batch_list,
- struct stripe_head, batch_list);
list_del_init(&sh->batch_list);
- set_mask_bits(&sh->state, ~STRIPE_EXPAND_SYNC_FLAG,
- head_sh->state & ~((1 << STRIPE_ACTIVE) |
- (1 << STRIPE_PREREAD_ACTIVE) |
- (1 << STRIPE_DEGRADED) |
- STRIPE_EXPAND_SYNC_FLAG));
+ WARN_ON_ONCE(sh->state & ((1 << STRIPE_ACTIVE) |
+ (1 << STRIPE_SYNCING) |
+ (1 << STRIPE_REPLACED) |
+ (1 << STRIPE_PREREAD_ACTIVE) |
+ (1 << STRIPE_DELAYED) |
+ (1 << STRIPE_BIT_DELAY) |
+ (1 << STRIPE_FULL_WRITE) |
+ (1 << STRIPE_BIOFILL_RUN) |
+ (1 << STRIPE_COMPUTE_RUN) |
+ (1 << STRIPE_OPS_REQ_PENDING) |
+ (1 << STRIPE_DISCARD) |
+ (1 << STRIPE_BATCH_READY) |
+ (1 << STRIPE_BATCH_ERR) |
+ (1 << STRIPE_BITMAP_PENDING)));
+ WARN_ON_ONCE(head_sh->state & ((1 << STRIPE_DISCARD) |
+ (1 << STRIPE_REPLACED)));
+
+ set_mask_bits(&sh->state, ~(STRIPE_EXPAND_SYNC_FLAGS |
+ (1 << STRIPE_DEGRADED)),
+ head_sh->state & (1 << STRIPE_INSYNC));
+
sh->check_state = head_sh->check_state;
sh->reconstruct_state = head_sh->reconstruct_state;
- for (i = 0; i < sh->disks; i++)
+ for (i = 0; i < sh->disks; i++) {
+ if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
+ do_wakeup = 1;
sh->dev[i].flags = head_sh->dev[i].flags &
(~((1 << R5_WriteError) | (1 << R5_Overlap)));
-
+ }
spin_lock_irq(&sh->stripe_lock);
sh->batch_head = NULL;
spin_unlock_irq(&sh->stripe_lock);
-
- set_bit(STRIPE_HANDLE, &sh->state);
+ if (handle_flags == 0 ||
+ sh->state & handle_flags)
+ set_bit(STRIPE_HANDLE, &sh->state);
release_stripe(sh);
-
- sh = next;
}
+ spin_lock_irq(&head_sh->stripe_lock);
+ head_sh->batch_head = NULL;
+ spin_unlock_irq(&head_sh->stripe_lock);
+ for (i = 0; i < head_sh->disks; i++)
+ if (test_and_clear_bit(R5_Overlap, &head_sh->dev[i].flags))
+ do_wakeup = 1;
+ if (head_sh->state & handle_flags)
+ set_bit(STRIPE_HANDLE, &head_sh->state);
+
+ if (do_wakeup)
+ wake_up(&head_sh->raid_conf->wait_for_overlap);
}
static void handle_stripe(struct stripe_head *sh)
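
In the consolidated helper, handle_flags selects which batch members get re-queued for handling once the batch is dissolved: the error paths pass 0 (re-handle everything), while the write-completion path passes STRIPE_EXPAND_SYNC_FLAGS (re-handle only stripes with expand/sync state pending). The gate reduces to the predicate below, shown as a minimal sketch:

#include <stdio.h>

/* flags == 0 re-queues every batch member; otherwise only members
 * whose state intersects the mask. */
static int should_handle(unsigned long state, unsigned long handle_flags)
{
	return handle_flags == 0 || (state & handle_flags);
}

int main(void)
{
	printf("%d %d %d\n",
	       should_handle(0x4, 0),	/* error path: always 1 */
	       should_handle(0x4, 0x4),	/* expand/sync pending: 1 */
	       should_handle(0x2, 0x4));/* nothing relevant: 0 */
	return 0;
}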
@@ -4253,7 +4282,8 @@ static void handle_stripe(struct stripe_head *sh)
return;
}
- check_break_stripe_batch_list(sh);
+ if (test_and_clear_bit(STRIPE_BATCH_ERR, &sh->state))
+ break_stripe_batch_list(sh, 0);
if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state) && !sh->batch_head) {
spin_lock(&sh->stripe_lock);
@@ -4307,6 +4337,7 @@ static void handle_stripe(struct stripe_head *sh)
if (s.failed > conf->max_degraded) {
sh->check_state = 0;
sh->reconstruct_state = 0;
+ break_stripe_batch_list(sh, 0);
if (s.to_read+s.to_write+s.written)
handle_failed_stripe(conf, sh, &s, disks, &s.return_bi);
if (s.syncing + s.replacing)
@@ -6221,8 +6252,11 @@ static int alloc_scratch_buffer(struct r5conf *conf, struct raid5_percpu *percpu
percpu->spare_page = alloc_page(GFP_KERNEL);
if (!percpu->scribble)
percpu->scribble = scribble_alloc(max(conf->raid_disks,
- conf->previous_raid_disks), conf->chunk_sectors /
- STRIPE_SECTORS, GFP_KERNEL);
+ conf->previous_raid_disks),
+ max(conf->chunk_sectors,
+ conf->prev_chunk_sectors)
+ / STRIPE_SECTORS,
+ GFP_KERNEL);
if (!percpu->scribble || (conf->level == 6 && !percpu->spare_page)) {
free_scratch_buffer(conf, percpu);
@@ -7198,6 +7232,15 @@ static int check_reshape(struct mddev *mddev)
if (!check_stripe_cache(mddev))
return -ENOSPC;
+ if (mddev->new_chunk_sectors > mddev->chunk_sectors ||
+ mddev->delta_disks > 0)
+ if (resize_chunks(conf,
+ conf->previous_raid_disks
+ + max(0, mddev->delta_disks),
+ max(mddev->new_chunk_sectors,
+ mddev->chunk_sectors)
+ ) < 0)
+ return -ENOMEM;
return resize_stripes(conf, (conf->previous_raid_disks
+ mddev->delta_disks));
}
@@ -7311,6 +7354,7 @@ static int raid5_start_reshape(struct mddev *mddev)
clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
+ clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
mddev->sync_thread = md_register_thread(md_do_sync, mddev,
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 7dc0dd86074b..896d603ad0da 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -337,9 +337,12 @@ enum {
STRIPE_ON_RELEASE_LIST,
STRIPE_BATCH_READY,
STRIPE_BATCH_ERR,
+ STRIPE_BITMAP_PENDING, /* Being added to bitmap, don't add
+ * to batch yet.
+ */
};
-#define STRIPE_EXPAND_SYNC_FLAG \
+#define STRIPE_EXPAND_SYNC_FLAGS \
((1 << STRIPE_EXPAND_SOURCE) |\
(1 << STRIPE_EXPAND_READY) |\
(1 << STRIPE_EXPANDING) |\