diff options
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/bcache/io.c | 2 | ||||
-rw-r--r-- | drivers/md/bcache/request.c | 2 | ||||
-rw-r--r-- | drivers/md/bitmap.c | 7 | ||||
-rw-r--r-- | drivers/md/dm-cache-target.c | 6 | ||||
-rw-r--r-- | drivers/md/dm-crypt.c | 12 | ||||
-rw-r--r-- | drivers/md/dm-mpath.c | 4 | ||||
-rw-r--r-- | drivers/md/dm-raid1.c | 2 | ||||
-rw-r--r-- | drivers/md/dm-snap.c | 1 | ||||
-rw-r--r-- | drivers/md/dm-table.c | 41 | ||||
-rw-r--r-- | drivers/md/dm-thin.c | 9 | ||||
-rw-r--r-- | drivers/md/dm-verity.c | 2 | ||||
-rw-r--r-- | drivers/md/dm.c | 211 | ||||
-rw-r--r-- | drivers/md/dm.h | 5 | ||||
-rw-r--r-- | drivers/md/md.c | 32 | ||||
-rw-r--r-- | drivers/md/raid0.c | 9 | ||||
-rw-r--r-- | drivers/md/raid10.c | 1 | ||||
-rw-r--r-- | drivers/md/raid5.c | 270 | ||||
-rw-r--r-- | drivers/md/raid5.h | 5 |
18 files changed, 299 insertions, 322 deletions
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index fa028fa82df4..cb64e64a4789 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -55,7 +55,7 @@ static void bch_bio_submit_split_done(struct closure *cl) s->bio->bi_end_io = s->bi_end_io; s->bio->bi_private = s->bi_private; - bio_endio_nodec(s->bio, 0); + bio_endio(s->bio, 0); closure_debug_destroy(&s->cl); mempool_free(s, s->p->bio_split_hook); diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index ab43faddb447..1616f668a4cb 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -619,7 +619,7 @@ static void do_bio_hook(struct search *s, struct bio *orig_bio) bio->bi_end_io = request_endio; bio->bi_private = &s->cl; - atomic_set(&bio->bi_cnt, 3); + bio_cnt_set(bio, 3); } static void search_free(struct closure *cl) diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 2bc56e2a3526..135a0907e9de 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -177,11 +177,16 @@ static struct md_rdev *next_active_rdev(struct md_rdev *rdev, struct mddev *mdde * nr_pending is 0 and In_sync is clear, the entries we return will * still be in the same position on the list when we re-enter * list_for_each_entry_continue_rcu. + * + * Note that if entered with 'rdev == NULL' to start at the + * beginning, we temporarily assign 'rdev' to an address which + * isn't really an rdev, but which can be used by + * list_for_each_entry_continue_rcu() to find the first entry. */ rcu_read_lock(); if (rdev == NULL) /* start at the beginning */ - rdev = list_entry_rcu(&mddev->disks, struct md_rdev, same_set); + rdev = list_entry(&mddev->disks, struct md_rdev, same_set); else { /* release the previous rdev and start from there. */ rdev_dec_pending(rdev, mddev); diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 7755af351867..41b2594a80c6 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -86,12 +86,6 @@ static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio) { bio->bi_end_io = h->bi_end_io; bio->bi_private = h->bi_private; - - /* - * Must bump bi_remaining to allow bio to complete with - * restored bi_end_io. - */ - atomic_inc(&bio->bi_remaining); } /*----------------------------------------------------------------*/ diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 9eeea196328a..5503e43e5f28 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -925,10 +925,11 @@ static int crypt_convert(struct crypt_config *cc, switch (r) { /* async */ - case -EINPROGRESS: case -EBUSY: wait_for_completion(&ctx->restart); reinit_completion(&ctx->restart); + /* fall through*/ + case -EINPROGRESS: ctx->req = NULL; ctx->cc_sector++; continue; @@ -1345,8 +1346,10 @@ static void kcryptd_async_done(struct crypto_async_request *async_req, struct dm_crypt_io *io = container_of(ctx, struct dm_crypt_io, ctx); struct crypt_config *cc = io->cc; - if (error == -EINPROGRESS) + if (error == -EINPROGRESS) { + complete(&ctx->restart); return; + } if (!error && cc->iv_gen_ops && cc->iv_gen_ops->post) error = cc->iv_gen_ops->post(cc, iv_of_dmreq(cc, dmreq), dmreq); @@ -1357,15 +1360,12 @@ static void kcryptd_async_done(struct crypto_async_request *async_req, crypt_free_req(cc, req_of_dmreq(cc, dmreq), io->base_bio); if (!atomic_dec_and_test(&ctx->cc_pending)) - goto done; + return; if (bio_data_dir(io->base_bio) == READ) kcryptd_crypt_read_done(io); else kcryptd_crypt_write_io_submit(io, 1); -done: - if (!completion_done(&ctx->restart)) - complete(&ctx->restart); } static void kcryptd_crypt(struct work_struct *work) diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 63953477a07c..eff7bdd7731d 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -429,9 +429,11 @@ static int __multipath_map(struct dm_target *ti, struct request *clone, /* blk-mq request-based interface */ *__clone = blk_get_request(bdev_get_queue(bdev), rq_data_dir(rq), GFP_ATOMIC); - if (IS_ERR(*__clone)) + if (IS_ERR(*__clone)) { /* ENOMEM, requeue */ + clear_mapinfo(m, map_context); return r; + } (*__clone)->bio = (*__clone)->biotail = NULL; (*__clone)->rq_disk = bdev->bd_disk; (*__clone)->cmd_flags |= REQ_FAILFAST_TRANSPORT; diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 089d62751f7f..743fa9bbae9e 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -1254,8 +1254,6 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error) dm_bio_restore(bd, bio); bio_record->details.bi_bdev = NULL; - atomic_inc(&bio->bi_remaining); - queue_bio(ms, bio, rw); return DM_ENDIO_INCOMPLETE; } diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index f83a0f3fc365..7c82d3ccce87 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1478,7 +1478,6 @@ out: if (full_bio) { full_bio->bi_end_io = pe->full_bio_end_io; full_bio->bi_private = pe->full_bio_private; - atomic_inc(&full_bio->bi_remaining); } increment_pending_exceptions_done_count(); diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index d9b00b8565c6..a5f94125ad01 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -820,6 +820,12 @@ void dm_consume_args(struct dm_arg_set *as, unsigned num_args) } EXPORT_SYMBOL(dm_consume_args); +static bool __table_type_request_based(unsigned table_type) +{ + return (table_type == DM_TYPE_REQUEST_BASED || + table_type == DM_TYPE_MQ_REQUEST_BASED); +} + static int dm_table_set_type(struct dm_table *t) { unsigned i; @@ -852,8 +858,7 @@ static int dm_table_set_type(struct dm_table *t) * Determine the type from the live device. * Default to bio-based if device is new. */ - if (live_md_type == DM_TYPE_REQUEST_BASED || - live_md_type == DM_TYPE_MQ_REQUEST_BASED) + if (__table_type_request_based(live_md_type)) request_based = 1; else bio_based = 1; @@ -903,7 +908,7 @@ static int dm_table_set_type(struct dm_table *t) } t->type = DM_TYPE_MQ_REQUEST_BASED; - } else if (hybrid && list_empty(devices) && live_md_type != DM_TYPE_NONE) { + } else if (list_empty(devices) && __table_type_request_based(live_md_type)) { /* inherit live MD type */ t->type = live_md_type; @@ -925,10 +930,7 @@ struct target_type *dm_table_get_immutable_target_type(struct dm_table *t) bool dm_table_request_based(struct dm_table *t) { - unsigned table_type = dm_table_get_type(t); - - return (table_type == DM_TYPE_REQUEST_BASED || - table_type == DM_TYPE_MQ_REQUEST_BASED); + return __table_type_request_based(dm_table_get_type(t)); } bool dm_table_mq_request_based(struct dm_table *t) @@ -940,21 +942,28 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device * { unsigned type = dm_table_get_type(t); unsigned per_bio_data_size = 0; - struct dm_target *tgt; unsigned i; - if (unlikely(type == DM_TYPE_NONE)) { + switch (type) { + case DM_TYPE_BIO_BASED: + for (i = 0; i < t->num_targets; i++) { + struct dm_target *tgt = t->targets + i; + + per_bio_data_size = max(per_bio_data_size, + tgt->per_bio_data_size); + } + t->mempools = dm_alloc_bio_mempools(t->integrity_supported, + per_bio_data_size); + break; + case DM_TYPE_REQUEST_BASED: + case DM_TYPE_MQ_REQUEST_BASED: + t->mempools = dm_alloc_rq_mempools(md, type); + break; + default: DMWARN("no table type is set, can't allocate mempools"); return -EINVAL; } - if (type == DM_TYPE_BIO_BASED) - for (i = 0; i < t->num_targets; i++) { - tgt = t->targets + i; - per_bio_data_size = max(per_bio_data_size, tgt->per_bio_data_size); - } - - t->mempools = dm_alloc_md_mempools(md, type, t->integrity_supported, per_bio_data_size); if (!t->mempools) return -ENOMEM; diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 921aafd12aee..e852602c0091 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -793,10 +793,9 @@ static void inc_remap_and_issue_cell(struct thin_c *tc, static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m) { - if (m->bio) { + if (m->bio) m->bio->bi_end_io = m->saved_bi_end_io; - atomic_inc(&m->bio->bi_remaining); - } + cell_error(m->tc->pool, m->cell); list_del(&m->list); mempool_free(m, m->tc->pool->mapping_pool); @@ -810,10 +809,8 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) int r; bio = m->bio; - if (bio) { + if (bio) bio->bi_end_io = m->saved_bi_end_io; - atomic_inc(&bio->bi_remaining); - } if (m->err) { cell_error(pool, m->cell); diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index 66616db33e6f..bb9c6a00e4b0 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -459,7 +459,7 @@ static void verity_finish_io(struct dm_verity_io *io, int error) bio->bi_end_io = io->orig_bi_end_io; bio->bi_private = io->orig_bi_private; - bio_endio_nodec(bio, error); + bio_endio(bio, error); } static void verity_work(struct work_struct *w) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index a930b72314ac..4d6f089a0e9e 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -990,57 +990,6 @@ static void clone_endio(struct bio *bio, int error) dec_pending(io, error); } -/* - * Partial completion handling for request-based dm - */ -static void end_clone_bio(struct bio *clone, int error) -{ - struct dm_rq_clone_bio_info *info = - container_of(clone, struct dm_rq_clone_bio_info, clone); - struct dm_rq_target_io *tio = info->tio; - struct bio *bio = info->orig; - unsigned int nr_bytes = info->orig->bi_iter.bi_size; - - bio_put(clone); - - if (tio->error) - /* - * An error has already been detected on the request. - * Once error occurred, just let clone->end_io() handle - * the remainder. - */ - return; - else if (error) { - /* - * Don't notice the error to the upper layer yet. - * The error handling decision is made by the target driver, - * when the request is completed. - */ - tio->error = error; - return; - } - - /* - * I/O for the bio successfully completed. - * Notice the data completion to the upper layer. - */ - - /* - * bios are processed from the head of the list. - * So the completing bio should always be rq->bio. - * If it's not, something wrong is happening. - */ - if (tio->orig->bio != bio) - DMERR("bio completion is going in the middle of the request"); - - /* - * Update the original request. - * Do not use blk_end_request() here, because it may complete - * the original request before the clone, and break the ordering. - */ - blk_update_request(tio->orig, 0, nr_bytes); -} - static struct dm_rq_target_io *tio_from_request(struct request *rq) { return (rq->q->mq_ops ? blk_mq_rq_to_pdu(rq) : rq->special); @@ -1082,15 +1031,11 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue) dm_put(md); } -static void free_rq_clone(struct request *clone, bool must_be_mapped) +static void free_rq_clone(struct request *clone) { struct dm_rq_target_io *tio = clone->end_io_data; struct mapped_device *md = tio->md; - WARN_ON_ONCE(must_be_mapped && !clone->q); - - blk_rq_unprep_clone(clone); - if (md->type == DM_TYPE_MQ_REQUEST_BASED) /* stacked on blk-mq queue(s) */ tio->ti->type->release_clone_rq(clone); @@ -1132,7 +1077,7 @@ static void dm_end_request(struct request *clone, int error) rq->sense_len = clone->sense_len; } - free_rq_clone(clone, true); + free_rq_clone(clone); if (!rq->q->mq_ops) blk_end_request_all(rq, error); else @@ -1151,7 +1096,7 @@ static void dm_unprep_request(struct request *rq) } if (clone) - free_rq_clone(clone, false); + free_rq_clone(clone); } /* @@ -1164,6 +1109,7 @@ static void old_requeue_request(struct request *rq) spin_lock_irqsave(q->queue_lock, flags); blk_requeue_request(q, rq); + blk_run_queue_async(q); spin_unlock_irqrestore(q->queue_lock, flags); } @@ -1724,8 +1670,7 @@ static int dm_merge_bvec(struct request_queue *q, struct mapped_device *md = q->queuedata; struct dm_table *map = dm_get_live_table_fast(md); struct dm_target *ti; - sector_t max_sectors; - int max_size = 0; + sector_t max_sectors, max_size = 0; if (unlikely(!map)) goto out; @@ -1740,8 +1685,16 @@ static int dm_merge_bvec(struct request_queue *q, max_sectors = min(max_io_len(bvm->bi_sector, ti), (sector_t) queue_max_sectors(q)); max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size; - if (unlikely(max_size < 0)) /* this shouldn't _ever_ happen */ - max_size = 0; + + /* + * FIXME: this stop-gap fix _must_ be cleaned up (by passing a sector_t + * to the targets' merge function since it holds sectors not bytes). + * Just doing this as an interim fix for stable@ because the more + * comprehensive cleanup of switching to sector_t will impact every + * DM target that implements a ->merge hook. + */ + if (max_size > INT_MAX) + max_size = INT_MAX; /* * merge_bvec_fn() returns number of bytes @@ -1749,7 +1702,7 @@ static int dm_merge_bvec(struct request_queue *q, * max is precomputed maximal io size */ if (max_size && ti->type->merge) - max_size = ti->type->merge(ti, bvm, biovec, max_size); + max_size = ti->type->merge(ti, bvm, biovec, (int) max_size); /* * If the target doesn't support merge method and some of the devices * provided their merge_bvec method (we know this by looking for the @@ -1821,39 +1774,13 @@ static void dm_dispatch_clone_request(struct request *clone, struct request *rq) dm_complete_request(rq, r); } -static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig, - void *data) +static void setup_clone(struct request *clone, struct request *rq, + struct dm_rq_target_io *tio) { - struct dm_rq_target_io *tio = data; - struct dm_rq_clone_bio_info *info = - container_of(bio, struct dm_rq_clone_bio_info, clone); - - info->orig = bio_orig; - info->tio = tio; - bio->bi_end_io = end_clone_bio; - - return 0; -} - -static int setup_clone(struct request *clone, struct request *rq, - struct dm_rq_target_io *tio, gfp_t gfp_mask) -{ - int r; - - r = blk_rq_prep_clone(clone, rq, tio->md->bs, gfp_mask, - dm_rq_bio_constructor, tio); - if (r) - return r; - - clone->cmd = rq->cmd; - clone->cmd_len = rq->cmd_len; - clone->sense = rq->sense; + blk_rq_prep_clone(clone, rq); clone->end_io = end_clone_request; clone->end_io_data = tio; - tio->clone = clone; - - return 0; } static struct request *clone_rq(struct request *rq, struct mapped_device *md, @@ -1874,12 +1801,7 @@ static struct request *clone_rq(struct request *rq, struct mapped_device *md, clone = tio->clone; blk_rq_init(NULL, clone); - if (setup_clone(clone, rq, tio, gfp_mask)) { - /* -ENOMEM */ - if (alloc_clone) - free_clone_request(md, clone); - return NULL; - } + setup_clone(clone, rq, tio); return clone; } @@ -1971,13 +1893,9 @@ static int map_request(struct dm_rq_target_io *tio, struct request *rq, dm_kill_unmapped_request(rq, r); return r; } - if (IS_ERR(clone)) - return DM_MAPIO_REQUEUE; - if (setup_clone(clone, rq, tio, GFP_ATOMIC)) { - /* -ENOMEM */ - ti->type->release_clone_rq(clone); - return DM_MAPIO_REQUEUE; - } + if (r != DM_MAPIO_REMAPPED) + return r; + setup_clone(clone, rq, tio); } switch (r) { @@ -2431,8 +2349,6 @@ static void __bind_mempools(struct mapped_device *md, struct dm_table *t) goto out; } - BUG_ON(!p || md->io_pool || md->rq_pool || md->bs); - md->io_pool = p->io_pool; p->io_pool = NULL; md->rq_pool = p->rq_pool; @@ -2753,13 +2669,15 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx, if (dm_table_get_type(map) == DM_TYPE_REQUEST_BASED) { /* clone request is allocated at the end of the pdu */ tio->clone = (void *)blk_mq_rq_to_pdu(rq) + sizeof(struct dm_rq_target_io); - if (!clone_rq(rq, md, tio, GFP_ATOMIC)) - return BLK_MQ_RQ_QUEUE_BUSY; + (void) clone_rq(rq, md, tio, GFP_ATOMIC); queue_kthread_work(&md->kworker, &tio->work); } else { /* Direct call is fine since .queue_rq allows allocations */ - if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) - dm_requeue_unmapped_original_request(md, rq); + if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) { + /* Undo dm_start_request() before requeuing */ + rq_completed(md, rq_data_dir(rq), false); + return BLK_MQ_RQ_QUEUE_BUSY; + } } return BLK_MQ_RQ_QUEUE_OK; @@ -3536,48 +3454,23 @@ int dm_noflush_suspending(struct dm_target *ti) } EXPORT_SYMBOL_GPL(dm_noflush_suspending); -struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned type, - unsigned integrity, unsigned per_bio_data_size) +struct dm_md_mempools *dm_alloc_bio_mempools(unsigned integrity, + unsigned per_bio_data_size) { - struct dm_md_mempools *pools = kzalloc(sizeof(*pools), GFP_KERNEL); - struct kmem_cache *cachep = NULL; - unsigned int pool_size = 0; + struct dm_md_mempools *pools; + unsigned int pool_size = dm_get_reserved_bio_based_ios(); unsigned int front_pad; + pools = kzalloc(sizeof(*pools), GFP_KERNEL); if (!pools) return NULL; - type = filter_md_type(type, md); - - switch (type) { - case DM_TYPE_BIO_BASED: - cachep = _io_cache; - pool_size = dm_get_reserved_bio_based_ios(); - front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone); - break; - case DM_TYPE_REQUEST_BASED: - cachep = _rq_tio_cache; - pool_size = dm_get_reserved_rq_based_ios(); - pools->rq_pool = mempool_create_slab_pool(pool_size, _rq_cache); - if (!pools->rq_pool) - goto out; - /* fall through to setup remaining rq-based pools */ - case DM_TYPE_MQ_REQUEST_BASED: - if (!pool_size) - pool_size = dm_get_reserved_rq_based_ios(); - front_pad = offsetof(struct dm_rq_clone_bio_info, clone); - /* per_bio_data_size is not used. See __bind_mempools(). */ - WARN_ON(per_bio_data_size != 0); - break; - default: - BUG(); - } + front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) + + offsetof(struct dm_target_io, clone); - if (cachep) { - pools->io_pool = mempool_create_slab_pool(pool_size, cachep); - if (!pools->io_pool) - goto out; - } + pools->io_pool = mempool_create_slab_pool(pool_size, _io_cache); + if (!pools->io_pool) + goto out; pools->bs = bioset_create_nobvec(pool_size, front_pad); if (!pools->bs) @@ -3587,10 +3480,34 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned t goto out; return pools; - out: dm_free_md_mempools(pools); + return NULL; +} + +struct dm_md_mempools *dm_alloc_rq_mempools(struct mapped_device *md, + unsigned type) +{ + unsigned int pool_size = dm_get_reserved_rq_based_ios(); + struct dm_md_mempools *pools; + + pools = kzalloc(sizeof(*pools), GFP_KERNEL); + if (!pools) + return NULL; + if (filter_md_type(type, md) == DM_TYPE_REQUEST_BASED) { + pools->rq_pool = mempool_create_slab_pool(pool_size, _rq_cache); + if (!pools->rq_pool) + goto out; + } + + pools->io_pool = mempool_create_slab_pool(pool_size, _rq_tio_cache); + if (!pools->io_pool) + goto out; + + return pools; +out: + dm_free_md_mempools(pools); return NULL; } diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 6123c2bf9150..e6e66d087b26 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -222,8 +222,9 @@ void dm_kcopyd_exit(void); /* * Mempool operations */ -struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned type, - unsigned integrity, unsigned per_bio_data_size); +struct dm_md_mempools *dm_alloc_bio_mempools(unsigned integrity, + unsigned per_bio_data_size); +struct dm_md_mempools *dm_alloc_rq_mempools(struct mapped_device *md, unsigned type); void dm_free_md_mempools(struct dm_md_mempools *pools); /* diff --git a/drivers/md/md.c b/drivers/md/md.c index d4f31e195e26..4dbed4a67aaf 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -3834,7 +3834,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len) err = -EBUSY; } spin_unlock(&mddev->lock); - return err; + return err ?: len; } err = mddev_lock(mddev); if (err) @@ -4211,34 +4211,36 @@ action_store(struct mddev *mddev, const char *page, size_t len) if (!mddev->pers || !mddev->pers->sync_request) return -EINVAL; - if (cmd_match(page, "frozen")) - set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); - else - clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); if (cmd_match(page, "idle") || cmd_match(page, "frozen")) { - flush_workqueue(md_misc_wq); - if (mddev->sync_thread) { - set_bit(MD_RECOVERY_INTR, &mddev->recovery); - if (mddev_lock(mddev) == 0) { + if (cmd_match(page, "frozen")) + set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + else + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) && + mddev_lock(mddev) == 0) { + flush_workqueue(md_misc_wq); + if (mddev->sync_thread) { + set_bit(MD_RECOVERY_INTR, &mddev->recovery); md_reap_sync_thread(mddev); - mddev_unlock(mddev); } + mddev_unlock(mddev); } } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) return -EBUSY; else if (cmd_match(page, "resync")) - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); else if (cmd_match(page, "recover")) { + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); set_bit(MD_RECOVERY_RECOVER, &mddev->recovery); - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); } else if (cmd_match(page, "reshape")) { int err; if (mddev->pers->start_reshape == NULL) return -EINVAL; err = mddev_lock(mddev); if (!err) { + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); err = mddev->pers->start_reshape(mddev); mddev_unlock(mddev); } @@ -4250,6 +4252,7 @@ action_store(struct mddev *mddev, const char *page, size_t len) set_bit(MD_RECOVERY_CHECK, &mddev->recovery); else if (!cmd_match(page, "repair")) return -EINVAL; + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); set_bit(MD_RECOVERY_SYNC, &mddev->recovery); } @@ -4818,12 +4821,12 @@ static void md_free(struct kobject *ko) if (mddev->sysfs_state) sysfs_put(mddev->sysfs_state); + if (mddev->queue) + blk_cleanup_queue(mddev->queue); if (mddev->gendisk) { del_gendisk(mddev->gendisk); put_disk(mddev->gendisk); } - if (mddev->queue) - blk_cleanup_queue(mddev->queue); kfree(mddev); } @@ -8259,6 +8262,7 @@ void md_reap_sync_thread(struct mddev *mddev) if (mddev_is_clustered(mddev)) md_cluster_ops->metadata_update_finish(mddev); clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery); + clear_bit(MD_RECOVERY_DONE, &mddev->recovery); clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 2cb59a641cd2..efb654eb5399 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -188,8 +188,9 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) } dev[j] = rdev1; - disk_stack_limits(mddev->gendisk, rdev1->bdev, - rdev1->data_offset << 9); + if (mddev->queue) + disk_stack_limits(mddev->gendisk, rdev1->bdev, + rdev1->data_offset << 9); if (rdev1->bdev->bd_disk->queue->merge_bvec_fn) conf->has_merge_bvec = 1; @@ -523,6 +524,9 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio) ? (sector & (chunk_sects-1)) : sector_div(sector, chunk_sects)); + /* Restore due to sector_div */ + sector = bio->bi_iter.bi_sector; + if (sectors < bio_sectors(bio)) { split = bio_split(bio, sectors, GFP_NOIO, fs_bio_set); bio_chain(split, bio); @@ -530,7 +534,6 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio) split = bio; } - sector = bio->bi_iter.bi_sector; zone = find_zone(mddev->private, §or); tmp_dev = map_sector(mddev, zone, sector, §or); split->bi_bdev = tmp_dev->bdev; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index e793ab6b3570..f55c3f35b746 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -4156,6 +4156,7 @@ static int raid10_start_reshape(struct mddev *mddev) clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); + clear_bit(MD_RECOVERY_DONE, &mddev->recovery); set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 77dfd720aaa0..b6793d2e051f 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -749,6 +749,7 @@ static void unlock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2) static bool stripe_can_batch(struct stripe_head *sh) { return test_bit(STRIPE_BATCH_READY, &sh->state) && + !test_bit(STRIPE_BITMAP_PENDING, &sh->state) && is_full_stripe_write(sh); } @@ -837,6 +838,15 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh < IO_THRESHOLD) md_wakeup_thread(conf->mddev->thread); + if (test_and_clear_bit(STRIPE_BIT_DELAY, &sh->state)) { + int seq = sh->bm_seq; + if (test_bit(STRIPE_BIT_DELAY, &sh->batch_head->state) && + sh->batch_head->bm_seq > seq) + seq = sh->batch_head->bm_seq; + set_bit(STRIPE_BIT_DELAY, &sh->batch_head->state); + sh->batch_head->bm_seq = seq; + } + atomic_inc(&sh->count); unlock_out: unlock_two_stripes(head, sh); @@ -1078,9 +1088,6 @@ again: pr_debug("skip op %ld on disc %d for sector %llu\n", bi->bi_rw, i, (unsigned long long)sh->sector); clear_bit(R5_LOCKED, &sh->dev[i].flags); - if (sh->batch_head) - set_bit(STRIPE_BATCH_ERR, - &sh->batch_head->state); set_bit(STRIPE_HANDLE, &sh->state); } @@ -1825,7 +1832,7 @@ again: } else init_async_submit(&submit, 0, tx, NULL, NULL, to_addr_conv(sh, percpu, j)); - async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit); + tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit); if (!last_stripe) { j++; sh = list_first_entry(&sh->batch_list, struct stripe_head, @@ -1971,17 +1978,30 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) put_cpu(); } +static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp) +{ + struct stripe_head *sh; + + sh = kmem_cache_zalloc(sc, gfp); + if (sh) { + spin_lock_init(&sh->stripe_lock); + spin_lock_init(&sh->batch_lock); + INIT_LIST_HEAD(&sh->batch_list); + INIT_LIST_HEAD(&sh->lru); + atomic_set(&sh->count, 1); + } + return sh; +} static int grow_one_stripe(struct r5conf *conf, gfp_t gfp) { struct stripe_head *sh; - sh = kmem_cache_zalloc(conf->slab_cache, gfp); + + sh = alloc_stripe(conf->slab_cache, gfp); if (!sh) return 0; sh->raid_conf = conf; - spin_lock_init(&sh->stripe_lock); - if (grow_buffers(sh, gfp)) { shrink_buffers(sh); kmem_cache_free(conf->slab_cache, sh); @@ -1990,13 +2010,8 @@ static int grow_one_stripe(struct r5conf *conf, gfp_t gfp) sh->hash_lock_index = conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS; /* we just created an active stripe so... */ - atomic_set(&sh->count, 1); atomic_inc(&conf->active_stripes); - INIT_LIST_HEAD(&sh->lru); - spin_lock_init(&sh->batch_lock); - INIT_LIST_HEAD(&sh->batch_list); - sh->batch_head = NULL; release_stripe(sh); conf->max_nr_stripes++; return 1; @@ -2060,6 +2075,35 @@ static struct flex_array *scribble_alloc(int num, int cnt, gfp_t flags) return ret; } +static int resize_chunks(struct r5conf *conf, int new_disks, int new_sectors) +{ + unsigned long cpu; + int err = 0; + + mddev_suspend(conf->mddev); + get_online_cpus(); + for_each_present_cpu(cpu) { + struct raid5_percpu *percpu; + struct flex_array *scribble; + + percpu = per_cpu_ptr(conf->percpu, cpu); + scribble = scribble_alloc(new_disks, + new_sectors / STRIPE_SECTORS, + GFP_NOIO); + + if (scribble) { + flex_array_free(percpu->scribble); + percpu->scribble = scribble; + } else { + err = -ENOMEM; + break; + } + } + put_online_cpus(); + mddev_resume(conf->mddev); + return err; +} + static int resize_stripes(struct r5conf *conf, int newsize) { /* Make all the stripes able to hold 'newsize' devices. @@ -2088,7 +2132,6 @@ static int resize_stripes(struct r5conf *conf, int newsize) struct stripe_head *osh, *nsh; LIST_HEAD(newstripes); struct disk_info *ndisks; - unsigned long cpu; int err; struct kmem_cache *sc; int i; @@ -2109,13 +2152,11 @@ static int resize_stripes(struct r5conf *conf, int newsize) return -ENOMEM; for (i = conf->max_nr_stripes; i; i--) { - nsh = kmem_cache_zalloc(sc, GFP_KERNEL); + nsh = alloc_stripe(sc, GFP_KERNEL); if (!nsh) break; nsh->raid_conf = conf; - spin_lock_init(&nsh->stripe_lock); - list_add(&nsh->lru, &newstripes); } if (i) { @@ -2142,13 +2183,11 @@ static int resize_stripes(struct r5conf *conf, int newsize) lock_device_hash_lock(conf, hash)); osh = get_free_stripe(conf, hash); unlock_device_hash_lock(conf, hash); - atomic_set(&nsh->count, 1); + for(i=0; i<conf->pool_size; i++) { nsh->dev[i].page = osh->dev[i].page; nsh->dev[i].orig_page = osh->dev[i].page; } - for( ; i<newsize; i++) - nsh->dev[i].page = NULL; nsh->hash_lock_index = hash; kmem_cache_free(conf->slab_cache, osh); cnt++; @@ -2174,25 +2213,6 @@ static int resize_stripes(struct r5conf *conf, int newsize) } else err = -ENOMEM; - get_online_cpus(); - for_each_present_cpu(cpu) { - struct raid5_percpu *percpu; - struct flex_array *scribble; - - percpu = per_cpu_ptr(conf->percpu, cpu); - scribble = scribble_alloc(newsize, conf->chunk_sectors / - STRIPE_SECTORS, GFP_NOIO); - - if (scribble) { - flex_array_free(percpu->scribble); - percpu->scribble = scribble; - } else { - err = -ENOMEM; - break; - } - } - put_online_cpus(); - /* Step 4, return new stripes to service */ while(!list_empty(&newstripes)) { nsh = list_entry(newstripes.next, struct stripe_head, lru); @@ -2212,7 +2232,8 @@ static int resize_stripes(struct r5conf *conf, int newsize) conf->slab_cache = sc; conf->active_name = 1-conf->active_name; - conf->pool_size = newsize; + if (!err) + conf->pool_size = newsize; return err; } @@ -2434,7 +2455,7 @@ static void raid5_end_write_request(struct bio *bi, int error) } rdev_dec_pending(rdev, conf->mddev); - if (sh->batch_head && !uptodate) + if (sh->batch_head && !uptodate && !replacement) set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state); if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags)) @@ -2976,14 +2997,32 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n", (unsigned long long)(*bip)->bi_iter.bi_sector, (unsigned long long)sh->sector, dd_idx); - spin_unlock_irq(&sh->stripe_lock); if (conf->mddev->bitmap && firstwrite) { + /* Cannot hold spinlock over bitmap_startwrite, + * but must ensure this isn't added to a batch until + * we have added to the bitmap and set bm_seq. + * So set STRIPE_BITMAP_PENDING to prevent + * batching. + * If multiple add_stripe_bio() calls race here they + * much all set STRIPE_BITMAP_PENDING. So only the first one + * to complete "bitmap_startwrite" gets to set + * STRIPE_BIT_DELAY. This is important as once a stripe + * is added to a batch, STRIPE_BIT_DELAY cannot be changed + * any more. + */ + set_bit(STRIPE_BITMAP_PENDING, &sh->state); + spin_unlock_irq(&sh->stripe_lock); bitmap_startwrite(conf->mddev->bitmap, sh->sector, STRIPE_SECTORS, 0); - sh->bm_seq = conf->seq_flush+1; - set_bit(STRIPE_BIT_DELAY, &sh->state); + spin_lock_irq(&sh->stripe_lock); + clear_bit(STRIPE_BITMAP_PENDING, &sh->state); + if (!sh->batch_head) { + sh->bm_seq = conf->seq_flush+1; + set_bit(STRIPE_BIT_DELAY, &sh->state); + } } + spin_unlock_irq(&sh->stripe_lock); if (stripe_can_batch(sh)) stripe_add_to_batch_list(conf, sh); @@ -3278,7 +3317,9 @@ static int need_this_block(struct stripe_head *sh, struct stripe_head_state *s, /* reconstruct-write isn't being forced */ return 0; for (i = 0; i < s->failed; i++) { - if (!test_bit(R5_UPTODATE, &fdev[i]->flags) && + if (s->failed_num[i] != sh->pd_idx && + s->failed_num[i] != sh->qd_idx && + !test_bit(R5_UPTODATE, &fdev[i]->flags) && !test_bit(R5_OVERWRITE, &fdev[i]->flags)) return 1; } @@ -3298,6 +3339,7 @@ static int fetch_block(struct stripe_head *sh, struct stripe_head_state *s, */ BUG_ON(test_bit(R5_Wantcompute, &dev->flags)); BUG_ON(test_bit(R5_Wantread, &dev->flags)); + BUG_ON(sh->batch_head); if ((s->uptodate == disks - 1) && (s->failed && (disk_idx == s->failed_num[0] || disk_idx == s->failed_num[1]))) { @@ -3366,7 +3408,6 @@ static void handle_stripe_fill(struct stripe_head *sh, { int i; - BUG_ON(sh->batch_head); /* look for blocks to read/compute, skip this if a compute * is already in flight, or if the stripe contents are in the * midst of changing due to a write @@ -3379,6 +3420,8 @@ static void handle_stripe_fill(struct stripe_head *sh, set_bit(STRIPE_HANDLE, &sh->state); } +static void break_stripe_batch_list(struct stripe_head *head_sh, + unsigned long handle_flags); /* handle_stripe_clean_event * any written block on an uptodate or failed drive can be returned. * Note that if we 'wrote' to a failed drive, it will be UPTODATE, but @@ -3392,7 +3435,6 @@ static void handle_stripe_clean_event(struct r5conf *conf, int discard_pending = 0; struct stripe_head *head_sh = sh; bool do_endio = false; - int wakeup_nr = 0; for (i = disks; i--; ) if (sh->dev[i].written) { @@ -3481,44 +3523,8 @@ unhash: if (atomic_dec_and_test(&conf->pending_full_writes)) md_wakeup_thread(conf->mddev->thread); - if (!head_sh->batch_head || !do_endio) - return; - for (i = 0; i < head_sh->disks; i++) { - if (test_and_clear_bit(R5_Overlap, &head_sh->dev[i].flags)) - wakeup_nr++; - } - while (!list_empty(&head_sh->batch_list)) { - int i; - sh = list_first_entry(&head_sh->batch_list, - struct stripe_head, batch_list); - list_del_init(&sh->batch_list); - - set_mask_bits(&sh->state, ~STRIPE_EXPAND_SYNC_FLAG, - head_sh->state & ~((1 << STRIPE_ACTIVE) | - (1 << STRIPE_PREREAD_ACTIVE) | - STRIPE_EXPAND_SYNC_FLAG)); - sh->check_state = head_sh->check_state; - sh->reconstruct_state = head_sh->reconstruct_state; - for (i = 0; i < sh->disks; i++) { - if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) - wakeup_nr++; - sh->dev[i].flags = head_sh->dev[i].flags; - } - - spin_lock_irq(&sh->stripe_lock); - sh->batch_head = NULL; - spin_unlock_irq(&sh->stripe_lock); - if (sh->state & STRIPE_EXPAND_SYNC_FLAG) - set_bit(STRIPE_HANDLE, &sh->state); - release_stripe(sh); - } - - spin_lock_irq(&head_sh->stripe_lock); - head_sh->batch_head = NULL; - spin_unlock_irq(&head_sh->stripe_lock); - wake_up_nr(&conf->wait_for_overlap, wakeup_nr); - if (head_sh->state & STRIPE_EXPAND_SYNC_FLAG) - set_bit(STRIPE_HANDLE, &head_sh->state); + if (head_sh->batch_head && do_endio) + break_stripe_batch_list(head_sh, STRIPE_EXPAND_SYNC_FLAGS); } static void handle_stripe_dirtying(struct r5conf *conf, @@ -4159,9 +4165,13 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) static int clear_batch_ready(struct stripe_head *sh) { + /* Return '1' if this is a member of batch, or + * '0' if it is a lone stripe or a head which can now be + * handled. + */ struct stripe_head *tmp; if (!test_and_clear_bit(STRIPE_BATCH_READY, &sh->state)) - return 0; + return (sh->batch_head && sh->batch_head != sh); spin_lock(&sh->stripe_lock); if (!sh->batch_head) { spin_unlock(&sh->stripe_lock); @@ -4189,46 +4199,65 @@ static int clear_batch_ready(struct stripe_head *sh) return 0; } -static void check_break_stripe_batch_list(struct stripe_head *sh) +static void break_stripe_batch_list(struct stripe_head *head_sh, + unsigned long handle_flags) { - struct stripe_head *head_sh, *next; + struct stripe_head *sh, *next; int i; + int do_wakeup = 0; - if (!test_and_clear_bit(STRIPE_BATCH_ERR, &sh->state)) - return; + list_for_each_entry_safe(sh, next, &head_sh->batch_list, batch_list) { - head_sh = sh; - do { - sh = list_first_entry(&sh->batch_list, - struct stripe_head, batch_list); - BUG_ON(sh == head_sh); - } while (!test_bit(STRIPE_DEGRADED, &sh->state)); - - while (sh != head_sh) { - next = list_first_entry(&sh->batch_list, - struct stripe_head, batch_list); list_del_init(&sh->batch_list); - set_mask_bits(&sh->state, ~STRIPE_EXPAND_SYNC_FLAG, - head_sh->state & ~((1 << STRIPE_ACTIVE) | - (1 << STRIPE_PREREAD_ACTIVE) | - (1 << STRIPE_DEGRADED) | - STRIPE_EXPAND_SYNC_FLAG)); + WARN_ON_ONCE(sh->state & ((1 << STRIPE_ACTIVE) | + (1 << STRIPE_SYNCING) | + (1 << STRIPE_REPLACED) | + (1 << STRIPE_PREREAD_ACTIVE) | + (1 << STRIPE_DELAYED) | + (1 << STRIPE_BIT_DELAY) | + (1 << STRIPE_FULL_WRITE) | + (1 << STRIPE_BIOFILL_RUN) | + (1 << STRIPE_COMPUTE_RUN) | + (1 << STRIPE_OPS_REQ_PENDING) | + (1 << STRIPE_DISCARD) | + (1 << STRIPE_BATCH_READY) | + (1 << STRIPE_BATCH_ERR) | + (1 << STRIPE_BITMAP_PENDING))); + WARN_ON_ONCE(head_sh->state & ((1 << STRIPE_DISCARD) | + (1 << STRIPE_REPLACED))); + + set_mask_bits(&sh->state, ~(STRIPE_EXPAND_SYNC_FLAGS | + (1 << STRIPE_DEGRADED)), + head_sh->state & (1 << STRIPE_INSYNC)); + sh->check_state = head_sh->check_state; sh->reconstruct_state = head_sh->reconstruct_state; - for (i = 0; i < sh->disks; i++) + for (i = 0; i < sh->disks; i++) { + if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) + do_wakeup = 1; sh->dev[i].flags = head_sh->dev[i].flags & (~((1 << R5_WriteError) | (1 << R5_Overlap))); - + } spin_lock_irq(&sh->stripe_lock); sh->batch_head = NULL; spin_unlock_irq(&sh->stripe_lock); - - set_bit(STRIPE_HANDLE, &sh->state); + if (handle_flags == 0 || + sh->state & handle_flags) + set_bit(STRIPE_HANDLE, &sh->state); release_stripe(sh); - - sh = next; } + spin_lock_irq(&head_sh->stripe_lock); + head_sh->batch_head = NULL; + spin_unlock_irq(&head_sh->stripe_lock); + for (i = 0; i < head_sh->disks; i++) + if (test_and_clear_bit(R5_Overlap, &head_sh->dev[i].flags)) + do_wakeup = 1; + if (head_sh->state & handle_flags) + set_bit(STRIPE_HANDLE, &head_sh->state); + + if (do_wakeup) + wake_up(&head_sh->raid_conf->wait_for_overlap); } static void handle_stripe(struct stripe_head *sh) @@ -4253,7 +4282,8 @@ static void handle_stripe(struct stripe_head *sh) return; } - check_break_stripe_batch_list(sh); + if (test_and_clear_bit(STRIPE_BATCH_ERR, &sh->state)) + break_stripe_batch_list(sh, 0); if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state) && !sh->batch_head) { spin_lock(&sh->stripe_lock); @@ -4307,6 +4337,7 @@ static void handle_stripe(struct stripe_head *sh) if (s.failed > conf->max_degraded) { sh->check_state = 0; sh->reconstruct_state = 0; + break_stripe_batch_list(sh, 0); if (s.to_read+s.to_write+s.written) handle_failed_stripe(conf, sh, &s, disks, &s.return_bi); if (s.syncing + s.replacing) @@ -6221,8 +6252,11 @@ static int alloc_scratch_buffer(struct r5conf *conf, struct raid5_percpu *percpu percpu->spare_page = alloc_page(GFP_KERNEL); if (!percpu->scribble) percpu->scribble = scribble_alloc(max(conf->raid_disks, - conf->previous_raid_disks), conf->chunk_sectors / - STRIPE_SECTORS, GFP_KERNEL); + conf->previous_raid_disks), + max(conf->chunk_sectors, + conf->prev_chunk_sectors) + / STRIPE_SECTORS, + GFP_KERNEL); if (!percpu->scribble || (conf->level == 6 && !percpu->spare_page)) { free_scratch_buffer(conf, percpu); @@ -7198,6 +7232,15 @@ static int check_reshape(struct mddev *mddev) if (!check_stripe_cache(mddev)) return -ENOSPC; + if (mddev->new_chunk_sectors > mddev->chunk_sectors || + mddev->delta_disks > 0) + if (resize_chunks(conf, + conf->previous_raid_disks + + max(0, mddev->delta_disks), + max(mddev->new_chunk_sectors, + mddev->chunk_sectors) + ) < 0) + return -ENOMEM; return resize_stripes(conf, (conf->previous_raid_disks + mddev->delta_disks)); } @@ -7311,6 +7354,7 @@ static int raid5_start_reshape(struct mddev *mddev) clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); + clear_bit(MD_RECOVERY_DONE, &mddev->recovery); set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); mddev->sync_thread = md_register_thread(md_do_sync, mddev, diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 7dc0dd86074b..896d603ad0da 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -337,9 +337,12 @@ enum { STRIPE_ON_RELEASE_LIST, STRIPE_BATCH_READY, STRIPE_BATCH_ERR, + STRIPE_BITMAP_PENDING, /* Being added to bitmap, don't add + * to batch yet. + */ }; -#define STRIPE_EXPAND_SYNC_FLAG \ +#define STRIPE_EXPAND_SYNC_FLAGS \ ((1 << STRIPE_EXPAND_SOURCE) |\ (1 << STRIPE_EXPAND_READY) |\ (1 << STRIPE_EXPANDING) |\ |