summaryrefslogtreecommitdiff
path: root/block/blk-mq.c
diff options
context:
space:
mode:
Diffstat (limited to 'block/blk-mq.c')
-rw-r--r--block/blk-mq.c130
1 files changed, 84 insertions, 46 deletions
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 100fb0c3114f..7ed6b9469f97 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -747,11 +747,16 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
/*
* Partial zone append completions cannot be supported as the
* BIO fragments may end up not being written sequentially.
+ * For such case, force the completed nbytes to be equal to
+ * the BIO size so that bio_advance() sets the BIO remaining
+ * size to 0 and we end up calling bio_endio() before returning.
*/
- if (bio->bi_iter.bi_size != nbytes)
+ if (bio->bi_iter.bi_size != nbytes) {
bio->bi_status = BLK_STS_IOERR;
- else
+ nbytes = bio->bi_iter.bi_size;
+ } else {
bio->bi_iter.bi_sector = rq->__sector;
+ }
}
bio_advance(bio, nbytes);
@@ -1500,14 +1505,26 @@ void blk_mq_delay_kick_requeue_list(struct request_queue *q,
}
EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list);
+static bool blk_is_flush_data_rq(struct request *rq)
+{
+ return (rq->rq_flags & RQF_FLUSH_SEQ) && !is_flush_rq(rq);
+}
+
static bool blk_mq_rq_inflight(struct request *rq, void *priv)
{
/*
* If we find a request that isn't idle we know the queue is busy
* as it's checked in the iter.
* Return false to stop the iteration.
+ *
+ * In case of queue quiesce, if one flush data request is completed,
+ * don't count it as inflight given the flush sequence is suspended,
+ * and the original flush data request is invisible to driver, just
+ * like other pending requests because of quiesce
*/
- if (blk_mq_request_started(rq)) {
+ if (blk_mq_request_started(rq) && !(blk_queue_quiesced(rq->q) &&
+ blk_is_flush_data_rq(rq) &&
+ blk_mq_request_completed(rq))) {
bool *busy = priv;
*busy = true;
@@ -1848,6 +1865,22 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
__add_wait_queue(wq, wait);
/*
+ * Add one explicit barrier since blk_mq_get_driver_tag() may
+ * not imply barrier in case of failure.
+ *
+ * Order adding us to wait queue and allocating driver tag.
+ *
+ * The pair is the one implied in sbitmap_queue_wake_up() which
+ * orders clearing sbitmap tag bits and waitqueue_active() in
+ * __sbitmap_queue_wake_up(), since waitqueue_active() is lockless
+ *
+ * Otherwise, re-order of adding wait queue and getting driver tag
+ * may cause __sbitmap_queue_wake_up() to wake up nothing because
+ * the waitqueue_active() may not observe us in wait queue.
+ */
+ smp_mb();
+
+ /*
* It's possible that a tag was freed in the window between the
* allocation failure and adding the hardware queue to the wait
* queue.
@@ -2855,11 +2888,8 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
};
struct request *rq;
- if (unlikely(bio_queue_enter(bio)))
- return NULL;
-
if (blk_mq_attempt_bio_merge(q, bio, nsegs))
- goto queue_exit;
+ return NULL;
rq_qos_throttle(q, bio);
@@ -2875,35 +2905,23 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
rq_qos_cleanup(q, bio);
if (bio->bi_opf & REQ_NOWAIT)
bio_wouldblock_error(bio);
-queue_exit:
- blk_queue_exit(q);
return NULL;
}
-static inline struct request *blk_mq_get_cached_request(struct request_queue *q,
- struct blk_plug *plug, struct bio **bio, unsigned int nsegs)
+/* return true if this @rq can be used for @bio */
+static bool blk_mq_can_use_cached_rq(struct request *rq, struct blk_plug *plug,
+ struct bio *bio)
{
- struct request *rq;
- enum hctx_type type, hctx_type;
-
- if (!plug)
- return NULL;
- rq = rq_list_peek(&plug->cached_rq);
- if (!rq || rq->q != q)
- return NULL;
+ enum hctx_type type = blk_mq_get_hctx_type(bio->bi_opf);
+ enum hctx_type hctx_type = rq->mq_hctx->type;
- if (blk_mq_attempt_bio_merge(q, *bio, nsegs)) {
- *bio = NULL;
- return NULL;
- }
+ WARN_ON_ONCE(rq_list_peek(&plug->cached_rq) != rq);
- type = blk_mq_get_hctx_type((*bio)->bi_opf);
- hctx_type = rq->mq_hctx->type;
if (type != hctx_type &&
!(type == HCTX_TYPE_READ && hctx_type == HCTX_TYPE_DEFAULT))
- return NULL;
- if (op_is_flush(rq->cmd_flags) != op_is_flush((*bio)->bi_opf))
- return NULL;
+ return false;
+ if (op_is_flush(rq->cmd_flags) != op_is_flush(bio->bi_opf))
+ return false;
/*
* If any qos ->throttle() end up blocking, we will have flushed the
@@ -2911,11 +2929,11 @@ static inline struct request *blk_mq_get_cached_request(struct request_queue *q,
* before we throttle.
*/
plug->cached_rq = rq_list_next(rq);
- rq_qos_throttle(q, *bio);
+ rq_qos_throttle(rq->q, bio);
- rq->cmd_flags = (*bio)->bi_opf;
+ rq->cmd_flags = bio->bi_opf;
INIT_LIST_HEAD(&rq->queuelist);
- return rq;
+ return true;
}
static void bio_set_ioprio(struct bio *bio)
@@ -2944,31 +2962,51 @@ void blk_mq_submit_bio(struct bio *bio)
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
struct blk_plug *plug = blk_mq_plug(bio);
const int is_sync = op_is_sync(bio->bi_opf);
- struct request *rq;
+ struct request *rq = NULL;
unsigned int nr_segs = 1;
blk_status_t ret;
bio = blk_queue_bounce(bio, q);
- if (bio_may_exceed_limits(bio, &q->limits)) {
- bio = __bio_split_to_limits(bio, &q->limits, &nr_segs);
- if (!bio)
- return;
- }
-
- if (!bio_integrity_prep(bio))
- return;
-
bio_set_ioprio(bio);
- rq = blk_mq_get_cached_request(q, plug, &bio, nr_segs);
- if (!rq) {
- if (!bio)
+ if (plug) {
+ rq = rq_list_peek(&plug->cached_rq);
+ if (rq && rq->q != q)
+ rq = NULL;
+ }
+ if (rq) {
+ if (unlikely(bio_may_exceed_limits(bio, &q->limits))) {
+ bio = __bio_split_to_limits(bio, &q->limits, &nr_segs);
+ if (!bio)
+ return;
+ }
+ if (!bio_integrity_prep(bio))
return;
- rq = blk_mq_get_new_requests(q, plug, bio, nr_segs);
- if (unlikely(!rq))
+ if (blk_mq_attempt_bio_merge(q, bio, nr_segs))
return;
+ if (blk_mq_can_use_cached_rq(rq, plug, bio))
+ goto done;
+ percpu_ref_get(&q->q_usage_counter);
+ } else {
+ if (unlikely(bio_queue_enter(bio)))
+ return;
+ if (unlikely(bio_may_exceed_limits(bio, &q->limits))) {
+ bio = __bio_split_to_limits(bio, &q->limits, &nr_segs);
+ if (!bio)
+ goto fail;
+ }
+ if (!bio_integrity_prep(bio))
+ goto fail;
+ }
+
+ rq = blk_mq_get_new_requests(q, plug, bio, nr_segs);
+ if (unlikely(!rq)) {
+fail:
+ blk_queue_exit(q);
+ return;
}
+done:
trace_block_getrq(bio);
rq_qos_track(q, rq, bio);