From: Yu Kuai <yukuai3@xxxxxxxxxx>

For the dispatch_request method, the current behavior is to dispatch one
request at a time. With multiple dispatching contexts, this behavior, on
the one hand, introduces intense lock contention:

t1:                     t2:                     t3:
lock
                        lock
                                                lock
// grab lock
ops.dispatch_request
unlock
                        // grab lock
                        ops.dispatch_request
                        unlock
                                                // grab lock
                                                ops.dispatch_request
                                                unlock

On the other hand, it messes up the request dispatching order:

t1:                             t2:
lock
rq1 = ops.dispatch_request
unlock
                                lock
                                rq2 = ops.dispatch_request
                                unlock
lock
rq3 = ops.dispatch_request
unlock
                                lock
                                rq4 = ops.dispatch_request
                                unlock
// rq1, rq3 issued to disk
                                // rq2, rq4 issued to disk

In this case, the elevator dispatch order is rq 1-2-3-4; however, the
order seen by the disk is rq 1-3-2-4, with rq2 and rq3 inverted.

Fix these problems by introducing blk_mq_dispatch_requests(): this
helper grabs the lock and dispatches a batch of requests while holding
it.

Signed-off-by: Yu Kuai <yukuai3@xxxxxxxxxx>
---
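By way of illustration, the locking difference can be sketched in plain
userspace C (sketch only, not part of the patch; the mutex and request
counter are hypothetical stand-ins for the elevator lock and the
scheduler's internal state). One lock round-trip per request leaves a
window between unlock and the next lock where another context can
dispatch and issue; a batch dispatched under a single lock hold stays
contiguous and in elevator order:

/*
 * Illustration only, not part of the patch.
 * Build: gcc -pthread -o dispatch_demo dispatch_demo.c
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t elv_lock = PTHREAD_MUTEX_INITIALIZER;
static int next_rq;     /* stand-in for the elevator's internal queue */

/* Old behavior: one lock round-trip per dispatched request. */
static int dispatch_one(void)
{
        int rq;

        pthread_mutex_lock(&elv_lock);
        rq = next_rq++;
        pthread_mutex_unlock(&elv_lock);
        /* window: another context may dispatch and issue here */
        return rq;
}

/* New behavior: a whole batch under a single lock hold. */
static void dispatch_batch(int *rqs, int batch)
{
        int i;

        pthread_mutex_lock(&elv_lock);
        for (i = 0; i < batch; i++)
                rqs[i] = next_rq++;
        pthread_mutex_unlock(&elv_lock);
        /* rqs[] is contiguous and in elevator order */
}

int main(void)
{
        int rqs[4];

        printf("one at a time: rq%d\n", dispatch_one());
        printf("one at a time: rq%d\n", dispatch_one());

        dispatch_batch(rqs, 4);
        printf("batched: rq%d rq%d rq%d rq%d\n",
               rqs[0], rqs[1], rqs[2], rqs[3]);
        return 0;
}

A single-threaded run prints rq0 and rq1 for the one-at-a-time calls and
rq2 rq3 rq4 rq5 for the batch; the inversion described above needs
concurrent callers, but the locking pattern is the same.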
 block/blk-mq-sched.c | 65 +++++++++++++++++++++++++++++++++++++++++---
 block/blk-mq.h       | 18 ++++++++++++
 2 files changed, 79 insertions(+), 4 deletions(-)

diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index e6305b680db9..3809ad880d49 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -192,6 +192,59 @@ static int blk_mq_finish_dispatch(struct sched_dispatch_ctx *ctx)
 	return !!dispatched;
 }
 
+static void blk_mq_dispatch_requests(struct sched_dispatch_ctx *ctx)
+{
+	struct request_queue *q = ctx->hctx->queue;
+	struct elevator_queue *e = q->elevator;
+	bool has_get_budget = q->mq_ops->get_budget != NULL;
+	int budget_token[BUDGET_TOKEN_BATCH];
+	int count = q->nr_requests;
+	int i;
+
+	while (true) {
+		if (!blk_mq_should_dispatch(ctx))
+			return;
+
+		if (has_get_budget) {
+			count = blk_mq_get_dispatch_budgets(q, budget_token);
+			if (count <= 0)
+				return;
+		}
+
+		elevator_dispatch_lock(e);
+		for (i = 0; i < count; ++i) {
+			struct request *rq =
+				e->type->ops.dispatch_request(ctx->hctx);
+
+			if (!rq) {
+				ctx->run_queue = true;
+				goto err_free_budgets;
+			}
+
+			if (has_get_budget)
+				blk_mq_set_rq_budget_token(rq, budget_token[i]);
+
+			list_add_tail(&rq->queuelist, &ctx->rq_list);
+			ctx->count++;
+
+			if (rq->mq_hctx != ctx->hctx)
+				ctx->multi_hctxs = true;
+
+			if (!blk_mq_get_driver_tag(rq)) {
+				i++;
+				goto err_free_budgets;
+			}
+		}
+		elevator_dispatch_unlock(e);
+	}
+
+err_free_budgets:
+	elevator_dispatch_unlock(e);
+	if (has_get_budget)
+		for (; i < count; ++i)
+			blk_mq_put_dispatch_budget(q, budget_token[i]);
+}
+
 /*
  * Only SCSI implements .get_budget and .put_budget, and SCSI restarts
  * its queue by itself in its completion handler, so we don't need to
@@ -212,10 +265,14 @@ static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
 	else
 		max_dispatch = hctx->queue->nr_requests;
 
-	do {
-		if (!blk_mq_dispatch_one_request(&ctx))
-			break;
-	} while (ctx.count < max_dispatch);
+	if (!hctx->dispatch_busy && blk_queue_sq_sched(hctx->queue)) {
+		blk_mq_dispatch_requests(&ctx);
+	} else {
+		do {
+			if (!blk_mq_dispatch_one_request(&ctx))
+				break;
+		} while (ctx.count < max_dispatch);
+	}
 
 	return blk_mq_finish_dispatch(&ctx);
 }
diff --git a/block/blk-mq.h b/block/blk-mq.h
index af09eb617d11..1f35f9ec146d 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -38,6 +38,7 @@ enum {
 };
 
 #define BLK_MQ_CPU_WORK_BATCH	(8)
+#define BUDGET_TOKEN_BATCH	(8)
 
 typedef unsigned int __bitwise blk_insert_t;
 #define BLK_MQ_INSERT_AT_HEAD	((__force blk_insert_t)0x01)
@@ -274,6 +275,23 @@ static inline int blk_mq_get_dispatch_budget(struct request_queue *q)
 	return 0;
 }
 
+static inline int blk_mq_get_dispatch_budgets(struct request_queue *q,
+					      int *budget_token)
+{
+	int count = 0;
+
+	while (count < BUDGET_TOKEN_BATCH) {
+		int token = q->mq_ops->get_budget(q);
+
+		if (token < 0)
+			return count;
+
+		budget_token[count++] = token;
+	}
+
+	return count;
+}
+
 static inline void blk_mq_set_rq_budget_token(struct request *rq, int token)
 {
 	if (token < 0)
-- 
2.39.2