One of the optimizations in the block layer is that the software queues
are bypassed if it is expected that the block driver will accept a
request. This can cause request reordering even for requests submitted
from the same CPU core. This patch preserves the order for sequential
zoned writes submitted from a given CPU core by always inserting these
requests into the appropriate software queue.

Cc: Damien Le Moal <dlemoal@xxxxxxxxxx>
Cc: Christoph Hellwig <hch@xxxxxx>
Signed-off-by: Bart Van Assche <bvanassche@xxxxxxx>
---
 block/blk-mq.c         | 27 +++++++++++++++++++++++++--
 include/linux/blk-mq.h | 11 +++++++++++
 2 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index b1d81839679f..445f2275eddb 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1537,6 +1537,27 @@ void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list)
 }
 EXPORT_SYMBOL(blk_mq_requeue_request);
 
+/*
+ * Whether the block layer should preserve the order of @rq relative to other
+ * requests submitted to the same software queue.
+ */
+static bool blk_mq_preserve_order(struct request *rq)
+{
+	return rq->q->limits.features & BLK_FEAT_ORDERED_HWQ &&
+	       blk_rq_is_seq_zoned_write(rq);
+}
+
+static bool blk_mq_preserve_order_for_list(struct list_head *list)
+{
+	struct request *rq;
+
+	list_for_each_entry(rq, list, queuelist)
+		if (blk_mq_preserve_order(rq))
+			return true;
+
+	return false;
+}
+
 static void blk_mq_requeue_work(struct work_struct *work)
 {
 	struct request_queue *q =
@@ -2566,7 +2587,8 @@ static void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx,
 	 * Try to issue requests directly if the hw queue isn't busy to save an
 	 * extra enqueue & dequeue to the sw queue.
 	 */
-	if (!hctx->dispatch_busy && !run_queue_async) {
+	if (!hctx->dispatch_busy && !run_queue_async &&
+	    !blk_mq_preserve_order_for_list(list)) {
 		blk_mq_run_dispatch_ops(hctx->queue,
 				blk_mq_try_issue_list_directly(hctx, list));
 		if (list_empty(list))
@@ -3215,7 +3237,8 @@ void blk_mq_submit_bio(struct bio *bio)
 
 	hctx = rq->mq_hctx;
 	if ((rq->rq_flags & RQF_USE_SCHED) ||
-	    (hctx->dispatch_busy && (q->nr_hw_queues == 1 || !is_sync))) {
+	    (hctx->dispatch_busy && (q->nr_hw_queues == 1 || !is_sync)) ||
+	    blk_mq_preserve_order(rq)) {
 		blk_mq_insert_request(rq, 0);
 		blk_mq_run_hw_queue(hctx, true);
 	} else {
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 2a5a828f19a0..30d7cd1b0484 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -1191,4 +1191,15 @@ static inline int blk_rq_map_sg(struct request *rq, struct scatterlist *sglist)
 }
 void blk_dump_rq_flags(struct request *, char *);
 
+static inline bool blk_rq_is_seq_zoned_write(struct request *rq)
+{
+	switch (req_op(rq)) {
+	case REQ_OP_WRITE:
+	case REQ_OP_WRITE_ZEROES:
+		return bdev_zone_is_seq(rq->q->disk->part0, blk_rq_pos(rq));
+	default:
+		return false;
+	}
+}
+
 #endif /* BLK_MQ_H */
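
Note (not part of the patch): a driver opts in to this behavior through the
BLK_FEAT_ORDERED_HWQ queue limits feature tested in blk_mq_preserve_order()
above. As a rough illustration only, assuming that flag is defined elsewhere
in this series, a driver whose hardware queues preserve the write order could
advertise it roughly as follows; example_setup_limits() is a hypothetical
driver callback, not an existing interface:

	/*
	 * Illustrative sketch, not from this patch: mark the queue as one
	 * whose hardware queues preserve the order of write requests.
	 */
	static void example_setup_limits(struct queue_limits *lim)
	{
		lim->features |= BLK_FEAT_ORDERED_HWQ;
	}

With the flag set, only sequential zoned writes take the "always insert into
the software queue" path added here; all other requests, and all requests on
queues without the flag, keep the direct-dispatch fast path.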