On Mon, Jun 16, 2025 at 09:01:54AM -0700, Caleb Sander Mateos wrote: > On Mon, Jun 16, 2025 at 6:11 AM Christoph Hellwig <hch@xxxxxxxxxxxxx> wrote: > > > > On Thu, Jun 12, 2025 at 06:28:47AM -0600, Jens Axboe wrote: > > > But ideally we'd have that, and just a plain doubly linked list on the > > > queue/dispatch side. Which makes the list handling there much easier to > > > follow, as per your patch. > > > > Quick hack from the weekend. This also never deletes the requests from > > the submission list for the queue_rqs case, so depending on the workload > > it should touch either the same amount or fewer cache lines as the > > existing version. Only very lightly tested, and ublk is broken and > > doesn't even compile as it's running out of space in the io_uring pdu. > > I'll need help from someone who knows ublk for that. > > > > --- > > From 07e283303c63fcb694e828380a24ad51f225a228 Mon Sep 17 00:00:00 2001 > > From: Christoph Hellwig <hch@xxxxxx> > > Date: Fri, 13 Jun 2025 09:48:40 +0200 > > Subject: block: always use a list_head for requests > > > > Use standard double linked lists for the remaining lists of queued up > > requests. This removes a lot of hairy list manipulation code and allows > > easy reverse walking of the lists, which is used in > > blk_attempt_plug_merge to improve the merging, and in blk_add_rq_to_plug > > to look at the correct request. > > > > XXX: ublk is broken right now, because there is no space in the io_uring > > pdu for the list backpointer. 
> > > > Signed-off-by: Christoph Hellwig <hch@xxxxxx> > > --- > > block/blk-core.c | 2 +- > > block/blk-merge.c | 11 +--- > > block/blk-mq.c | 97 ++++++++++++++--------------------- > > drivers/block/null_blk/main.c | 16 +++--- > > drivers/block/ublk_drv.c | 43 +++++++--------- > > drivers/block/virtio_blk.c | 31 +++++------ > > drivers/nvme/host/pci.c | 32 ++++++------ > > include/linux/blk-mq.h | 2 +- > > include/linux/blkdev.h | 2 +- > > 9 files changed, 103 insertions(+), 133 deletions(-) > > > > diff --git a/block/blk-core.c b/block/blk-core.c > > index b862c66018f2..29aad939a1e3 100644 > > --- a/block/blk-core.c > > +++ b/block/blk-core.c > > @@ -1127,7 +1127,7 @@ void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios) > > return; > > > > plug->cur_ktime = 0; > > - rq_list_init(&plug->mq_list); > > + INIT_LIST_HEAD(&plug->mq_list); > > rq_list_init(&plug->cached_rqs); > > plug->nr_ios = min_t(unsigned short, nr_ios, BLK_MAX_REQUEST_COUNT); > > plug->rq_count = 0; > > diff --git a/block/blk-merge.c b/block/blk-merge.c > > index 70d704615be5..223941e9ec08 100644 > > --- a/block/blk-merge.c > > +++ b/block/blk-merge.c > > @@ -995,17 +995,10 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, > > struct blk_plug *plug = current->plug; > > struct request *rq; > > > > - if (!plug || rq_list_empty(&plug->mq_list)) > > + if (!plug) > > return false; > > > > - rq = plug->mq_list.tail; > > - if (rq->q == q) > > - return blk_attempt_bio_merge(q, rq, bio, nr_segs, false) == > > - BIO_MERGE_OK; > > - else if (!plug->multiple_queues) > > - return false; > > - > > - rq_list_for_each(&plug->mq_list, rq) { > > + list_for_each_entry_reverse(rq, &plug->mq_list, queuelist) { > > if (rq->q != q) > > continue; > > if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) == > > diff --git a/block/blk-mq.c b/block/blk-mq.c > > index 4806b867e37d..6d56471d4346 100644 > > --- a/block/blk-mq.c > > +++ b/block/blk-mq.c > > @@ -1378,7 +1378,8 @@ static 
inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug) > > > > static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq) > > { > > - struct request *last = rq_list_peek(&plug->mq_list); > > + struct request *last = > > + list_last_entry(&plug->mq_list, struct request, queuelist); > > > > if (!plug->rq_count) { > > trace_block_plug(rq->q); > > @@ -1398,7 +1399,7 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq) > > */ > > if (!plug->has_elevator && (rq->rq_flags & RQF_SCHED_TAGS)) > > plug->has_elevator = true; > > - rq_list_add_tail(&plug->mq_list, rq); > > + list_add_tail(&rq->queuelist, &plug->mq_list); > > plug->rq_count++; > > } > > > > @@ -2780,15 +2781,15 @@ static blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last) > > return __blk_mq_issue_directly(hctx, rq, last); > > } > > > > -static void blk_mq_issue_direct(struct rq_list *rqs) > > +static void blk_mq_issue_direct(struct list_head *rqs) > > { > > struct blk_mq_hw_ctx *hctx = NULL; > > - struct request *rq; > > + struct request *rq, *n; > > int queued = 0; > > blk_status_t ret = BLK_STS_OK; > > > > - while ((rq = rq_list_pop(rqs))) { > > - bool last = rq_list_empty(rqs); > > + list_for_each_entry_safe(rq, n, rqs, queuelist) { > > + list_del_init(&rq->queuelist); > > > > if (hctx != rq->mq_hctx) { > > if (hctx) { > > @@ -2798,7 +2799,7 @@ static void blk_mq_issue_direct(struct rq_list *rqs) > > hctx = rq->mq_hctx; > > } > > > > - ret = blk_mq_request_issue_directly(rq, last); > > + ret = blk_mq_request_issue_directly(rq, list_empty(rqs)); > > switch (ret) { > > case BLK_STS_OK: > > queued++; > > @@ -2819,45 +2820,18 @@ static void blk_mq_issue_direct(struct rq_list *rqs) > > blk_mq_commit_rqs(hctx, queued, false); > > } > > > > -static void __blk_mq_flush_list(struct request_queue *q, struct rq_list *rqs) > > +static void __blk_mq_flush_list(struct request_queue *q, struct list_head *rqs) > > { > > if (blk_queue_quiesced(q)) > > 
return; > > q->mq_ops->queue_rqs(rqs); > > } > > > > -static unsigned blk_mq_extract_queue_requests(struct rq_list *rqs, > > - struct rq_list *queue_rqs) > > -{ > > - struct request *rq = rq_list_pop(rqs); > > - struct request_queue *this_q = rq->q; > > - struct request **prev = &rqs->head; > > - struct rq_list matched_rqs = {}; > > - struct request *last = NULL; > > - unsigned depth = 1; > > - > > - rq_list_add_tail(&matched_rqs, rq); > > - while ((rq = *prev)) { > > - if (rq->q == this_q) { > > - /* move rq from rqs to matched_rqs */ > > - *prev = rq->rq_next; > > - rq_list_add_tail(&matched_rqs, rq); > > - depth++; > > - } else { > > - /* leave rq in rqs */ > > - prev = &rq->rq_next; > > - last = rq; > > - } > > - } > > - > > - rqs->tail = last; > > - *queue_rqs = matched_rqs; > > - return depth; > > -} > > - > > -static void blk_mq_dispatch_queue_requests(struct rq_list *rqs, unsigned depth) > > +static void blk_mq_dispatch_queue_requests(struct list_head *rqs, > > + unsigned depth) > > { > > - struct request_queue *q = rq_list_peek(rqs)->q; > > + struct request *rq = list_first_entry(rqs, struct request, queuelist); > > + struct request_queue *q = rq->q; > > > > trace_block_unplug(q, depth, true); > > > > @@ -2869,39 +2843,35 @@ static void blk_mq_dispatch_queue_requests(struct rq_list *rqs, unsigned depth) > > */ > > if (q->mq_ops->queue_rqs) { > > blk_mq_run_dispatch_ops(q, __blk_mq_flush_list(q, rqs)); > > - if (rq_list_empty(rqs)) > > + if (list_empty(rqs)) > > return; > > } > > > > blk_mq_run_dispatch_ops(q, blk_mq_issue_direct(rqs)); > > } > > > > -static void blk_mq_dispatch_list(struct rq_list *rqs, bool from_sched) > > +static void blk_mq_dispatch_list(struct list_head *rqs, bool from_sched) > > { > > struct blk_mq_hw_ctx *this_hctx = NULL; > > struct blk_mq_ctx *this_ctx = NULL; > > - struct rq_list requeue_list = {}; > > + LIST_HEAD(list); > > + struct request *rq, *n; > > unsigned int depth = 0; > > bool is_passthrough = false; > > - 
LIST_HEAD(list); > > - > > - do { > > - struct request *rq = rq_list_pop(rqs); > > > > + list_for_each_entry_safe(rq, n, rqs, queuelist) { > > if (!this_hctx) { > > this_hctx = rq->mq_hctx; > > this_ctx = rq->mq_ctx; > > is_passthrough = blk_rq_is_passthrough(rq); > > } else if (this_hctx != rq->mq_hctx || this_ctx != rq->mq_ctx || > > is_passthrough != blk_rq_is_passthrough(rq)) { > > - rq_list_add_tail(&requeue_list, rq); > > continue; > > } > > - list_add_tail(&rq->queuelist, &list); > > + list_move_tail(&rq->queuelist, &list); > > depth++; > > - } while (!rq_list_empty(rqs)); > > + } > > > > - *rqs = requeue_list; > > trace_block_unplug(this_hctx->queue, depth, !from_sched); > > > > percpu_ref_get(&this_hctx->queue->q_usage_counter); > > @@ -2921,17 +2891,27 @@ static void blk_mq_dispatch_list(struct rq_list *rqs, bool from_sched) > > percpu_ref_put(&this_hctx->queue->q_usage_counter); > > } > > > > -static void blk_mq_dispatch_multiple_queue_requests(struct rq_list *rqs) > > +static void blk_mq_dispatch_multiple_queue_requests(struct list_head *rqs) > > { > > do { > > - struct rq_list queue_rqs; > > - unsigned depth; > > + struct request_queue *this_q = NULL; > > + struct request *rq, *n; > > + LIST_HEAD(queue_rqs); > > + unsigned depth = 0; > > + > > + list_for_each_entry_safe(rq, n, rqs, queuelist) { > > + if (!this_q) > > + this_q = rq->q; > > + if (this_q == rq->q) { > > + list_move_tail(&rq->queuelist, &queue_rqs); > > + depth++; > > + } > > + } > > > > - depth = blk_mq_extract_queue_requests(rqs, &queue_rqs); > > blk_mq_dispatch_queue_requests(&queue_rqs, depth); > > - while (!rq_list_empty(&queue_rqs)) > > + while (!list_empty(&queue_rqs)) > > blk_mq_dispatch_list(&queue_rqs, false); > > - } while (!rq_list_empty(rqs)); > > + } while (!list_empty(rqs)); > > } > > > > void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule) > > @@ -2955,15 +2935,14 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule) > > 
blk_mq_dispatch_multiple_queue_requests(&plug->mq_list); > > return; > > } > > - > > blk_mq_dispatch_queue_requests(&plug->mq_list, depth); > > - if (rq_list_empty(&plug->mq_list)) > > + if (list_empty(&plug->mq_list)) > > return; > > } > > > > do { > > blk_mq_dispatch_list(&plug->mq_list, from_schedule); > > - } while (!rq_list_empty(&plug->mq_list)); > > + } while (!list_empty(&plug->mq_list)); > > } > > > > static void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, > > diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c > > index aa163ae9b2aa..ce3ac928122f 100644 > > --- a/drivers/block/null_blk/main.c > > +++ b/drivers/block/null_blk/main.c > > @@ -1694,22 +1694,22 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx, > > return BLK_STS_OK; > > } > > > > -static void null_queue_rqs(struct rq_list *rqlist) > > +static void null_queue_rqs(struct list_head *rqlist) > > { > > - struct rq_list requeue_list = {}; > > struct blk_mq_queue_data bd = { }; > > + LIST_HEAD(requeue_list); > > + struct request *rq, *n; > > blk_status_t ret; > > > > - do { > > - struct request *rq = rq_list_pop(rqlist); > > - > > + list_for_each_entry_safe(rq, n, rqlist, queuelist) { > > bd.rq = rq; > > ret = null_queue_rq(rq->mq_hctx, &bd); > > if (ret != BLK_STS_OK) > > - rq_list_add_tail(&requeue_list, rq); > > - } while (!rq_list_empty(rqlist)); > > + list_move_tail(&rq->queuelist, &requeue_list); > > + } > > > > - *rqlist = requeue_list; > > + INIT_LIST_HEAD(rqlist); > > + list_splice(&requeue_list, rqlist); > > } > > > > static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq) > > diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c > > index c637ea010d34..4d5b88ca7b1b 100644 > > --- a/drivers/block/ublk_drv.c > > +++ b/drivers/block/ublk_drv.c > > @@ -101,7 +101,7 @@ struct ublk_uring_cmd_pdu { > > */ > > union { > > struct request *req; > > - struct request *req_list; > > + struct list_head req_list; > > }; > > 
> > /* > > @@ -1325,24 +1325,18 @@ static void ublk_cmd_list_tw_cb(struct io_uring_cmd *cmd, > > unsigned int issue_flags) > > { > > struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); > > - struct request *rq = pdu->req_list; > > - struct request *next; > > + struct request *rq, *n; > > > > - do { > > - next = rq->rq_next; > > - rq->rq_next = NULL; > > + list_for_each_entry_safe(rq, n, &pdu->req_list, queuelist) > > ublk_dispatch_req(rq->mq_hctx->driver_data, rq, issue_flags); > > - rq = next; > > - } while (rq); > > } > > > > -static void ublk_queue_cmd_list(struct ublk_io *io, struct rq_list *l) > > +static void ublk_queue_cmd_list(struct ublk_io *io, struct list_head *rqlist) > > { > > struct io_uring_cmd *cmd = io->cmd; > > struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); > > > > - pdu->req_list = rq_list_peek(l); > > - rq_list_init(l); > > + list_splice(&pdu->req_list, rqlist); > > io_uring_cmd_complete_in_task(cmd, ublk_cmd_list_tw_cb); > > ublk_cmd_list_tw_cb() doesn't need a doubly-linked list. It should be > fine to continue storing just the first struct request * of the list > in struct ublk_uring_cmd_pdu. That would avoid growing > ublk_uring_cmd_pdu past the 32-byte limit. Agree. ublk needn't re-order, and doubly-linked list is useless here. Thanks, Ming