On Mon, Mar 24, 2025 at 6:49 AM Ming Lei <ming.lei@xxxxxxxxxx> wrote: > > Implement ->queue_rqs() for improving perf in case of MQ. > > In this way, we just need to call io_uring_cmd_complete_in_task() once for > one batch, then both io_uring and ublk server can get exact batch from > client side. > > Follows IOPS improvement: > > - tests > > tools/testing/selftests/ublk/kublk add -t null -q 2 [-z] > > fio/t/io_uring -p0 /dev/ublkb0 > > - results: > > more than 10% IOPS boost observed > > Pass all ublk selftests, especially the io dispatch order test. > > Cc: Uday Shankar <ushankar@xxxxxxxxxxxxxxx> > Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxx> > --- > drivers/block/ublk_drv.c | 85 ++++++++++++++++++++++++++++++++++++---- > 1 file changed, 77 insertions(+), 8 deletions(-) > > diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c > index 53a463681a41..86621fde7fde 100644 > --- a/drivers/block/ublk_drv.c > +++ b/drivers/block/ublk_drv.c > @@ -83,6 +83,7 @@ struct ublk_rq_data { > struct ublk_uring_cmd_pdu { > struct ublk_queue *ubq; > u16 tag; > + struct rq_list list; > }; > > /* > @@ -1258,6 +1259,32 @@ static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq) > io_uring_cmd_complete_in_task(io->cmd, ublk_rq_task_work_cb); > } > > +static void ublk_cmd_list_tw_cb(struct io_uring_cmd *cmd, > + unsigned int issue_flags) > +{ > + struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); > + struct ublk_queue *ubq = pdu->ubq; > + struct request *rq; > + > + while ((rq = rq_list_pop(&pdu->list))) { > + struct ublk_io *io = &ubq->ios[rq->tag]; > + > + ublk_rq_task_work_cb(io->cmd, issue_flags); ublk_rq_task_work_cb() is duplicating the lookup of ubq, rq, and io. Could you factor out a helper that takes those values instead of cmd? 
> + } > +} > + > +static void ublk_queue_cmd_list(struct ublk_queue *ubq, struct rq_list *l) > +{ > + struct request *rq = l->head; > + struct ublk_io *io = &ubq->ios[rq->tag]; > + struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(io->cmd); > + > + pdu->ubq = ubq; Why does pdu->ubq need to be set here but not in ublk_queue_cmd()? I would have thought it would already be set to ubq because pdu comes from a rq belonging to this ubq. > + pdu->list = *l; > + rq_list_init(l); > + io_uring_cmd_complete_in_task(io->cmd, ublk_cmd_list_tw_cb); You could store io->cmd in a local variable to avoid looking it up twice. > +} > + > static enum blk_eh_timer_return ublk_timeout(struct request *rq) > { > struct ublk_queue *ubq = rq->mq_hctx->driver_data; > @@ -1296,16 +1323,13 @@ static enum blk_eh_timer_return ublk_timeout(struct request *rq) > return BLK_EH_RESET_TIMER; > } > > -static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, > - const struct blk_mq_queue_data *bd) > +static blk_status_t ublk_prep_rq_batch(struct request *rq) naming nit: why "batch"? > { > - struct ublk_queue *ubq = hctx->driver_data; > - struct request *rq = bd->rq; > + struct ublk_queue *ubq = rq->mq_hctx->driver_data; > blk_status_t res; > > - if (unlikely(ubq->fail_io)) { > + if (unlikely(ubq->fail_io)) > return BLK_STS_TARGET; > - } > > /* fill iod to slot in io cmd buffer */ > res = ublk_setup_iod(ubq, rq); > @@ -1324,17 +1348,58 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, > if (ublk_nosrv_should_queue_io(ubq) && unlikely(ubq->force_abort)) > return BLK_STS_IOERR; > > + if (unlikely(ubq->canceling)) > + return BLK_STS_IOERR; Why is ubq->canceling treated differently for ->queue_rq() vs. ->queue_rqs()? 
> + > + blk_mq_start_request(rq); > + return BLK_STS_OK; > +} > + > +static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, > + const struct blk_mq_queue_data *bd) > +{ > + struct ublk_queue *ubq = hctx->driver_data; > + struct request *rq = bd->rq; > + blk_status_t res; > + > if (unlikely(ubq->canceling)) { > __ublk_abort_rq(ubq, rq); > return BLK_STS_OK; > } > > - blk_mq_start_request(bd->rq); > - ublk_queue_cmd(ubq, rq); > + res = ublk_prep_rq_batch(rq); > + if (res != BLK_STS_OK) > + return res; > > + ublk_queue_cmd(ubq, rq); > return BLK_STS_OK; > } > > +static void ublk_queue_rqs(struct rq_list *rqlist) > +{ > + struct rq_list requeue_list = { }; > + struct rq_list submit_list = { }; > + struct ublk_queue *ubq = NULL; > + struct request *req; > + > + while ((req = rq_list_pop(rqlist))) { > + struct ublk_queue *this_q = req->mq_hctx->driver_data; > + > + if (ubq && ubq != this_q && !rq_list_empty(&submit_list)) > + ublk_queue_cmd_list(ubq, &submit_list); > + ubq = this_q; You could probably avoid the extra ->driver_data dereference on every rq by comparing the mq_hctx pointers instead. The ->driver_data dereference could be moved to the ublk_queue_cmd_list() calls. 
> + > + if (ublk_prep_rq_batch(req) == BLK_STS_OK) > + rq_list_add_tail(&submit_list, req); > + else > + rq_list_add_tail(&requeue_list, req); > + } > + > + if (ubq && !rq_list_empty(&submit_list)) > + ublk_queue_cmd_list(ubq, &submit_list); > + *rqlist = requeue_list; > +} > + > static int ublk_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data, > unsigned int hctx_idx) > { > @@ -1347,6 +1412,7 @@ static int ublk_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data, > > static const struct blk_mq_ops ublk_mq_ops = { > .queue_rq = ublk_queue_rq, > + .queue_rqs = ublk_queue_rqs, > .init_hctx = ublk_init_hctx, > .timeout = ublk_timeout, > }; > @@ -3147,6 +3213,9 @@ static int __init ublk_init(void) > BUILD_BUG_ON((u64)UBLKSRV_IO_BUF_OFFSET + > UBLKSRV_IO_BUF_TOTAL_SIZE < UBLKSRV_IO_BUF_OFFSET); > > + BUILD_BUG_ON(sizeof(struct ublk_uring_cmd_pdu) > > + sizeof_field(struct io_uring_cmd, pdu)); Looks like Uday also suggested this, but if you change ublk_get_uring_cmd_pdu() to use io_uring_cmd_to_pdu(), you get this check for free. Best, Caleb > + > init_waitqueue_head(&ublk_idr_wq); > > ret = misc_register(&ublk_misc); > -- > 2.47.0 >