On Mon, Mar 24, 2025 at 09:49:01PM +0800, Ming Lei wrote:
> Implement ->queue_rqs() for improving perf in case of MQ.
>
> In this way, we just need to call io_uring_cmd_complete_in_task() once for
> one batch, then both io_uring and ublk server can get exact batch from
> client side.
>
> Follows IOPS improvement:
>
> - tests
>
>         tools/testing/selftests/ublk/kublk add -t null -q 2 [-z]
>
>         fio/t/io_uring -p0 /dev/ublkb0
>
> - results:
>
>         more than 10% IOPS boost observed

Nice!

>
> Pass all ublk selftests, especially the io dispatch order test.
>
> Cc: Uday Shankar <ushankar@xxxxxxxxxxxxxxx>
> Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxx>
> ---
>  drivers/block/ublk_drv.c | 85 ++++++++++++++++++++++++++++++++++++----
>  1 file changed, 77 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
> index 53a463681a41..86621fde7fde 100644
> --- a/drivers/block/ublk_drv.c
> +++ b/drivers/block/ublk_drv.c
> @@ -83,6 +83,7 @@ struct ublk_rq_data {
>  struct ublk_uring_cmd_pdu {
>         struct ublk_queue *ubq;
>         u16 tag;
> +       struct rq_list list;
>  };

I think you'll have a hole after tag here and end up eating the full 32
bytes on x86_64. Maybe put list before tag to leave a bit of space for
some small fields? (Rough sketch below, after my other comments on this
hunk.)

Separately, I think we should try to consolidate all the per-IO state.
It is currently split across the struct io_uring_cmd PDU, struct request
and its PDU, and struct ublk_io. Maybe we can store everything in struct
ublk_io and just put pointers to that where needed. (Also sketched
below.)

>
>  /*
> @@ -1258,6 +1259,32 @@ static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq)
>         io_uring_cmd_complete_in_task(io->cmd, ublk_rq_task_work_cb);
>  }
>
> +static void ublk_cmd_list_tw_cb(struct io_uring_cmd *cmd,
> +               unsigned int issue_flags)
> +{
> +       struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
> +       struct ublk_queue *ubq = pdu->ubq;
> +       struct request *rq;
> +
> +       while ((rq = rq_list_pop(&pdu->list))) {
> +               struct ublk_io *io = &ubq->ios[rq->tag];
> +
> +               ublk_rq_task_work_cb(io->cmd, issue_flags);

Maybe rename to ublk_dispatch_one_rq or similar?
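On the layout point above, here is roughly what I mean (my own
illustration, not from the patch; offsets assume x86_64, where struct
rq_list is two pointers and the io_uring_cmd pdu area is 32 bytes):

        struct ublk_uring_cmd_pdu {
                struct ublk_queue *ubq; /* bytes 0-7 */
                struct rq_list list;    /* bytes 8-23: head + tail */
                u16 tag;                /* bytes 24-25 */
                /* 6 bytes of tail padding, free for small fields */
        };

It is still 32 bytes either way, but this leaves the spare bytes
contiguous at the end instead of stranded in a hole between tag and
list.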
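And on the consolidation idea, the direction I have in mind (purely
hypothetical sketch, field placement not worked through):

        /* all per-IO state collected in one place */
        struct ublk_io {
                /*
                 * existing fields, plus whatever currently lives in
                 * the io_uring_cmd PDU and the request PDU, e.g. the
                 * batch list head used by ->queue_rqs()
                 */
                struct rq_list list;
        };

        struct ublk_uring_cmd_pdu {
                struct ublk_io *io;     /* just a pointer back */
        };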
> +       }
> +}
> +
> +static void ublk_queue_cmd_list(struct ublk_queue *ubq, struct rq_list *l)
> +{
> +       struct request *rq = l->head;
> +       struct ublk_io *io = &ubq->ios[rq->tag];
> +       struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(io->cmd);
> +
> +       pdu->ubq = ubq;
> +       pdu->list = *l;
> +       rq_list_init(l);
> +       io_uring_cmd_complete_in_task(io->cmd, ublk_cmd_list_tw_cb);
> +}
> +
>  static enum blk_eh_timer_return ublk_timeout(struct request *rq)
>  {
>         struct ublk_queue *ubq = rq->mq_hctx->driver_data;
> @@ -1296,16 +1323,13 @@ static enum blk_eh_timer_return ublk_timeout(struct request *rq)
>         return BLK_EH_RESET_TIMER;
>  }
>
> -static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
> -               const struct blk_mq_queue_data *bd)
> +static blk_status_t ublk_prep_rq_batch(struct request *rq)
>  {
> -       struct ublk_queue *ubq = hctx->driver_data;
> -       struct request *rq = bd->rq;
> +       struct ublk_queue *ubq = rq->mq_hctx->driver_data;
>         blk_status_t res;
>
> -       if (unlikely(ubq->fail_io)) {
> +       if (unlikely(ubq->fail_io))
>                 return BLK_STS_TARGET;
> -       }
>
>         /* fill iod to slot in io cmd buffer */
>         res = ublk_setup_iod(ubq, rq);
> @@ -1324,17 +1348,58 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
>         if (ublk_nosrv_should_queue_io(ubq) && unlikely(ubq->force_abort))
>                 return BLK_STS_IOERR;
>
> +       if (unlikely(ubq->canceling))
> +               return BLK_STS_IOERR;
> +
> +       blk_mq_start_request(rq);
> +       return BLK_STS_OK;
> +}
> +
> +static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
> +               const struct blk_mq_queue_data *bd)
> +{
> +       struct ublk_queue *ubq = hctx->driver_data;
> +       struct request *rq = bd->rq;
> +       blk_status_t res;
> +
>         if (unlikely(ubq->canceling)) {
>                 __ublk_abort_rq(ubq, rq);
>                 return BLK_STS_OK;
>         }
>
> -       blk_mq_start_request(bd->rq);
> -       ublk_queue_cmd(ubq, rq);
> +       res = ublk_prep_rq_batch(rq);
> +       if (res != BLK_STS_OK)
> +               return res;
>
> +       ublk_queue_cmd(ubq, rq);
>         return BLK_STS_OK;
>  }
>
> +static void ublk_queue_rqs(struct rq_list *rqlist)
> +{
> +       struct rq_list requeue_list = { };
> +       struct rq_list submit_list = { };
> +       struct ublk_queue *ubq = NULL;
> +       struct request *req;
> +
> +       while ((req = rq_list_pop(rqlist))) {
> +               struct ublk_queue *this_q = req->mq_hctx->driver_data;
> +
> +               if (ubq && ubq != this_q && !rq_list_empty(&submit_list))
> +                       ublk_queue_cmd_list(ubq, &submit_list);
> +               ubq = this_q;
> +
> +               if (ublk_prep_rq_batch(req) == BLK_STS_OK)
> +                       rq_list_add_tail(&submit_list, req);
> +               else
> +                       rq_list_add_tail(&requeue_list, req);
> +       }
> +
> +       if (ubq && !rq_list_empty(&submit_list))
> +               ublk_queue_cmd_list(ubq, &submit_list);
> +       *rqlist = requeue_list;
> +}
> +
>  static int ublk_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data,
>                 unsigned int hctx_idx)
>  {
> @@ -1347,6 +1412,7 @@ static int ublk_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data,
>
>  static const struct blk_mq_ops ublk_mq_ops = {
>         .queue_rq       = ublk_queue_rq,
> +       .queue_rqs      = ublk_queue_rqs,
>         .init_hctx      = ublk_init_hctx,
>         .timeout        = ublk_timeout,
>  };
> @@ -3147,6 +3213,9 @@ static int __init ublk_init(void)
>         BUILD_BUG_ON((u64)UBLKSRV_IO_BUF_OFFSET +
>                         UBLKSRV_IO_BUF_TOTAL_SIZE < UBLKSRV_IO_BUF_OFFSET);
>
> +       BUILD_BUG_ON(sizeof(struct ublk_uring_cmd_pdu) >
> +                       sizeof_field(struct io_uring_cmd, pdu));
> +

Consider putting this near the struct ublk_uring_cmd_pdu definition or
in ublk_get_uring_cmd_pdu. There are helpers provided by io_uring
(io_uring_cmd_private_sz_check and io_uring_cmd_to_pdu) you can use.
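Something like this (untested) would fold the size check into the
accessor and let you drop the explicit BUILD_BUG_ON in ublk_init():

        static inline struct ublk_uring_cmd_pdu *ublk_get_uring_cmd_pdu(
                        struct io_uring_cmd *ioucmd)
        {
                /*
                 * io_uring_cmd_to_pdu() runs io_uring_cmd_private_sz_check(),
                 * which BUILD_BUG_ONs when the PDU does not fit in
                 * io_uring_cmd::pdu.
                 */
                return io_uring_cmd_to_pdu(ioucmd, struct ublk_uring_cmd_pdu);
        }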
>
>         init_waitqueue_head(&ublk_idr_wq);
>
>         ret = misc_register(&ublk_misc);
> --
> 2.47.0
>