Re: [PATCH 6/8] ublk: implement ->queue_rqs()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Mar 26, 2025 at 02:30:56PM -0700, Caleb Sander Mateos wrote:
> On Mon, Mar 24, 2025 at 6:49 AM Ming Lei <ming.lei@xxxxxxxxxx> wrote:
> >
> > Implement ->queue_rqs() for improving perf in case of MQ.
> >
> > In this way, we just need to call io_uring_cmd_complete_in_task() once for
> > one batch, then both io_uring and ublk server can get exact batch from
> > client side.
> >
> > Follows IOPS improvement:
> >
> > - tests
> >
> >         tools/testing/selftests/ublk/kublk add -t null -q 2 [-z]
> >
> >         fio/t/io_uring -p0 /dev/ublkb0
> >
> > - results:
> >
> >         more than 10% IOPS boost observed
> >
> > Pass all ublk selftests, especially the io dispatch order test.
> >
> > Cc: Uday Shankar <ushankar@xxxxxxxxxxxxxxx>
> > Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxx>
> > ---
> >  drivers/block/ublk_drv.c | 85 ++++++++++++++++++++++++++++++++++++----
> >  1 file changed, 77 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
> > index 53a463681a41..86621fde7fde 100644
> > --- a/drivers/block/ublk_drv.c
> > +++ b/drivers/block/ublk_drv.c
> > @@ -83,6 +83,7 @@ struct ublk_rq_data {
> >  struct ublk_uring_cmd_pdu {
> >         struct ublk_queue *ubq;
> >         u16 tag;
> > +       struct rq_list list;
> >  };
> >
> >  /*
> > @@ -1258,6 +1259,32 @@ static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq)
> >         io_uring_cmd_complete_in_task(io->cmd, ublk_rq_task_work_cb);
> >  }
> >
> > +static void ublk_cmd_list_tw_cb(struct io_uring_cmd *cmd,
> > +               unsigned int issue_flags)
> > +{
> > +       struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
> > +       struct ublk_queue *ubq = pdu->ubq;
> > +       struct request *rq;
> > +
> > +       while ((rq = rq_list_pop(&pdu->list))) {
> > +               struct ublk_io *io = &ubq->ios[rq->tag];
> > +
> > +               ublk_rq_task_work_cb(io->cmd, issue_flags);
> 
> ublk_rq_task_work_cb() is duplicating the lookup of ubq, rq, and io.
> Could you factor out a helper that takes those values instead of cmd?
> 
> > +       }
> > +}
> > +
> > +static void ublk_queue_cmd_list(struct ublk_queue *ubq, struct rq_list *l)
> > +{
> > +       struct request *rq = l->head;
> > +       struct ublk_io *io = &ubq->ios[rq->tag];
> > +       struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(io->cmd);
> > +
> > +       pdu->ubq = ubq;
> 
> Why does pdu->ubq need to be set here but not in ublk_queue_cmd()? I
> would have thought it would already be set to ubq because pdu comes
> from a rq belonging to this ubq.
> 
> > +       pdu->list = *l;
> > +       rq_list_init(l);
> > +       io_uring_cmd_complete_in_task(io->cmd, ublk_cmd_list_tw_cb);
> 
> Could store io->cmd in a variable to avoid looking it up twice.
> 
> > +}
> > +
> >  static enum blk_eh_timer_return ublk_timeout(struct request *rq)
> >  {
> >         struct ublk_queue *ubq = rq->mq_hctx->driver_data;
> > @@ -1296,16 +1323,13 @@ static enum blk_eh_timer_return ublk_timeout(struct request *rq)
> >         return BLK_EH_RESET_TIMER;
> >  }
> >
> > -static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
> > -               const struct blk_mq_queue_data *bd)
> > +static blk_status_t ublk_prep_rq_batch(struct request *rq)
> 
> naming nit: why "batch"?

All were handled in my local version.

> 
> >  {
> > -       struct ublk_queue *ubq = hctx->driver_data;
> > -       struct request *rq = bd->rq;
> > +       struct ublk_queue *ubq = rq->mq_hctx->driver_data;
> >         blk_status_t res;
> >
> > -       if (unlikely(ubq->fail_io)) {
> > +       if (unlikely(ubq->fail_io))
> >                 return BLK_STS_TARGET;
> > -       }
> >
> >         /* fill iod to slot in io cmd buffer */
> >         res = ublk_setup_iod(ubq, rq);
> > @@ -1324,17 +1348,58 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
> >         if (ublk_nosrv_should_queue_io(ubq) && unlikely(ubq->force_abort))
> >                 return BLK_STS_IOERR;
> >
> > +       if (unlikely(ubq->canceling))
> > +               return BLK_STS_IOERR;
> 
> Why is ubq->cancelling treated differently for ->queue_rq() vs. ->queue_rqs()?

It is the same; ublk_queue_rq() just becomes simpler by letting ->queue_rqs()
handle ubq->canceling.

Here the ubq->canceling handling really does need a comment: it has to be
done after ->fail_io/->force_abort is dealt with, otherwise an IO hang is
caused when removing the disk.

That is one bug introduced in this patch.

> 
> > +
> > +       blk_mq_start_request(rq);
> > +       return BLK_STS_OK;
> > +}
> > +
> > +static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
> > +               const struct blk_mq_queue_data *bd)
> > +{
> > +       struct ublk_queue *ubq = hctx->driver_data;
> > +       struct request *rq = bd->rq;
> > +       blk_status_t res;
> > +
> >         if (unlikely(ubq->canceling)) {
> >                 __ublk_abort_rq(ubq, rq);
> >                 return BLK_STS_OK;
> >         }
> >
> > -       blk_mq_start_request(bd->rq);
> > -       ublk_queue_cmd(ubq, rq);
> > +       res = ublk_prep_rq_batch(rq);
> > +       if (res != BLK_STS_OK)
> > +               return res;
> >
> > +       ublk_queue_cmd(ubq, rq);
> >         return BLK_STS_OK;
> >  }
> >
> > +static void ublk_queue_rqs(struct rq_list *rqlist)
> > +{
> > +       struct rq_list requeue_list = { };
> > +       struct rq_list submit_list = { };
> > +       struct ublk_queue *ubq = NULL;
> > +       struct request *req;
> > +
> > +       while ((req = rq_list_pop(rqlist))) {
> > +               struct ublk_queue *this_q = req->mq_hctx->driver_data;
> > +
> > +               if (ubq && ubq != this_q && !rq_list_empty(&submit_list))
> > +                       ublk_queue_cmd_list(ubq, &submit_list);
> > +               ubq = this_q;
> 
> Probably could avoid the extra ->driver_data dereference on every rq
> by comparing the mq_hctx pointers instead. The ->driver_data
> dereference could be moved to the ublk_queue_cmd_list() calls.

Yes.

> 
> > +
> > +               if (ublk_prep_rq_batch(req) == BLK_STS_OK)
> > +                       rq_list_add_tail(&submit_list, req);
> > +               else
> > +                       rq_list_add_tail(&requeue_list, req);
> > +       }
> > +
> > +       if (ubq && !rq_list_empty(&submit_list))
> > +               ublk_queue_cmd_list(ubq, &submit_list);
> > +       *rqlist = requeue_list;
> > +}
> > +
> >  static int ublk_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data,
> >                 unsigned int hctx_idx)
> >  {
> > @@ -1347,6 +1412,7 @@ static int ublk_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data,
> >
> >  static const struct blk_mq_ops ublk_mq_ops = {
> >         .queue_rq       = ublk_queue_rq,
> > +       .queue_rqs      = ublk_queue_rqs,
> >         .init_hctx      = ublk_init_hctx,
> >         .timeout        = ublk_timeout,
> >  };
> > @@ -3147,6 +3213,9 @@ static int __init ublk_init(void)
> >         BUILD_BUG_ON((u64)UBLKSRV_IO_BUF_OFFSET +
> >                         UBLKSRV_IO_BUF_TOTAL_SIZE < UBLKSRV_IO_BUF_OFFSET);
> >
> > +       BUILD_BUG_ON(sizeof(struct ublk_uring_cmd_pdu) >
> > +                       sizeof_field(struct io_uring_cmd, pdu));
> 
> Looks like Uday also suggested this, but if you change
> ublk_get_uring_cmd_pdu() to use io_uring_cmd_to_pdu(), you get this
> check for free.

I have followed Uday's suggestion to patch ublk_get_uring_cmd_pdu(),
and will send out v2 soon.

Thanks,
Ming





[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux