Re: [PATCH V5 3/6] ublk: register buffer to local io_uring with provided buf index via UBLK_F_AUTO_BUF_REG

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, May 19, 2025 at 9:55 PM Ming Lei <ming.lei@xxxxxxxxxx> wrote:
>
> Add UBLK_F_AUTO_BUF_REG for supporting to register buffer automatically
> to local io_uring context with provided buffer index.
>
> Add UAPI structure `struct ublk_auto_buf_reg` for holding user parameter
> to register request buffer automatically, one 'flags' field is defined, and
> there is still 32bit available for future extension, such as, adding one
> io_ring FD field for registering buffer to external io_uring.
>
> `struct ublk_auto_buf_reg` is populated from ublk uring_cmd's sqe->addr,
> and all existing ublk commands are data-less, so it is just fine to reuse
> sqe->addr for this purpose.
>
> Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxx>
> ---
>  drivers/block/ublk_drv.c      | 56 ++++++++++++++++++++++++++----
>  include/uapi/linux/ublk_cmd.h | 64 +++++++++++++++++++++++++++++++++++
>  2 files changed, 113 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
> index 3e56a9d267fb..1aabc655652b 100644
> --- a/drivers/block/ublk_drv.c
> +++ b/drivers/block/ublk_drv.c
> @@ -66,7 +66,8 @@
>                 | UBLK_F_USER_COPY \
>                 | UBLK_F_ZONED \
>                 | UBLK_F_USER_RECOVERY_FAIL_IO \
> -               | UBLK_F_UPDATE_SIZE)
> +               | UBLK_F_UPDATE_SIZE \
> +               | UBLK_F_AUTO_BUF_REG)
>
>  #define UBLK_F_ALL_RECOVERY_FLAGS (UBLK_F_USER_RECOVERY \
>                 | UBLK_F_USER_RECOVERY_REISSUE \
> @@ -80,6 +81,9 @@
>
>  struct ublk_rq_data {
>         refcount_t ref;
> +
> +       /* for auto-unregister buffer in case of UBLK_F_AUTO_BUF_REG */
> +       u16 buf_index;
>  };
>
>  struct ublk_uring_cmd_pdu {
> @@ -101,6 +105,9 @@ struct ublk_uring_cmd_pdu {
>          * setup in ublk uring_cmd handler
>          */
>         struct ublk_queue *ubq;
> +
> +       struct ublk_auto_buf_reg buf;
> +
>         u16 tag;
>  };
>
> @@ -630,7 +637,7 @@ static inline bool ublk_support_zero_copy(const struct ublk_queue *ubq)
>
>  static inline bool ublk_support_auto_buf_reg(const struct ublk_queue *ubq)
>  {
> -       return false;
> +       return ubq->flags & UBLK_F_AUTO_BUF_REG;
>  }
>
>  static inline bool ublk_support_user_copy(const struct ublk_queue *ubq)
> @@ -1178,17 +1185,20 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq,
>  static bool ublk_auto_buf_reg(struct request *req, struct ublk_io *io,
>                               unsigned int issue_flags)
>  {
> +       struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(io->cmd);
>         struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
>         int ret;
>
> -       ret = io_buffer_register_bvec(io->cmd, req, ublk_io_release, 0,
> -                                     issue_flags);
> +       ret = io_buffer_register_bvec(io->cmd, req, ublk_io_release,
> +                                     pdu->buf.index, issue_flags);
>         if (ret) {
>                 blk_mq_end_request(req, BLK_STS_IOERR);
>                 return false;
>         }
>         /* one extra reference is dropped by ublk_io_release */
>         refcount_set(&data->ref, 2);
> +       /* store buffer index in request payload */
> +       data->buf_index = pdu->buf.index;
>         io->flags |= UBLK_IO_FLAG_AUTO_BUF_REG;
>         return true;
>  }
> @@ -1952,6 +1962,18 @@ static inline void ublk_prep_cancel(struct io_uring_cmd *cmd,
>         io_uring_cmd_mark_cancelable(cmd, issue_flags);
>  }
>
> +static inline int ublk_set_auto_buf_reg(struct io_uring_cmd *cmd)
> +{
> +       struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
> +
> +       pdu->buf = ublk_sqe_addr_to_auto_buf_reg(READ_ONCE(cmd->sqe->addr));
> +
> +       if (pdu->buf.reserved0 || pdu->buf.reserved1)
> +               return -EINVAL;
> +
> +       return 0;
> +}
> +
>  static void ublk_io_release(void *priv)
>  {
>         struct request *rq = priv;
> @@ -2034,6 +2056,12 @@ static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_queue *ubq,
>                 goto out;
>         }
>
> +       if (ublk_support_auto_buf_reg(ubq)) {
> +               ret = ublk_set_auto_buf_reg(cmd);
> +               if (ret)
> +                       return ret;

This should be goto out; to ensure the ub->mutex is released.
Otherwise, this looks good to me.

> +       }
> +
>         ublk_fill_io_cmd(io, cmd, buf_addr);
>         ublk_mark_io_ready(ub, ubq);
>  out:
> @@ -2065,11 +2093,20 @@ static int ublk_commit_and_fetch(const struct ublk_queue *ubq,
>         }
>
>         if (ublk_support_auto_buf_reg(ubq)) {
> +               int ret;
> +
>                 if (io->flags & UBLK_IO_FLAG_AUTO_BUF_REG) {
> -                       WARN_ON_ONCE(io_buffer_unregister_bvec(cmd, 0,
> +                       struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
> +
> +                       WARN_ON_ONCE(io_buffer_unregister_bvec(cmd,
> +                                               data->buf_index,
>                                                 issue_flags));
>                         io->flags &= ~UBLK_IO_FLAG_AUTO_BUF_REG;
>                 }
> +
> +               ret = ublk_set_auto_buf_reg(cmd);
> +               if (ret)
> +                       return ret;
>         }
>
>         ublk_fill_io_cmd(io, cmd, ub_cmd->addr);
> @@ -2791,8 +2828,11 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header)
>                  * For USER_COPY, we depends on userspace to fill request
>                  * buffer by pwrite() to ublk char device, which can't be
>                  * used for unprivileged device
> +                *
> +                * Same with zero copy or auto buffer register.
>                  */
> -               if (info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY))
> +               if (info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY |
> +                                       UBLK_F_AUTO_BUF_REG))
>                         return -EINVAL;
>         }
>
> @@ -2850,7 +2890,8 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header)
>                 UBLK_F_URING_CMD_COMP_IN_TASK;
>
>         /* GET_DATA isn't needed any more with USER_COPY or ZERO COPY */
> -       if (ub->dev_info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY))
> +       if (ub->dev_info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY |
> +                               UBLK_F_AUTO_BUF_REG))
>                 ub->dev_info.flags &= ~UBLK_F_NEED_GET_DATA;
>
>         /*
> @@ -3377,6 +3418,7 @@ static int __init ublk_init(void)
>
>         BUILD_BUG_ON((u64)UBLKSRV_IO_BUF_OFFSET +
>                         UBLKSRV_IO_BUF_TOTAL_SIZE < UBLKSRV_IO_BUF_OFFSET);
> +       BUILD_BUG_ON(sizeof(struct ublk_auto_buf_reg) != 8);
>
>         init_waitqueue_head(&ublk_idr_wq);
>
> diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h
> index be5c6c6b16e0..f6f516b1223b 100644
> --- a/include/uapi/linux/ublk_cmd.h
> +++ b/include/uapi/linux/ublk_cmd.h
> @@ -219,6 +219,29 @@
>   */
>  #define UBLK_F_UPDATE_SIZE              (1ULL << 10)
>
> +/*
> + * request buffer is registered automatically to uring_cmd's io_uring
> + * context before delivering this io command to ublk server, meantime
> + * it is un-registered automatically when completing this io command.
> + *
> + * For using this feature:
> + *
> + * - ublk server has to create sparse buffer table
> + *
> + * - ublk server passes auto buf register data via uring_cmd's sqe->addr,
> + *   `struct ublk_auto_buf_reg` is populated from sqe->addr, please see
> + *   the definition of ublk_sqe_addr_to_auto_buf_reg()
> + *
> + * - pass buffer index from `ublk_auto_buf_reg.index`
> + *
> + * - all reserved fields in `ublk_auto_buf_reg` need to be zeroed
> + *
> + * This way avoids extra cost from two uring_cmd, but also simplifies backend
> + * implementation, such as, the dependency on IO_REGISTER_IO_BUF and
> + * IO_UNREGISTER_IO_BUF becomes not necessary.
> + */
> +#define UBLK_F_AUTO_BUF_REG    (1ULL << 11)
> +
>  /* device state */
>  #define UBLK_S_DEV_DEAD        0
>  #define UBLK_S_DEV_LIVE        1
> @@ -339,6 +362,47 @@ static inline __u32 ublksrv_get_flags(const struct ublksrv_io_desc *iod)
>         return iod->op_flags >> 8;
>  }
>
> +struct ublk_auto_buf_reg {
> +       /* index for registering the delivered request buffer */
> +       __u16  index;
> +       __u16   reserved0;

nit: alignment

> +
> +       /*
> +        * io_ring FD can be passed via the reserve field in future for
> +        * supporting to register io buffer to external io_uring
> +        */
> +       __u32  reserved1;
> +};
> +
> +/*
> + * For UBLK_F_AUTO_BUF_REG, auto buffer register data is carried via
> + * uring_cmd's sqe->addr:
> + *
> + *     - bit0 ~ bit15: buffer index
> + *     - bit24 ~ bit31: reserved0

"bit24" should be "bit16" here. It would change to "bit24" in the next
patch adding UBLK_AUTO_BUF_REG_FALLBACK.

> + *     - bit32 ~ bit63: reserved1
> + */
> +static inline struct ublk_auto_buf_reg ublk_sqe_addr_to_auto_buf_reg(
> +               __u64 sqe_addr)
> +{
> +       struct ublk_auto_buf_reg reg = {
> +               .index = sqe_addr & 0xffff,
> +               .reserved0 = (sqe_addr >> 16) & 0xffff,

I think I suggested this before, but the masking with 0xffff is unnecessary.

Best,
Caleb

> +               .reserved1 = sqe_addr >> 32,
> +       };
> +
> +       return reg;
> +}
> +
> +static inline __u64
> +ublk_auto_buf_reg_to_sqe_addr(const struct ublk_auto_buf_reg *buf)
> +{
> +       __u64 addr = buf->index | (__u64)buf->reserved0 << 16 |
> +               (__u64)buf->reserved1 << 32;
> +
> +       return addr;
> +}
> +
>  /* issued to ublk driver via /dev/ublkcN */
>  struct ublksrv_io_cmd {
>         __u16   q_id;
> --
> 2.47.0
>





[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux