On Mon, May 19, 2025 at 9:55 PM Ming Lei <ming.lei@xxxxxxxxxx> wrote: > > Add UBLK_F_AUTO_BUF_REG for supporting to register buffer automatically > to local io_uring context with provided buffer index. > > Add UAPI structure `struct ublk_auto_buf_reg` for holding user parameter > to register request buffer automatically, one 'flags' field is defined, and > there is still 32bit available for future extension, such as, adding one > io_ring FD field for registering buffer to external io_uring. > > `struct ublk_auto_buf_reg` is populated from ublk uring_cmd's sqe->addr, > and all existing ublk commands are data-less, so it is just fine to reuse > sqe->addr for this purpose. > > Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxx> > --- > drivers/block/ublk_drv.c | 56 ++++++++++++++++++++++++++---- > include/uapi/linux/ublk_cmd.h | 64 +++++++++++++++++++++++++++++++++++ > 2 files changed, 113 insertions(+), 7 deletions(-) > > diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c > index 3e56a9d267fb..1aabc655652b 100644 > --- a/drivers/block/ublk_drv.c > +++ b/drivers/block/ublk_drv.c > @@ -66,7 +66,8 @@ > | UBLK_F_USER_COPY \ > | UBLK_F_ZONED \ > | UBLK_F_USER_RECOVERY_FAIL_IO \ > - | UBLK_F_UPDATE_SIZE) > + | UBLK_F_UPDATE_SIZE \ > + | UBLK_F_AUTO_BUF_REG) > > #define UBLK_F_ALL_RECOVERY_FLAGS (UBLK_F_USER_RECOVERY \ > | UBLK_F_USER_RECOVERY_REISSUE \ > @@ -80,6 +81,9 @@ > > struct ublk_rq_data { > refcount_t ref; > + > + /* for auto-unregister buffer in case of UBLK_F_AUTO_BUF_REG */ > + u16 buf_index; > }; > > struct ublk_uring_cmd_pdu { > @@ -101,6 +105,9 @@ struct ublk_uring_cmd_pdu { > * setup in ublk uring_cmd handler > */ > struct ublk_queue *ubq; > + > + struct ublk_auto_buf_reg buf; > + > u16 tag; > }; > > @@ -630,7 +637,7 @@ static inline bool ublk_support_zero_copy(const struct ublk_queue *ubq) > > static inline bool ublk_support_auto_buf_reg(const struct ublk_queue *ubq) > { > - return false; > + return ubq->flags & UBLK_F_AUTO_BUF_REG; > } > > static inline bool ublk_support_user_copy(const struct ublk_queue *ubq) > @@ -1178,17 +1185,20 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq, > static bool ublk_auto_buf_reg(struct request *req, struct ublk_io *io, > unsigned int issue_flags) > { > + struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(io->cmd); > struct ublk_rq_data *data = blk_mq_rq_to_pdu(req); > int ret; > > - ret = io_buffer_register_bvec(io->cmd, req, ublk_io_release, 0, > - issue_flags); > + ret = io_buffer_register_bvec(io->cmd, req, ublk_io_release, > + pdu->buf.index, issue_flags); > if (ret) { > blk_mq_end_request(req, BLK_STS_IOERR); > return false; > } > /* one extra reference is dropped by ublk_io_release */ > refcount_set(&data->ref, 2); > + /* store buffer index in request payload */ > + data->buf_index = pdu->buf.index; > io->flags |= UBLK_IO_FLAG_AUTO_BUF_REG; > return true; > } > @@ -1952,6 +1962,18 @@ static inline void ublk_prep_cancel(struct io_uring_cmd *cmd, > io_uring_cmd_mark_cancelable(cmd, issue_flags); > } > > +static inline int ublk_set_auto_buf_reg(struct io_uring_cmd *cmd) > +{ > + struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); > + > + pdu->buf = ublk_sqe_addr_to_auto_buf_reg(READ_ONCE(cmd->sqe->addr)); > + > + if (pdu->buf.reserved0 || pdu->buf.reserved1) > + return -EINVAL; > + > + return 0; > +} > + > static void ublk_io_release(void *priv) > { > struct request *rq = priv; > @@ -2034,6 +2056,12 @@ static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_queue *ubq, > goto out; > } > > + if (ublk_support_auto_buf_reg(ubq)) { > + ret = ublk_set_auto_buf_reg(cmd); > + if (ret) > + return ret; This should be goto out; to ensure the ub->mutex is released. Otherwise, this looks good to me. > + } > + > ublk_fill_io_cmd(io, cmd, buf_addr); > ublk_mark_io_ready(ub, ubq); > out: > @@ -2065,11 +2093,20 @@ static int ublk_commit_and_fetch(const struct ublk_queue *ubq, > } > > if (ublk_support_auto_buf_reg(ubq)) { > + int ret; > + > if (io->flags & UBLK_IO_FLAG_AUTO_BUF_REG) { > - WARN_ON_ONCE(io_buffer_unregister_bvec(cmd, 0, > + struct ublk_rq_data *data = blk_mq_rq_to_pdu(req); > + > + WARN_ON_ONCE(io_buffer_unregister_bvec(cmd, > + data->buf_index, > issue_flags)); > io->flags &= ~UBLK_IO_FLAG_AUTO_BUF_REG; > } > + > + ret = ublk_set_auto_buf_reg(cmd); > + if (ret) > + return ret; > } > > ublk_fill_io_cmd(io, cmd, ub_cmd->addr); > @@ -2791,8 +2828,11 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header) > * For USER_COPY, we depends on userspace to fill request > * buffer by pwrite() to ublk char device, which can't be > * used for unprivileged device > + * > + * Same with zero copy or auto buffer register. > */ > - if (info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY)) > + if (info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY | > + UBLK_F_AUTO_BUF_REG)) > return -EINVAL; > } > > @@ -2850,7 +2890,8 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header) > UBLK_F_URING_CMD_COMP_IN_TASK; > > /* GET_DATA isn't needed any more with USER_COPY or ZERO COPY */ > - if (ub->dev_info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY)) > + if (ub->dev_info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY | > + UBLK_F_AUTO_BUF_REG)) > ub->dev_info.flags &= ~UBLK_F_NEED_GET_DATA; > > /* > @@ -3377,6 +3418,7 @@ static int __init ublk_init(void) > > BUILD_BUG_ON((u64)UBLKSRV_IO_BUF_OFFSET + > UBLKSRV_IO_BUF_TOTAL_SIZE < UBLKSRV_IO_BUF_OFFSET); > + BUILD_BUG_ON(sizeof(struct ublk_auto_buf_reg) != 8); > > init_waitqueue_head(&ublk_idr_wq); > > diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h > index be5c6c6b16e0..f6f516b1223b 100644 > --- a/include/uapi/linux/ublk_cmd.h > +++ b/include/uapi/linux/ublk_cmd.h > @@ -219,6 +219,29 @@ > */ > #define UBLK_F_UPDATE_SIZE (1ULL << 10) > > +/* > + * request buffer is registered automatically to uring_cmd's io_uring > + * context before delivering this io command to ublk server, meantime > + * it is un-registered automatically when completing this io command. > + * > + * For using this feature: > + * > + * - ublk server has to create sparse buffer table > + * > + * - ublk server passes auto buf register data via uring_cmd's sqe->addr, > + * `struct ublk_auto_buf_reg` is populated from sqe->addr, please see > + * the definition of ublk_sqe_addr_to_auto_buf_reg() > + * > + * - pass buffer index from `ublk_auto_buf_reg.index` > + * > + * - all reserved fields in `ublk_auto_buf_reg` need to be zeroed > + * > + * This way avoids extra cost from two uring_cmd, but also simplifies backend > + * implementation, such as, the dependency on IO_REGISTER_IO_BUF and > + * IO_UNREGISTER_IO_BUF becomes not necessary. > + */ > +#define UBLK_F_AUTO_BUF_REG (1ULL << 11) > + > /* device state */ > #define UBLK_S_DEV_DEAD 0 > #define UBLK_S_DEV_LIVE 1 > @@ -339,6 +362,47 @@ static inline __u32 ublksrv_get_flags(const struct ublksrv_io_desc *iod) > return iod->op_flags >> 8; > } > > +struct ublk_auto_buf_reg { > + /* index for registering the delivered request buffer */ > + __u16 index; > + __u16 reserved0; nit: alignment > + > + /* > + * io_ring FD can be passed via the reserve field in future for > + * supporting to register io buffer to external io_uring > + */ > + __u32 reserved1; > +}; > + > +/* > + * For UBLK_F_AUTO_BUF_REG, auto buffer register data is carried via > + * uring_cmd's sqe->addr: > + * > + * - bit0 ~ bit15: buffer index > + * - bit24 ~ bit31: reserved0 "bit24" should be "bit16" here. It would change to "bit24" in the next patch adding UBLK_AUTO_BUF_REG_FALLBACK. > + * - bit32 ~ bit63: reserved1 > + */ > +static inline struct ublk_auto_buf_reg ublk_sqe_addr_to_auto_buf_reg( > + __u64 sqe_addr) > +{ > + struct ublk_auto_buf_reg reg = { > + .index = sqe_addr & 0xffff, > + .reserved0 = (sqe_addr >> 16) & 0xffff, I think I suggested this before, but the masking with 0xffff is unnecessary. Best, Caleb > + .reserved1 = sqe_addr >> 32, > + }; > + > + return reg; > +} > + > +static inline __u64 > +ublk_auto_buf_reg_to_sqe_addr(const struct ublk_auto_buf_reg *buf) > +{ > + __u64 addr = buf->index | (__u64)buf->reserved0 << 16 | > + (__u64)buf->reserved1 << 32; > + > + return addr; > +} > + > /* issued to ublk driver via /dev/ublkcN */ > struct ublksrv_io_cmd { > __u16 q_id; > -- > 2.47.0 >