On Mon, May 19, 2025 at 9:55 PM Ming Lei <ming.lei@xxxxxxxxxx> wrote: > > UBLK_F_SUPPORT_ZERO_COPY requires ublk server to issue explicit buffer > register/unregister uring_cmd for each IO, this way is not only inefficient, > but also introduce dependency between buffer consumer and buffer register/ > unregister uring_cmd, please see tools/testing/selftests/ublk/stripe.c > in which backing file IO has to be issued one by one by IOSQE_IO_LINK. > > Prepare for adding feature UBLK_F_AUTO_BUF_REG for addressing the existing > zero copy limitation: > > - register request buffer automatically to ublk uring_cmd's io_uring > context before delivering io command to ublk server > > - unregister request buffer automatically from the ublk uring_cmd's > io_uring context when completing the request > > - io_uring will unregister the buffer automatically when uring is > exiting, so we needn't worry about accident exit > > For using this feature, ublk server has to create one sparse buffer table > > Reviewed-by: Caleb Sander Mateos <csander@xxxxxxxxxxxxxxx> > Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxx> > --- > drivers/block/ublk_drv.c | 70 ++++++++++++++++++++++++++++++++++++---- > 1 file changed, 64 insertions(+), 6 deletions(-) > > diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c > index ae2f47dc8224..3e56a9d267fb 100644 > --- a/drivers/block/ublk_drv.c > +++ b/drivers/block/ublk_drv.c > @@ -133,6 +133,14 @@ struct ublk_uring_cmd_pdu { > */ > #define UBLK_IO_FLAG_NEED_GET_DATA 0x08 > > +/* > + * request buffer is registered automatically, so we have to unregister it > + * before completing this request. > + * > + * io_uring will unregister buffer automatically for us during exiting. > + */ > +#define UBLK_IO_FLAG_AUTO_BUF_REG 0x10 > + > /* atomic RW with ubq->cancel_lock */ > #define UBLK_IO_FLAG_CANCELED 0x80000000 > > @@ -205,6 +213,7 @@ struct ublk_params_header { > __u32 types; > }; > > +static void ublk_io_release(void *priv); > static void ublk_stop_dev_unlocked(struct ublk_device *ub); > static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq); > static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub, > @@ -619,6 +628,11 @@ static inline bool ublk_support_zero_copy(const struct ublk_queue *ubq) > return ubq->flags & UBLK_F_SUPPORT_ZERO_COPY; > } > > +static inline bool ublk_support_auto_buf_reg(const struct ublk_queue *ubq) > +{ > + return false; > +} > + > static inline bool ublk_support_user_copy(const struct ublk_queue *ubq) > { > return ubq->flags & UBLK_F_USER_COPY; > @@ -626,7 +640,8 @@ static inline bool ublk_support_user_copy(const struct ublk_queue *ubq) > > static inline bool ublk_need_map_io(const struct ublk_queue *ubq) > { > - return !ublk_support_user_copy(ubq) && !ublk_support_zero_copy(ubq); > + return !ublk_support_user_copy(ubq) && !ublk_support_zero_copy(ubq) && > + !ublk_support_auto_buf_reg(ubq); > } > > static inline bool ublk_need_req_ref(const struct ublk_queue *ubq) > @@ -637,8 +652,13 @@ static inline bool ublk_need_req_ref(const struct ublk_queue *ubq) > * > * for zero copy, request buffer need to be registered to io_uring > * buffer table, so reference is needed > + * > + * For auto buffer register, ublk server still may issue > + * UBLK_IO_COMMIT_AND_FETCH_REQ before one registered buffer is used up, > + * so reference is required too. > */ > - return ublk_support_user_copy(ubq) || ublk_support_zero_copy(ubq); > + return ublk_support_user_copy(ubq) || ublk_support_zero_copy(ubq) || > + ublk_support_auto_buf_reg(ubq); > } > > static inline void ublk_init_req_ref(const struct ublk_queue *ubq, > @@ -1155,6 +1175,35 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq, > blk_mq_end_request(rq, BLK_STS_IOERR); > } > > +static bool ublk_auto_buf_reg(struct request *req, struct ublk_io *io, > + unsigned int issue_flags) > +{ > + struct ublk_rq_data *data = blk_mq_rq_to_pdu(req); > + int ret; > + > + ret = io_buffer_register_bvec(io->cmd, req, ublk_io_release, 0, > + issue_flags); > + if (ret) { > + blk_mq_end_request(req, BLK_STS_IOERR); > + return false; > + } > + /* one extra reference is dropped by ublk_io_release */ > + refcount_set(&data->ref, 2); > + io->flags |= UBLK_IO_FLAG_AUTO_BUF_REG; > + return true; > +} > + > +static bool ublk_prep_auto_buf_reg(struct ublk_queue *ubq, > + struct request *req, struct ublk_io *io, > + unsigned int issue_flags) > +{ > + if (ublk_support_auto_buf_reg(ubq) && ublk_rq_has_data(req)) > + return ublk_auto_buf_reg(req, io, issue_flags); > + > + ublk_init_req_ref(ubq, req); > + return true; > +} > + > static bool ublk_start_io(const struct ublk_queue *ubq, struct request *req, > struct ublk_io *io) > { > @@ -1180,7 +1229,6 @@ static bool ublk_start_io(const struct ublk_queue *ubq, struct request *req, > mapped_bytes >> 9; > } > > - ublk_init_req_ref(ubq, req); > return true; > } > > @@ -1226,7 +1274,8 @@ static void ublk_dispatch_req(struct ublk_queue *ubq, > if (!ublk_start_io(ubq, req, io)) > return; > > - ublk_complete_io_cmd(io, req, UBLK_IO_RES_OK, issue_flags); > + if (ublk_prep_auto_buf_reg(ubq, req, io, issue_flags)) > + ublk_complete_io_cmd(io, req, UBLK_IO_RES_OK, issue_flags); > } > > static void ublk_cmd_tw_cb(struct io_uring_cmd *cmd, > @@ -1994,7 +2043,8 @@ static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_queue *ubq, > > static int ublk_commit_and_fetch(const struct ublk_queue *ubq, > struct ublk_io *io, struct io_uring_cmd *cmd, > - const struct ublksrv_io_cmd *ub_cmd) > + const struct ublksrv_io_cmd *ub_cmd, > + unsigned int issue_flags) > { > struct request *req = io->req; > > @@ -2014,6 +2064,14 @@ static int ublk_commit_and_fetch(const struct ublk_queue *ubq, > return -EINVAL; > } > > + if (ublk_support_auto_buf_reg(ubq)) { > + if (io->flags & UBLK_IO_FLAG_AUTO_BUF_REG) { > + WARN_ON_ONCE(io_buffer_unregister_bvec(cmd, 0, > + issue_flags)); Since the io_ring_ctx is determined from the io_uring_cmd, this only works if the UBLK_IO_COMMIT_AND_FETCH_REQ is submitted to the same io_uring as the previous UBLK_IO_(COMMIT_AND_)FETCH_REQ for the ublk I/O. It would be good to document that. And I would probably drop the WARN_ON_ONCE() here, since it can be triggered from userspace. Otherwise, Reviewed-by: Caleb Sander Mateos <csander@xxxxxxxxxxxxxxx> > + io->flags &= ~UBLK_IO_FLAG_AUTO_BUF_REG; > + } > + } > + > ublk_fill_io_cmd(io, cmd, ub_cmd->addr); > > /* now this cmd slot is owned by ublk driver */ > @@ -2110,7 +2168,7 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, > goto out; > break; > case UBLK_IO_COMMIT_AND_FETCH_REQ: > - ret = ublk_commit_and_fetch(ubq, io, cmd, ub_cmd); > + ret = ublk_commit_and_fetch(ubq, io, cmd, ub_cmd, issue_flags); > if (ret) > goto out; > break; > -- > 2.47.0 >