On Tue, May 20, 2025 at 10:40:01AM -0700, Caleb Sander Mateos wrote:
> On Mon, May 19, 2025 at 9:55 PM Ming Lei <ming.lei@xxxxxxxxxx> wrote:
> >
> > UBLK_F_SUPPORT_ZERO_COPY requires the ublk server to issue an explicit
> > buffer register/unregister uring_cmd for each IO. This is not only
> > inefficient, but also introduces a dependency between the buffer consumer
> > and the buffer register/unregister uring_cmds; see
> > tools/testing/selftests/ublk/stripe.c, in which backing file IO has to be
> > issued one by one via IOSQE_IO_LINK.
> >
> > Prepare for adding the feature UBLK_F_AUTO_BUF_REG to address the existing
> > zero copy limitation:
> >
> > - register the request buffer automatically to the ublk uring_cmd's
> >   io_uring context before delivering the io command to the ublk server
> >
> > - unregister the request buffer automatically from the ublk uring_cmd's
> >   io_uring context when completing the request
> >
> > - io_uring will unregister the buffer automatically when the ring is
> >   exiting, so we needn't worry about accidental exit
> >
> > To use this feature, the ublk server has to create one sparse buffer table.
> >
> > Reviewed-by: Caleb Sander Mateos <csander@xxxxxxxxxxxxxxx>
> > Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxx>
> > ---
> >  drivers/block/ublk_drv.c | 70 ++++++++++++++++++++++++++++++++++++----
> >  1 file changed, 64 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
> > index ae2f47dc8224..3e56a9d267fb 100644
> > --- a/drivers/block/ublk_drv.c
> > +++ b/drivers/block/ublk_drv.c
> > @@ -133,6 +133,14 @@ struct ublk_uring_cmd_pdu {
> >   */
> >  #define UBLK_IO_FLAG_NEED_GET_DATA 0x08
> >
> > +/*
> > + * request buffer is registered automatically, so we have to unregister it
> > + * before completing this request.
> > + *
> > + * io_uring will unregister buffer automatically for us during exiting.
> > + */
> > +#define UBLK_IO_FLAG_AUTO_BUF_REG 0x10
> > +
> >  /* atomic RW with ubq->cancel_lock */
> >  #define UBLK_IO_FLAG_CANCELED 0x80000000
> >
> > @@ -205,6 +213,7 @@ struct ublk_params_header {
> >         __u32 types;
> >  };
> >
> > +static void ublk_io_release(void *priv);
> >  static void ublk_stop_dev_unlocked(struct ublk_device *ub);
> >  static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq);
> >  static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub,
> > @@ -619,6 +628,11 @@ static inline bool ublk_support_zero_copy(const struct ublk_queue *ubq)
> >         return ubq->flags & UBLK_F_SUPPORT_ZERO_COPY;
> >  }
> >
> > +static inline bool ublk_support_auto_buf_reg(const struct ublk_queue *ubq)
> > +{
> > +       return false;
> > +}
> > +
> >  static inline bool ublk_support_user_copy(const struct ublk_queue *ubq)
> >  {
> >         return ubq->flags & UBLK_F_USER_COPY;
> > @@ -626,7 +640,8 @@ static inline bool ublk_support_user_copy(const struct ublk_queue *ubq)
> >
> >  static inline bool ublk_need_map_io(const struct ublk_queue *ubq)
> >  {
> > -       return !ublk_support_user_copy(ubq) && !ublk_support_zero_copy(ubq);
> > +       return !ublk_support_user_copy(ubq) && !ublk_support_zero_copy(ubq) &&
> > +               !ublk_support_auto_buf_reg(ubq);
> >  }
> >
> >  static inline bool ublk_need_req_ref(const struct ublk_queue *ubq)
> > @@ -637,8 +652,13 @@ static inline bool ublk_need_req_ref(const struct ublk_queue *ubq)
> >   *
> >   * for zero copy, request buffer need to be registered to io_uring
> >   * buffer table, so reference is needed
> > + *
> > + * For auto buffer register, ublk server still may issue
> > + * UBLK_IO_COMMIT_AND_FETCH_REQ before one registered buffer is used up,
> > + * so reference is required too.
> >   */
> > -       return ublk_support_user_copy(ubq) || ublk_support_zero_copy(ubq);
> > +       return ublk_support_user_copy(ubq) || ublk_support_zero_copy(ubq) ||
> > +               ublk_support_auto_buf_reg(ubq);
> >  }
> >
> >  static inline void ublk_init_req_ref(const struct ublk_queue *ubq,
> > @@ -1155,6 +1175,35 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq,
> >         blk_mq_end_request(rq, BLK_STS_IOERR);
> >  }
> >
> > +static bool ublk_auto_buf_reg(struct request *req, struct ublk_io *io,
> > +                             unsigned int issue_flags)
> > +{
> > +       struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
> > +       int ret;
> > +
> > +       ret = io_buffer_register_bvec(io->cmd, req, ublk_io_release, 0,
> > +                                     issue_flags);
> > +       if (ret) {
> > +               blk_mq_end_request(req, BLK_STS_IOERR);
> > +               return false;
> > +       }
> > +       /* one extra reference is dropped by ublk_io_release */
> > +       refcount_set(&data->ref, 2);
> > +       io->flags |= UBLK_IO_FLAG_AUTO_BUF_REG;
> > +       return true;
> > +}
> > +
> > +static bool ublk_prep_auto_buf_reg(struct ublk_queue *ubq,
> > +                                  struct request *req, struct ublk_io *io,
> > +                                  unsigned int issue_flags)
> > +{
> > +       if (ublk_support_auto_buf_reg(ubq) && ublk_rq_has_data(req))
> > +               return ublk_auto_buf_reg(req, io, issue_flags);
> > +
> > +       ublk_init_req_ref(ubq, req);
> > +       return true;
> > +}
> > +
> >  static bool ublk_start_io(const struct ublk_queue *ubq, struct request *req,
> >                           struct ublk_io *io)
> >  {
> > @@ -1180,7 +1229,6 @@ static bool ublk_start_io(const struct ublk_queue *ubq, struct request *req,
> >                 mapped_bytes >> 9;
> >         }
> >
> > -       ublk_init_req_ref(ubq, req);
> >         return true;
> >  }
> >
> > @@ -1226,7 +1274,8 @@ static void ublk_dispatch_req(struct ublk_queue *ubq,
> >         if (!ublk_start_io(ubq, req, io))
> >                 return;
> >
> > -       ublk_complete_io_cmd(io, req, UBLK_IO_RES_OK, issue_flags);
> > +       if (ublk_prep_auto_buf_reg(ubq, req, io, issue_flags))
> > +               ublk_complete_io_cmd(io, req, UBLK_IO_RES_OK, issue_flags);
> >  }
> >
> >  static void ublk_cmd_tw_cb(struct io_uring_cmd *cmd,
> > @@ -1994,7 +2043,8 @@ static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_queue *ubq,
> >
> >  static int ublk_commit_and_fetch(const struct ublk_queue *ubq,
> >                                  struct ublk_io *io, struct io_uring_cmd *cmd,
> > -                                const struct ublksrv_io_cmd *ub_cmd)
> > +                                const struct ublksrv_io_cmd *ub_cmd,
> > +                                unsigned int issue_flags)
> >  {
> >         struct request *req = io->req;
> >
> > @@ -2014,6 +2064,14 @@ static int ublk_commit_and_fetch(const struct ublk_queue *ubq,
> >                 return -EINVAL;
> >         }
> >
> > +       if (ublk_support_auto_buf_reg(ubq)) {
> > +               if (io->flags & UBLK_IO_FLAG_AUTO_BUF_REG) {
> > +                       WARN_ON_ONCE(io_buffer_unregister_bvec(cmd, 0,
> > +                                       issue_flags));
>
> Since the io_ring_ctx is determined from the io_uring_cmd, this only
> works if the UBLK_IO_COMMIT_AND_FETCH_REQ is submitted to the same
> io_uring as the previous UBLK_IO_(COMMIT_AND_)FETCH_REQ for the ublk
> I/O. It would be good to document that. And I would probably drop the
> WARN_ON_ONCE() here, since it can be triggered from userspace.
>
> Otherwise,
> Reviewed-by: Caleb Sander Mateos <csander@xxxxxxxxxxxxxxx>

Thanks for the review!

Yeah, I thought of this yesterday too while working on TASK_NEUTRAL or
IO_MIGRATION; I will send a fix later that tracks the context id.

Thanks,
Ming
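
For context on the limitation the commit message describes, below is a rough,
illustrative sketch of the per-IO pattern that plain UBLK_F_SUPPORT_ZERO_COPY
requires today: an explicit register uring_cmd, the backing-file IO, and an
explicit unregister uring_cmd, ordered with IOSQE_IO_LINK. It is not taken
from stripe.c; the direct SQE setup and the use of the addr field to carry the
buffer-table index are assumptions based on the UBLK_U_IO_REGISTER_IO_BUF /
UBLK_U_IO_UNREGISTER_IO_BUF interface, so details may be simplified.

#include <liburing.h>
#include <linux/ublk_cmd.h>

/*
 * Illustrative sketch only: queue "register buffer -> backing write ->
 * unregister buffer" for one ublk request, relying on IOSQE_IO_LINK for
 * ordering.  Passing the buffer-table index via ->addr is an assumption
 * based on the current register/unregister uring_cmd interface.
 */
static void queue_zc_write(struct io_uring *ring, int ublk_ch_fd,
                           int backing_fd, __u16 q_id, __u16 tag,
                           __u16 buf_idx, __u64 dest_off, __u32 len)
{
        struct io_uring_sqe *sqe;
        struct ublksrv_io_cmd *cmd;

        /* 1) register the request buffer into the sparse buffer table */
        sqe = io_uring_get_sqe(ring);
        io_uring_prep_rw(IORING_OP_URING_CMD, sqe, ublk_ch_fd, NULL, 0, 0);
        sqe->cmd_op = UBLK_U_IO_REGISTER_IO_BUF;
        cmd = (struct ublksrv_io_cmd *)sqe->cmd;
        cmd->q_id = q_id;
        cmd->tag = tag;
        cmd->addr = buf_idx;            /* destination buffer-table index */
        sqe->flags |= IOSQE_IO_LINK;    /* backing IO waits for registration */

        /* 2) backing-file IO that consumes the registered buffer by index;
         *    NULL/0 address means the start of the registered buffer here
         */
        sqe = io_uring_get_sqe(ring);
        io_uring_prep_write_fixed(sqe, backing_fd, NULL, len, dest_off,
                                  buf_idx);
        sqe->flags |= IOSQE_IO_LINK;    /* unregister only after the IO */

        /* 3) unregister the buffer once the backing IO has completed */
        sqe = io_uring_get_sqe(ring);
        io_uring_prep_rw(IORING_OP_URING_CMD, sqe, ublk_ch_fd, NULL, 0, 0);
        sqe->cmd_op = UBLK_U_IO_UNREGISTER_IO_BUF;
        cmd = (struct ublksrv_io_cmd *)sqe->cmd;
        cmd->q_id = q_id;
        cmd->tag = tag;
        cmd->addr = buf_idx;
}

With the UBLK_F_AUTO_BUF_REG path prepared by the patch, steps 1 and 3 (and
the linking between them) go away: the register happens in ublk_dispatch_req()
before the io command is delivered, and the unregister happens in
ublk_commit_and_fetch() when the request is completed.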