ublk_register_io_buf() performs an expensive atomic refcount increment,
as well as a lot of pointer chasing to look up the struct request.

Create a separate ublk_daemon_register_io_buf() for the daemon task to
call. Initialize ublk_io's reference count to a large number, introduce
a field task_registered_buffers to count the buffers registered on the
daemon task, and atomically subtract the large number minus
task_registered_buffers in ublk_commit_and_fetch(). Also obtain the
struct request directly from ublk_io's req field instead of looking it
up on the tagset.

Signed-off-by: Caleb Sander Mateos <csander@xxxxxxxxxxxxxxx>
---
 drivers/block/ublk_drv.c | 70 ++++++++++++++++++++++++++++++++++------
 1 file changed, 61 insertions(+), 9 deletions(-)

diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index f53618391141..b2925e15279a 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -146,10 +146,17 @@ struct ublk_uring_cmd_pdu {
 #define UBLK_IO_FLAG_AUTO_BUF_REG 0x10
 
 /* atomic RW with ubq->cancel_lock */
 #define UBLK_IO_FLAG_CANCELED	0x80000000
 
+/*
+ * Initialize refcount to a large number to include any registered buffers.
+ * UBLK_IO_COMMIT_AND_FETCH_REQ will release these references minus those for
+ * any buffers registered on the io daemon task.
+ */
+#define UBLK_REFCOUNT_INIT (REFCOUNT_MAX / 2)
+
 struct ublk_io {
 	/* userspace buffer address from io cmd */
 	__u64	addr;
 	unsigned int flags;
 	int res;
 
@@ -164,18 +171,21 @@ struct ublk_io {
 	struct task_struct *task;
 
 	/*
 	 * The number of uses of this I/O by the ublk server
 	 * if user copy or zero copy are enabled:
-	 * - 1 from dispatch to the server until UBLK_IO_COMMIT_AND_FETCH_REQ
+	 * - UBLK_REFCOUNT_INIT from dispatch to the server
+	 *   until UBLK_IO_COMMIT_AND_FETCH_REQ
 	 * - 1 for each inflight ublk_ch_{read,write}_iter() call
-	 * - 1 for each io_uring registered buffer
+	 * - 1 for each io_uring registered buffer not registered on task
 	 * The I/O can only be completed once all references are dropped.
 	 * User copy and buffer registration operations are only permitted
 	 * if the reference count is nonzero.
 	 */
 	refcount_t ref;
+	/* Count of buffers registered on task and not yet unregistered */
+	unsigned task_registered_buffers;
 
 	/* auto-registered buffer, valid if UBLK_IO_FLAG_AUTO_BUF_REG is set */
 	u16 buf_index;
 	void *buf_ctx_handle;
 };
@@ -684,11 +694,11 @@ static inline bool ublk_need_req_ref(const struct ublk_queue *ubq)
 
 static inline void ublk_init_req_ref(const struct ublk_queue *ubq,
 		struct ublk_io *io)
 {
 	if (ublk_need_req_ref(ubq))
-		refcount_set(&io->ref, 1);
+		refcount_set(&io->ref, UBLK_REFCOUNT_INIT);
 }
 
 static inline bool ublk_get_req_ref(const struct ublk_queue *ubq,
 		struct ublk_io *io)
 {
@@ -707,10 +717,19 @@ static inline void ublk_put_req_ref(const struct ublk_queue *ubq,
 	} else {
 		__ublk_complete_rq(req);
 	}
 }
 
+static inline void ublk_sub_req_ref(struct ublk_io *io, struct request *req)
+{
+	unsigned sub_refs = UBLK_REFCOUNT_INIT - io->task_registered_buffers;
+
+	io->task_registered_buffers = 0;
+	if (refcount_sub_and_test(sub_refs, &io->ref))
+		__ublk_complete_rq(req);
+}
+
 static inline bool ublk_need_get_data(const struct ublk_queue *ubq)
 {
 	return ubq->flags & UBLK_F_NEED_GET_DATA;
 }
 
@@ -1188,11 +1207,10 @@ ublk_auto_buf_reg_fallback(const struct ublk_queue *ubq, struct ublk_io *io)
 {
 	unsigned tag = io - ubq->ios;
 	struct ublksrv_io_desc *iod = ublk_get_iod(ubq, tag);
 
 	iod->op_flags |= UBLK_IO_F_NEED_REG_BUF;
-	refcount_set(&io->ref, 1);
 }
 
 static bool ublk_auto_buf_reg(const struct ublk_queue *ubq, struct request *req,
 			      struct ublk_io *io, unsigned int issue_flags)
 {
@@ -1207,13 +1225,12 @@ static bool ublk_auto_buf_reg(const struct ublk_queue *ubq, struct request *req,
 			return true;
 		}
 		blk_mq_end_request(req, BLK_STS_IOERR);
 		return false;
 	}
 
-	/* one extra reference is dropped by ublk_io_release */
-	refcount_set(&io->ref, 2);
+	io->task_registered_buffers = 1;
 	io->buf_ctx_handle = io_uring_cmd_ctx_handle(io->cmd);
 	/* store buffer index in request payload */
 	io->buf_index = pdu->buf.index;
 	io->flags |= UBLK_IO_FLAG_AUTO_BUF_REG;
 	return true;
@@ -1221,14 +1238,14 @@ static bool ublk_auto_buf_reg(const struct ublk_queue *ubq, struct request *req,
 
 static bool ublk_prep_auto_buf_reg(struct ublk_queue *ubq,
 				   struct request *req, struct ublk_io *io,
 				   unsigned int issue_flags)
 {
+	ublk_init_req_ref(ubq, io);
 	if (ublk_support_auto_buf_reg(ubq) && ublk_rq_has_data(req))
 		return ublk_auto_buf_reg(ubq, req, io, issue_flags);
 
-	ublk_init_req_ref(ubq, io);
 	return true;
 }
 
 static bool ublk_start_io(const struct ublk_queue *ubq, struct request *req,
 			  struct ublk_io *io)
@@ -1488,10 +1505,11 @@ static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq)
 			put_task_struct(io->task);
 			io->task = NULL;
 		}
 
 		WARN_ON_ONCE(refcount_read(&io->ref));
+		WARN_ON_ONCE(io->task_registered_buffers);
 	}
 }
 
 static int ublk_ch_open(struct inode *inode, struct file *filp)
 {
@@ -2023,10 +2041,39 @@ static int ublk_register_io_buf(struct io_uring_cmd *cmd,
 	}
 
 	return 0;
 }
 
+static int
+ublk_daemon_register_io_buf(struct io_uring_cmd *cmd,
+			    const struct ublk_queue *ubq, struct ublk_io *io,
+			    unsigned index, unsigned issue_flags)
+{
+	unsigned new_registered_buffers;
+	struct request *req = io->req;
+	int ret;
+
+	/*
+	 * Ensure there are still references for ublk_sub_req_ref() to release.
+	 * If not, fall back on the thread-safe buffer registration.
+	 */
+	new_registered_buffers = io->task_registered_buffers + 1;
+	if (unlikely(new_registered_buffers >= UBLK_REFCOUNT_INIT))
+		return ublk_register_io_buf(cmd, ubq, io, index, issue_flags);
+
+	if (!ublk_support_zero_copy(ubq) || !ublk_rq_has_data(req))
+		return -EINVAL;
+
+	ret = io_buffer_register_bvec(cmd, req, ublk_io_release, index,
+				      issue_flags);
+	if (ret)
+		return ret;
+
+	io->task_registered_buffers = new_registered_buffers;
+	return 0;
+}
+
 static int ublk_unregister_io_buf(struct io_uring_cmd *cmd,
 				  const struct ublk_device *ub,
 				  unsigned int index, unsigned int issue_flags)
 {
 	if (!(ub->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY))
@@ -2146,11 +2193,14 @@ static int ublk_commit_and_fetch(const struct ublk_queue *ubq,
 		req->__sector = ub_cmd->zone_append_lba;
 
 	if (unlikely(blk_should_fake_timeout(req->q)))
 		return 0;
 
-	ublk_put_req_ref(ubq, io, req);
+	if (ublk_need_req_ref(ubq))
+		ublk_sub_req_ref(io, req);
+	else
+		__ublk_complete_rq(req);
 	return 0;
 }
 
 static bool ublk_get_data(const struct ublk_queue *ubq, struct ublk_io *io)
 {
@@ -2244,11 +2294,12 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
 	    ^ (_IOC_NR(cmd_op) == UBLK_IO_NEED_GET_DATA))
 		goto out;
 
 	switch (_IOC_NR(cmd_op)) {
 	case UBLK_IO_REGISTER_IO_BUF:
-		return ublk_register_io_buf(cmd, ubq, io, ub_cmd->addr, issue_flags);
+		return ublk_daemon_register_io_buf(cmd, ubq, io, ub_cmd->addr,
+						   issue_flags);
 	case UBLK_IO_COMMIT_AND_FETCH_REQ:
 		ret = ublk_commit_and_fetch(ubq, io, cmd, ub_cmd, issue_flags);
 		if (ret)
 			goto out;
@@ -2473,10 +2524,11 @@ static void ublk_deinit_queue(struct ublk_device *ub, int q_id)
 	for (i = 0; i < ubq->q_depth; i++) {
 		struct ublk_io *io = &ubq->ios[i];
 
 		if (io->task)
 			put_task_struct(io->task);
 		WARN_ON_ONCE(refcount_read(&io->ref));
+		WARN_ON_ONCE(io->task_registered_buffers);
 	}
 	if (ubq->io_cmd_buf)
 		free_pages((unsigned long)ubq->io_cmd_buf, get_order(size));
 }
-- 
2.45.2
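As an aside for review (not part of the patch): below is a minimal userspace
sketch of the refcount arithmetic the commit message describes, assuming a
stand-in value for UBLK_REFCOUNT_INIT and a hypothetical struct io_sim that
only mirrors the io->ref / io->task_registered_buffers pair. It shows why
subtracting UBLK_REFCOUNT_INIT - task_registered_buffers at commit-and-fetch
time leaves one reference for each daemon-task registration plus one for each
reference taken individually (other-task registrations, in-flight user-copy
operations), without the daemon-task path ever touching the atomic counter.

/*
 * Standalone simulation, not kernel code. Names mirror the patch for
 * readability; UBLK_REFCOUNT_INIT's value here is an arbitrary stand-in
 * for REFCOUNT_MAX / 2.
 */
#include <assert.h>
#include <stdio.h>

#define UBLK_REFCOUNT_INIT (1u << 30)

struct io_sim {
	unsigned ref;                      /* models io->ref */
	unsigned task_registered_buffers;  /* models io->task_registered_buffers */
};

int main(void)
{
	struct io_sim io = { .ref = UBLK_REFCOUNT_INIT };

	/* Two buffers registered on the daemon task: no atomic increment,
	 * only the plain per-task counter is bumped. */
	io.task_registered_buffers += 2;

	/* One buffer registered from another task still takes a reference. */
	io.ref += 1;

	/* UBLK_IO_COMMIT_AND_FETCH_REQ: drop the initial references minus
	 * those consumed by daemon-task registrations. */
	io.ref -= UBLK_REFCOUNT_INIT - io.task_registered_buffers;
	io.task_registered_buffers = 0;

	/* One reference per registration remains: 2 daemon + 1 other. */
	assert(io.ref == 3);
	printf("remaining refs: %u\n", io.ref);
	return 0;
}

The point of the scheme is visible in the simulation: the daemon-task fast
path does only plain arithmetic on a field it owns, and the single atomic
subtraction at commit time settles the balance.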