On Mon, Sep 1, 2025 at 3:03 AM Ming Lei <ming.lei@xxxxxxxxxx> wrote:
>
> Add new command UBLK_U_IO_PREP_IO_CMDS, which is the batch version of
> UBLK_IO_FETCH_REQ.
>
> Add new command UBLK_U_IO_COMMIT_IO_CMDS, which is for committing io command
> result only, still the batch version.
>
> The new command header type is `struct ublk_batch_io`, and fixed buffer is
> required for these two uring_cmd.

The commit message could be clearer that this doesn't actually implement
these commands yet, it only validates the SQE fields.

>
> Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxx>
> ---
>  drivers/block/ublk_drv.c      | 102 +++++++++++++++++++++++++++++++++-
>  include/uapi/linux/ublk_cmd.h |  49 ++++++++++++++++
>  2 files changed, 149 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
> index 46be5b656f22..4da0dbbd7e16 100644
> --- a/drivers/block/ublk_drv.c
> +++ b/drivers/block/ublk_drv.c
> @@ -85,6 +85,11 @@
>                  UBLK_PARAM_TYPE_DEVT | UBLK_PARAM_TYPE_ZONED | \
>                  UBLK_PARAM_TYPE_DMA_ALIGN | UBLK_PARAM_TYPE_SEGMENT)
>
> +#define UBLK_BATCH_F_ALL \
> +        (UBLK_BATCH_F_HAS_ZONE_LBA | \
> +         UBLK_BATCH_F_HAS_BUF_ADDR | \
> +         UBLK_BATCH_F_AUTO_BUF_REG_FALLBACK)
> +
>  struct ublk_uring_cmd_pdu {
>          /*
>           * Store requests in same batch temporarily for queuing them to
> @@ -108,6 +113,11 @@ struct ublk_uring_cmd_pdu {
>          u16 tag;
>  };
>
> +struct ublk_batch_io_data {
> +        struct ublk_queue *ubq;
> +        struct io_uring_cmd *cmd;
> +};
> +
>  /*
>   * io command is active: sqe cmd is received, and its cqe isn't done
>   *
> @@ -277,7 +287,7 @@ static inline bool ublk_dev_is_zoned(const struct ublk_device *ub)
>          return ub->dev_info.flags & UBLK_F_ZONED;
>  }
>
> -static inline bool ublk_queue_is_zoned(struct ublk_queue *ubq)
> +static inline bool ublk_queue_is_zoned(const struct ublk_queue *ubq)

This change could go in a separate commit.

>  {
>          return ubq->flags & UBLK_F_ZONED;
>  }
> @@ -2528,10 +2538,98 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
>          return ublk_ch_uring_cmd_local(cmd, issue_flags);
>  }
>
> +static int ublk_check_batch_cmd_flags(const struct ublk_batch_io *uc)
> +{
> +        const unsigned short mask = UBLK_BATCH_F_HAS_BUF_ADDR |
> +                UBLK_BATCH_F_HAS_ZONE_LBA;

Can we use a fixed-size integer type, i.e. u16?

> +
> +        if (uc->flags & ~UBLK_BATCH_F_ALL)
> +                return -EINVAL;
> +
> +        /* UBLK_BATCH_F_AUTO_BUF_REG_FALLBACK requires buffer index */
> +        if ((uc->flags & UBLK_BATCH_F_AUTO_BUF_REG_FALLBACK) &&
> +                        (uc->flags & UBLK_BATCH_F_HAS_BUF_ADDR))
> +                return -EINVAL;
> +
> +        switch (uc->flags & mask) {
> +        case 0:
> +                if (uc->elem_bytes != 8)

sizeof(struct ublk_elem_header)?

> +                        return -EINVAL;
> +                break;
> +        case UBLK_BATCH_F_HAS_ZONE_LBA:
> +        case UBLK_BATCH_F_HAS_BUF_ADDR:
> +                if (uc->elem_bytes != 8 + 8)

sizeof(u64)?

> +                        return -EINVAL;
> +                break;
> +        case UBLK_BATCH_F_HAS_ZONE_LBA | UBLK_BATCH_F_HAS_BUF_ADDR:
> +                if (uc->elem_bytes != 8 + 8 + 8)
> +                        return -EINVAL;

So elem_bytes is redundant with flags? Do we really need a separate
field then?

> +                break;
> +        default:
> +                return -EINVAL;

default case is unreachable?

> +        }
> +
> +        return 0;
> +}
> +
> +static int ublk_check_batch_cmd(const struct ublk_batch_io_data *data,
> +                                const struct ublk_batch_io *uc)
> +{
> +        if (!(data->cmd->flags & IORING_URING_CMD_FIXED))
> +                return -EINVAL;
> +
> +        if (uc->nr_elem * uc->elem_bytes > data->cmd->sqe->len)

Cast nr_elem and/or elem_bytes to u32 to avoid overflow concerns?
Should also use READ_ONCE() to read the userspace-mapped sqe->len.
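Something like this is what I have in mind (untested, just to show the
widened multiply plus a READ_ONCE() of the SQE field):

        if ((u32)uc->nr_elem * uc->elem_bytes > READ_ONCE(data->cmd->sqe->len))
                return -E2BIG;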
> +                return -E2BIG;
> +
> +        if (uc->nr_elem > data->ubq->q_depth)
> +                return -E2BIG;
> +
> +        if ((uc->flags & UBLK_BATCH_F_HAS_ZONE_LBA) &&
> +                        !ublk_queue_is_zoned(data->ubq))
> +                return -EINVAL;
> +
> +        if ((uc->flags & UBLK_BATCH_F_HAS_BUF_ADDR) &&
> +                        !ublk_need_map_io(data->ubq))
> +                return -EINVAL;
> +
> +        if ((uc->flags & UBLK_BATCH_F_AUTO_BUF_REG_FALLBACK) &&
> +                        !ublk_support_auto_buf_reg(data->ubq))
> +                return -EINVAL;
> +
> +        if (uc->reserved || uc->reserved2)
> +                return -EINVAL;
> +
> +        return ublk_check_batch_cmd_flags(uc);
> +}
> +
>  static int ublk_ch_batch_io_uring_cmd(struct io_uring_cmd *cmd,
>                                        unsigned int issue_flags)
>  {
> -        return -EOPNOTSUPP;
> +        const struct ublk_batch_io *uc = io_uring_sqe_cmd(cmd->sqe);
> +        struct ublk_device *ub = cmd->file->private_data;
> +        struct ublk_batch_io_data data = {
> +                .cmd = cmd,
> +        };
> +        u32 cmd_op = cmd->cmd_op;
> +        int ret = -EINVAL;
> +
> +        if (uc->q_id >= ub->dev_info.nr_hw_queues)
> +                goto out;
> +        data.ubq = ublk_get_queue(ub, uc->q_id);

Should be using READ_ONCE() to read from userspace-mapped memory.

> +
> +        switch (cmd_op) {
> +        case UBLK_U_IO_PREP_IO_CMDS:
> +        case UBLK_U_IO_COMMIT_IO_CMDS:
> +                ret = ublk_check_batch_cmd(&data, uc);
> +                if (ret)
> +                        goto out;
> +                ret = -EOPNOTSUPP;
> +                break;
> +        default:
> +                ret = -EOPNOTSUPP;
> +        }
> +out:
> +        return ret;
>  }
>
>  static inline bool ublk_check_ubuf_dir(const struct request *req,
> diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h
> index ec77dabba45b..01d3af52cfb4 100644
> --- a/include/uapi/linux/ublk_cmd.h
> +++ b/include/uapi/linux/ublk_cmd.h
> @@ -102,6 +102,10 @@
>          _IOWR('u', 0x23, struct ublksrv_io_cmd)
>  #define UBLK_U_IO_UNREGISTER_IO_BUF \
>          _IOWR('u', 0x24, struct ublksrv_io_cmd)
> +#define UBLK_U_IO_PREP_IO_CMDS \
> +        _IOWR('u', 0x25, struct ublk_batch_io)
> +#define UBLK_U_IO_COMMIT_IO_CMDS \
> +        _IOWR('u', 0x26, struct ublk_batch_io)
>
>  /* only ABORT means that no re-fetch */
>  #define UBLK_IO_RES_OK 0
> @@ -525,6 +529,51 @@ struct ublksrv_io_cmd {
>          };
>  };
>
> +struct ublk_elem_header {
> +        __u16 tag;        /* IO tag */
> +
> +        /*
> +         * Buffer index for incoming io command, only valid iff
> +         * UBLK_F_AUTO_BUF_REG is set
> +         */
> +        __u16 buf_index;
> +        __u32 result;     /* I/O completion result (commit only) */

The result is unsigned? So there's no way to specify a request failure?

> +};
> +
> +/*
> + * uring_cmd buffer structure

Add "for batch commands"?

> + *
> + * buffer includes multiple elements, which number is specified by
> + * `nr_elem`. Each element buffer is organized in the following order:
> + *
> + * struct ublk_elem_buffer {
> + *        // Mandatory fields (8 bytes)
> + *        struct ublk_elem_header header;
> + *
> + *        // Optional fields (8 bytes each, included based on flags)
> + *
> + *        // Buffer address (if UBLK_BATCH_F_HAS_BUF_ADDR) for copying data
> + *        // between ublk request and ublk server buffer
> + *        __u64 buf_addr;
> + *
> + *        // returned Zone append LBA (if UBLK_BATCH_F_HAS_ZONE_LBA)
> + *        __u64 zone_lba;
> + * }
> + *
> + * Used for `UBLK_U_IO_PREP_IO_CMDS` and `UBLK_U_IO_COMMIT_IO_CMDS`
> + */
> +struct ublk_batch_io {
> +        __u16 q_id;

So this doesn't allow batching completions across ublk queues? That
seems like it significantly limits the usefulness of this feature. A
ublk server thread may be handling ublk requests from a number of
client threads which are submitting to different ublk queues.
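Going back to the q_id read further up: by READ_ONCE() I mean taking a
snapshot of the field and using that copy for both the bounds check and
the queue lookup, e.g. (untested):

        u16 q_id = READ_ONCE(uc->q_id);

        if (q_id >= ub->dev_info.nr_hw_queues)
                goto out;
        data.ubq = ublk_get_queue(ub, q_id);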
Best,
Caleb

> +#define UBLK_BATCH_F_HAS_ZONE_LBA (1 << 0)
> +#define UBLK_BATCH_F_HAS_BUF_ADDR (1 << 1)
> +#define UBLK_BATCH_F_AUTO_BUF_REG_FALLBACK (1 << 2)
> +        __u16 flags;
> +        __u16 nr_elem;
> +        __u8  elem_bytes;
> +        __u8  reserved;
> +        __u64 reserved2;
> +};
> +
>  struct ublk_param_basic {
>  #define UBLK_ATTR_READ_ONLY (1 << 0)
>  #define UBLK_ATTR_ROTATIONAL (1 << 1)
> --
> 2.47.0
>