[RFC PATCH 6/7] ublk: register buffer to specified io_uring & buf index via UBLK_F_AUTO_BUF_REG

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Add UBLK_F_AUTO_BUF_REG to support automatically registering the request
buffer to a specified io_uring context and buffer index.

Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxx>
---
 drivers/block/ublk_drv.c      | 56 ++++++++++++++++++++++++++++-------
 include/uapi/linux/ublk_cmd.h | 38 ++++++++++++++++++++++++
 2 files changed, 84 insertions(+), 10 deletions(-)

diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 1fd20e481a60..e82618442749 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -66,7 +66,8 @@
 		| UBLK_F_USER_COPY \
 		| UBLK_F_ZONED \
 		| UBLK_F_USER_RECOVERY_FAIL_IO \
-		| UBLK_F_UPDATE_SIZE)
+		| UBLK_F_UPDATE_SIZE \
+		| UBLK_F_AUTO_BUF_REG)
 
 #define UBLK_F_ALL_RECOVERY_FLAGS (UBLK_F_USER_RECOVERY \
 		| UBLK_F_USER_RECOVERY_REISSUE \
@@ -146,7 +147,10 @@ struct ublk_uring_cmd_pdu {
 
 struct ublk_io {
 	/* userspace buffer address from io cmd */
-	__u64	addr;
+	union {
+		__u64	addr;
+		struct ublk_auto_buf_reg buf;
+	};
 	unsigned int flags;
 	int res;
 
@@ -626,7 +630,7 @@ static inline bool ublk_support_zero_copy(const struct ublk_queue *ubq)
 
 static inline bool ublk_support_auto_buf_reg(const struct ublk_queue *ubq)
 {
-	return false;
+	return ubq->flags & UBLK_F_AUTO_BUF_REG;
 }
 
 static inline bool ublk_support_user_copy(const struct ublk_queue *ubq)
@@ -1177,6 +1181,16 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq,
 		blk_mq_end_request(rq, BLK_STS_IOERR);
 }
 
+
+static inline void ublk_init_auto_buf_reg(const struct ublk_io *io,
+					  struct io_buf_data *data)
+{
+	data->index = io->buf.index;
+	data->ring_fd = io->buf.ring_fd;
+	data->has_fd = true;
+	data->registered_fd = io->buf.flags & UBLK_AUTO_BUF_REGISTERED_RING;
+}
+
 static bool ublk_auto_buf_reg(struct ublk_queue *ubq, struct request *req,
 			      struct ublk_io *io, unsigned int issue_flags)
 {
@@ -1187,6 +1201,9 @@ static bool ublk_auto_buf_reg(struct ublk_queue *ubq, struct request *req,
 	};
 	int ret;
 
+	if (ublk_support_auto_buf_reg(ubq))
+		ublk_init_auto_buf_reg(io, &data);
+
 	/* one extra reference is dropped by ublk_io_release */
 	ublk_init_req_ref(ubq, req, 2);
 	ret = io_buffer_register_bvec(io->cmd, &data, issue_flags);
@@ -2045,7 +2062,7 @@ static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_queue *ubq,
 		 */
 		if (!buf_addr && !ublk_need_get_data(ubq))
 			goto out;
-	} else if (buf_addr) {
+	} else if (buf_addr && !ublk_support_auto_buf_reg(ubq)) {
 		/* User copy requires addr to be unset */
 		ret = -EINVAL;
 		goto out;
@@ -2058,13 +2075,17 @@ static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_queue *ubq,
 	return ret;
 }
 
-static void ublk_auto_buf_unreg(struct ublk_io *io, struct io_uring_cmd *cmd,
+static void ublk_auto_buf_unreg(const struct ublk_queue *ubq,
+				struct ublk_io *io, struct io_uring_cmd *cmd,
 				struct request *req, unsigned int issue_flags)
 {
 	struct io_buf_data data = {
 		.index = req->tag,
 	};
 
+	if (ublk_support_auto_buf_reg(ubq))
+		ublk_init_auto_buf_reg(io, &data);
+
 	WARN_ON_ONCE(io_buffer_unregister_bvec(cmd, &data, issue_flags));
 	io->flags &= ~UBLK_IO_FLAG_AUTO_BUF_REG;
 }
@@ -2088,7 +2109,8 @@ static int ublk_commit_and_fetch(const struct ublk_queue *ubq,
 		if (!ub_cmd->addr && (!ublk_need_get_data(ubq) ||
 					req_op(req) == REQ_OP_READ))
 			return -EINVAL;
-	} else if (req_op(req) != REQ_OP_ZONE_APPEND && ub_cmd->addr) {
+	} else if ((req_op(req) != REQ_OP_ZONE_APPEND &&
+				!ublk_support_auto_buf_reg(ubq)) && ub_cmd->addr) {
 		/*
 		 * User copy requires addr to be unset when command is
 		 * not zone append
@@ -2097,7 +2119,7 @@ static int ublk_commit_and_fetch(const struct ublk_queue *ubq,
 	}
 
 	if (io->flags & UBLK_IO_FLAG_AUTO_BUF_REG)
-		ublk_auto_buf_unreg(io, cmd, req, issue_flags);
+		ublk_auto_buf_unreg(ubq, io, cmd, req, issue_flags);
 
 	ublk_fill_io_cmd(io, cmd, ub_cmd->addr);
 
@@ -2788,6 +2810,11 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header)
 	else if (!(info.flags & UBLK_F_UNPRIVILEGED_DEV))
 		return -EPERM;
 
+	/* F_AUTO_BUF_REG and F_SUPPORT_ZERO_COPY can't co-exist */
+	if ((info.flags & UBLK_F_AUTO_BUF_REG) &&
+			(info.flags & UBLK_F_SUPPORT_ZERO_COPY))
+		return -EINVAL;
+
 	/* forbid nonsense combinations of recovery flags */
 	switch (info.flags & UBLK_F_ALL_RECOVERY_FLAGS) {
 	case 0:
@@ -2817,8 +2844,11 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header)
 		 * For USER_COPY, we depends on userspace to fill request
 		 * buffer by pwrite() to ublk char device, which can't be
 		 * used for unprivileged device
+		 *
+		 * The same applies to zero copy and auto buffer registration.
 		 */
-		if (info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY))
+		if (info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY |
+					UBLK_F_AUTO_BUF_REG))
 			return -EINVAL;
 	}
 
@@ -2876,17 +2906,22 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header)
 		UBLK_F_URING_CMD_COMP_IN_TASK;
 
 	/* GET_DATA isn't needed any more with USER_COPY or ZERO COPY */
-	if (ub->dev_info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY))
+	if (ub->dev_info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY |
+				UBLK_F_AUTO_BUF_REG))
 		ub->dev_info.flags &= ~UBLK_F_NEED_GET_DATA;
 
 	/*
 	 * Zoned storage support requires reuse `ublksrv_io_cmd->addr` for
 	 * returning write_append_lba, which is only allowed in case of
 	 * user copy or zero copy
+	 *
+	 * UBLK_F_AUTO_BUF_REG can't be enabled for zoned because it needs
+	 * `ublksrv_io_cmd->addr` space to pass ring_fd and buffer index.
 	 */
 	if (ublk_dev_is_zoned(ub) &&
 	    (!IS_ENABLED(CONFIG_BLK_DEV_ZONED) || !(ub->dev_info.flags &
-	     (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY)))) {
+	     (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY)) ||
+	     (ub->dev_info.flags & UBLK_F_AUTO_BUF_REG))) {
 		ret = -EINVAL;
 		goto out_free_dev_number;
 	}
@@ -3403,6 +3438,7 @@ static int __init ublk_init(void)
 
 	BUILD_BUG_ON((u64)UBLKSRV_IO_BUF_OFFSET +
 			UBLKSRV_IO_BUF_TOTAL_SIZE < UBLKSRV_IO_BUF_OFFSET);
+	BUILD_BUG_ON(sizeof(struct ublk_auto_buf_reg) != sizeof(__u64));
 
 	init_waitqueue_head(&ublk_idr_wq);
 
diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h
index be5c6c6b16e0..3d7c8c69cf06 100644
--- a/include/uapi/linux/ublk_cmd.h
+++ b/include/uapi/linux/ublk_cmd.h
@@ -219,6 +219,30 @@
  */
 #define UBLK_F_UPDATE_SIZE		 (1ULL << 10)
 
+/*
+ * The request buffer is registered automatically to the io_uring context
+ * specified by the ublk server before this io command is delivered to the
+ * ublk server, and it is unregistered automatically when this io command
+ * is completed.
+ *
+ * For using this feature:
+ *
+ * - ublk server has to create a sparse buffer table
+ *
+ * - pass the io_uring context FD in `ublksrv_io_cmd.auto_buf.ring_fd`; it
+ *   is a registered io_uring FD if `UBLK_AUTO_BUF_REGISTERED_RING` is set
+ *   in `ublksrv_io_cmd.auto_buf.flags`, otherwise a plain FD
+ *
+ * - pass the buffer index in `ublksrv_io_cmd.auto_buf.index`
+ *
+ * This not only avoids the extra cost of two uring_cmds, but also simplifies
+ * the backend implementation; for example, the dependency on
+ * IO_REGISTER_IO_BUF and IO_UNREGISTER_IO_BUF is no longer necessary.
+ *
+ * This feature isn't available for UBLK_F_ZONED
+ */
+#define UBLK_F_AUTO_BUF_REG 	(1ULL << 11)
+
 /* device state */
 #define UBLK_S_DEV_DEAD	0
 #define UBLK_S_DEV_LIVE	1
@@ -339,6 +363,14 @@ static inline __u32 ublksrv_get_flags(const struct ublksrv_io_desc *iod)
 	return iod->op_flags >> 8;
 }
 
+struct ublk_auto_buf_reg {
+	__s32  ring_fd;
+	__u16  index;
+#define UBLK_AUTO_BUF_REGISTERED_RING            (1 << 0)
+	__u8   flags;
+	__u8   _pad;
+};
+
 /* issued to ublk driver via /dev/ublkcN */
 struct ublksrv_io_cmd {
 	__u16	q_id;
@@ -363,6 +395,12 @@ struct ublksrv_io_cmd {
 		 */
 		__u64	addr;
 		__u64	zone_append_lba;
+
+		/*
+		 * for AUTO_BUF_REG feature, F_ZONED can't be supported,
+		 * and ->addr isn't used for zero copy
+		 */
+		struct ublk_auto_buf_reg auto_buf;
 	};
 };
 
-- 
2.47.0





[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux