Re: [PATCH 4/5] block: add support for vectored copies

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 5/22/25 00:31, Keith Busch wrote:
From: Keith Busch <kbusch@xxxxxxxxxx>

Copy offload can be used to defrad or garbage collect data spread across

Defrag?

the disk. Most storage protocols provide a way to specify multiple
sources in a single copy command, so introduce kernel and user space
interfaces to accomplish that.

Signed-off-by: Keith Busch <kbusch@xxxxxxxxxx>
---
  block/blk-lib.c         | 50 ++++++++++++++++++++++++----------
  block/ioctl.c           | 59 +++++++++++++++++++++++++++++++++++++++++
  include/linux/blkdev.h  |  2 ++
  include/uapi/linux/fs.h | 14 ++++++++++
  4 files changed, 111 insertions(+), 14 deletions(-)

Any specific reason why this is a separate patch, and not folded into
patch 2? It really feels odd to be continuously updating interfaces which
have been added within the same patchset...

diff --git a/block/blk-lib.c b/block/blk-lib.c
index a538acbaa2cd7..7513b876a5399 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -424,26 +424,46 @@ static int __blkdev_copy(struct block_device *bdev, sector_t dst_sector,
  }
static int blkdev_copy_offload(struct block_device *bdev, sector_t dst_sector,
-		sector_t src_sector, sector_t nr_sects, gfp_t gfp)
+		struct bio_vec *bv, int nr_vecs, gfp_t gfp)
  {
+	unsigned size = 0;
  	struct bio *bio;
-	int ret;
-
-	struct bio_vec bv = {
-		.bv_sector = src_sector,
-		.bv_sectors = nr_sects,
-	};
+	int ret, i;
- bio = bio_alloc(bdev, 1, REQ_OP_COPY, gfp);
-	bio_add_copy_src(bio, &bv);
+	bio = bio_alloc(bdev, nr_vecs, REQ_OP_COPY, gfp);
+	for (i = 0; i < nr_vecs; i++) {
+		size += bv[i].bv_sectors << SECTOR_SHIFT;
+		bio_add_copy_src(bio, &bv[i]);
+	}
  	bio->bi_iter.bi_sector = dst_sector;
-	bio->bi_iter.bi_size = nr_sects << SECTOR_SHIFT;
+	bio->bi_iter.bi_size = size;
ret = submit_bio_wait(bio);
  	bio_put(bio);
  	return ret;
+}
+
+/**
+ * blkdev_copy_range - copy range of sectors to a destination
+ * @dst_sector:	start sector of the destination to copy to
+ * @bv:		vector of source sectors
+ * @nr_vecs:	number of source sector vectors
+ * @gfp:	allocation flags to use
+ */
+int blkdev_copy_range(struct block_device *bdev, sector_t dst_sector,
+		struct bio_vec *bv, int nr_vecs, gfp_t gfp)
+{
+	int ret, i;
+ if (bdev_copy_sectors(bdev))
+		return blkdev_copy_offload(bdev, dst_sector, bv, nr_vecs, gfp);
+
+	for (i = 0, ret = 0; i < nr_vecs && !ret; i++)
+		ret = __blkdev_copy(bdev, dst_sector, bv[i].bv_sector,
+				bv[i].bv_sectors, gfp);
+	return ret;
  }
+EXPORT_SYMBOL_GPL(blkdev_copy_range);
/**
   * blkdev_copy - copy source sectors to a destination on the same block device
@@ -455,9 +475,11 @@ static int blkdev_copy_offload(struct block_device *bdev, sector_t dst_sector,
  int blkdev_copy(struct block_device *bdev, sector_t dst_sector,
  		sector_t src_sector, sector_t nr_sects, gfp_t gfp)
  {
-	if (bdev_copy_sectors(bdev))
-		return blkdev_copy_offload(bdev, dst_sector, src_sector,
-					nr_sects, gfp);
-	return __blkdev_copy(bdev, dst_sector, src_sector, nr_sects, gfp);
+	struct bio_vec bv = {
+		.bv_sector = src_sector,
+		.bv_sectors = nr_sects,
+	};
+
+	return blkdev_copy_range(bdev, dst_sector, &bv, 1, gfp);
  }
  EXPORT_SYMBOL_GPL(blkdev_copy);
diff --git a/block/ioctl.c b/block/ioctl.c
index 6f03c65867348..4b5095be19e1a 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -241,6 +241,63 @@ static int blk_ioctl_copy(struct block_device *bdev, blk_mode_t mode,
  	return blkdev_copy(bdev, dst, src, nr, GFP_KERNEL);
  }
+static int blk_ioctl_copy_vec(struct block_device *bdev, blk_mode_t mode,
+		void __user *argp)
+{
+	sector_t align = bdev_logical_block_size(bdev) >> SECTOR_SHIFT;
+	struct bio_vec *bv, fast_bv[UIO_FASTIOV];
+	struct copy_range cr;
+	int i, nr, ret;
+	__u64 dst;
+
+	if (!(mode & BLK_OPEN_WRITE))
+		return -EBADF;
+	if (copy_from_user(&cr, argp, sizeof(cr)))
+		return -EFAULT;
+	if (!(IS_ALIGNED(cr.dst_sector, align)))
+		return -EINVAL;
+
+	nr = cr.nr_ranges;
+	if (nr <= UIO_FASTIOV) {
+		bv = fast_bv;
+	} else {
+		bv = kmalloc_array(nr, sizeof(*bv), GFP_KERNEL);
+		if (!bv)
+			return -ENOMEM;
+	}
+
+	dst = cr.dst_sector;
+	for (i = 0; i < nr; i++) {
+		struct copy_source csrc;
+		__u64 nr_sects, src;
+
+		if (copy_from_user(&csrc,
+				(void __user *)(cr.sources + i * sizeof(csrc)),
+				sizeof(csrc))) {
+			ret = -EFAULT;
+			goto out;
+		}
+
+		nr_sects = csrc.nr_sectors;
+		src = csrc.src_sector;
+		if (!(IS_ALIGNED(src | nr_sects, align)) ||
+		    (src < dst && src + nr_sects > dst) ||
+		    (dst < src && dst + nr_sects > src)) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		bv[i].bv_sectors = nr_sects;
+		bv[i].bv_sector = src;
+	}
+
+	ret = blkdev_copy_range(bdev, dst, bv, nr, GFP_KERNEL);
+out:
+	if (bv != fast_bv)
+		kfree(bv);
+	return ret;
+}
+
  static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode,
  		unsigned long arg)
  {
@@ -605,6 +662,8 @@ static int blkdev_common_ioctl(struct block_device *bdev, blk_mode_t mode,
  		return blk_ioctl_secure_erase(bdev, mode, argp);
  	case BLKCPY:
  		return blk_ioctl_copy(bdev, mode, argp);
+	case BLKCPY_VEC:
+		return blk_ioctl_copy_vec(bdev, mode, argp);
  	case BLKZEROOUT:
  		return blk_ioctl_zeroout(bdev, mode, arg);
  	case BLKGETDISKSEQ:

And that makes it even worse; introducing two ioctls which basically do
the same thing (or where one is actually a special case of the other)
is probably not what we should be doing.

Cheers,

Hannes
--
Dr. Hannes Reinecke                  Kernel Storage Architect
hare@xxxxxxx                                +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich




[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux