From: Keith Busch <kbusch@xxxxxxxxxx> Copy offload can be used to defrag or garbage collect data spread across the disk. Most storage protocols provide a way to specify multiple sources in a single copy command, so introduce kernel and user space interfaces to accomplish that. Signed-off-by: Keith Busch <kbusch@xxxxxxxxxx> --- block/blk-lib.c | 50 ++++++++++++++++++++++++---------- block/ioctl.c | 59 +++++++++++++++++++++++++++++++++++++++++ include/linux/blkdev.h | 2 ++ include/uapi/linux/fs.h | 14 ++++++++++ 4 files changed, 111 insertions(+), 14 deletions(-) diff --git a/block/blk-lib.c b/block/blk-lib.c index a538acbaa2cd7..7513b876a5399 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -424,26 +424,46 @@ static int __blkdev_copy(struct block_device *bdev, sector_t dst_sector, } static int blkdev_copy_offload(struct block_device *bdev, sector_t dst_sector, - sector_t src_sector, sector_t nr_sects, gfp_t gfp) + struct bio_vec *bv, int nr_vecs, gfp_t gfp) { + unsigned size = 0; struct bio *bio; - int ret; - - struct bio_vec bv = { - .bv_sector = src_sector, - .bv_sectors = nr_sects, - }; + int ret, i; - bio = bio_alloc(bdev, 1, REQ_OP_COPY, gfp); - bio_add_copy_src(bio, &bv); + bio = bio_alloc(bdev, nr_vecs, REQ_OP_COPY, gfp); + for (i = 0; i < nr_vecs; i++) { + size += bv[i].bv_sectors << SECTOR_SHIFT; + bio_add_copy_src(bio, &bv[i]); + } bio->bi_iter.bi_sector = dst_sector; - bio->bi_iter.bi_size = nr_sects << SECTOR_SHIFT; + bio->bi_iter.bi_size = size; ret = submit_bio_wait(bio); bio_put(bio); return ret; +} + +/** + * blkdev_copy_range - copy range of sectors to a destination + * @dst_sector: start sector of the destination to copy to + * @bv: vector of source sectors + * @nr_vecs: number of source sector vectors + * @gfp: allocation flags to use + */ +int blkdev_copy_range(struct block_device *bdev, sector_t dst_sector, + struct bio_vec *bv, int nr_vecs, gfp_t gfp) +{ + int ret, i; + if (bdev_copy_sectors(bdev)) + return blkdev_copy_offload(bdev, 
dst_sector, bv, nr_vecs, gfp); + + for (i = 0, ret = 0; i < nr_vecs && !ret; i++) + ret = __blkdev_copy(bdev, dst_sector, bv[i].bv_sector, + bv[i].bv_sectors, gfp); + return ret; } +EXPORT_SYMBOL_GPL(blkdev_copy_range); /** * blkdev_copy - copy source sectors to a destination on the same block device @@ -455,9 +475,11 @@ static int blkdev_copy_offload(struct block_device *bdev, sector_t dst_sector, int blkdev_copy(struct block_device *bdev, sector_t dst_sector, sector_t src_sector, sector_t nr_sects, gfp_t gfp) { - if (bdev_copy_sectors(bdev)) - return blkdev_copy_offload(bdev, dst_sector, src_sector, - nr_sects, gfp); - return __blkdev_copy(bdev, dst_sector, src_sector, nr_sects, gfp); + struct bio_vec bv = { + .bv_sector = src_sector, + .bv_sectors = nr_sects, + }; + + return blkdev_copy_range(bdev, dst_sector, &bv, 1, gfp); } EXPORT_SYMBOL_GPL(blkdev_copy); diff --git a/block/ioctl.c b/block/ioctl.c index 6f03c65867348..4b5095be19e1a 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -241,6 +241,63 @@ static int blk_ioctl_copy(struct block_device *bdev, blk_mode_t mode, return blkdev_copy(bdev, dst, src, nr, GFP_KERNEL); } +static int blk_ioctl_copy_vec(struct block_device *bdev, blk_mode_t mode, + void __user *argp) +{ + sector_t align = bdev_logical_block_size(bdev) >> SECTOR_SHIFT; + struct bio_vec *bv, fast_bv[UIO_FASTIOV]; + struct copy_range cr; + int i, nr, ret; + __u64 dst; + + if (!(mode & BLK_OPEN_WRITE)) + return -EBADF; + if (copy_from_user(&cr, argp, sizeof(cr))) + return -EFAULT; + if (!(IS_ALIGNED(cr.dst_sector, align))) + return -EINVAL; + + nr = cr.nr_ranges; + if (nr <= UIO_FASTIOV) { + bv = fast_bv; + } else { + bv = kmalloc_array(nr, sizeof(*bv), GFP_KERNEL); + if (!bv) + return -ENOMEM; + } + + dst = cr.dst_sector; + for (i = 0; i < nr; i++) { + struct copy_source csrc; + __u64 nr_sects, src; + + if (copy_from_user(&csrc, + (void __user *)(cr.sources + i * sizeof(csrc)), + sizeof(csrc))) { + ret = -EFAULT; + goto out; + } + + nr_sects = 
csrc.nr_sectors; + src = csrc.src_sector; + if (!(IS_ALIGNED(src | nr_sects, align)) || + (src < dst && src + nr_sects > dst) || + (dst < src && dst + nr_sects > src)) { + ret = -EINVAL; + goto out; + } + + bv[i].bv_sectors = nr_sects; + bv[i].bv_sector = src; + } + + ret = blkdev_copy_range(bdev, dst, bv, nr, GFP_KERNEL); +out: + if (bv != fast_bv) + kfree(bv); + return ret; +} + static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode, unsigned long arg) { @@ -605,6 +662,8 @@ static int blkdev_common_ioctl(struct block_device *bdev, blk_mode_t mode, return blk_ioctl_secure_erase(bdev, mode, argp); case BLKCPY: return blk_ioctl_copy(bdev, mode, argp); + case BLKCPY_VEC: + return blk_ioctl_copy_vec(bdev, mode, argp); case BLKZEROOUT: return blk_ioctl_zeroout(bdev, mode, arg); case BLKGETDISKSEQ: diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e39ba0e91d43e..a77f2298754b5 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1182,6 +1182,8 @@ int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp); int blkdev_copy(struct block_device *bdev, sector_t dst_sector, sector_t src_sector, sector_t nr_sects, gfp_t gfp); +int blkdev_copy_range(struct block_device *bdev, sector_t dst_sector, + struct bio_vec *bv, int nr_vecs, gfp_t gfp); #define BLKDEV_ZERO_NOUNMAP (1 << 0) /* do not free blocks */ #define BLKDEV_ZERO_NOFALLBACK (1 << 1) /* don't write explicit zeroes */ diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 534f157ce22e9..aed965f74ea2c 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -218,6 +218,20 @@ struct fsxattr { /* [0] = destination lba, [1] = source lba, [2] = number of sectors */ #define BLKCPY _IOWR(0x12,142,__u64[3]) +struct copy_source { + __u64 src_sector; + __u64 nr_sectors; +}; + +struct copy_range { + __u64 dst_sector; + __u16 nr_ranges; + __u8 rsvd[6]; + __u64 sources; /* user space pointer to struct 
copy_source[] */ +}; +#define BLKCPY_VEC _IOWR(0x12,143,struct copy_range) + + #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ #define FIBMAP _IO(0x00,1) /* bmap access */ #define FIGETBSZ _IO(0x00,2) /* get the block size used for bmap */ -- 2.47.1