On Wed, Aug 18, 2021 at 09:07:54PM +0530, Nitesh Shetty wrote: > On Wed, Aug 18, 2021 at 5:06 AM Darrick J. Wong <djwong@xxxxxxxxxx> wrote: > > > > On Tue, Aug 17, 2021 at 03:44:20PM +0530, SelvaKumar S wrote: > > > From: Nitesh Shetty <nj.shetty@xxxxxxxxxxx> > > > > > > Add new BLKCOPY ioctl that offloads copying of one or more sources ranges > > > to a destination in the device. COPY ioctl accepts a 'copy_range' > > > structure that contains destination (in sectors), no of sources and > > > pointer to the array of source ranges. Each source range is represented by > > > 'range_entry' that contains start and length of source ranges (in sectors) > > > > > > MAX_COPY_NR_RANGE, limits the number of entries for the IOCTL and > > > MAX_COPY_TOTAL_LENGTH limits the total copy length, IOCTL can handle. > > > > > > Example code, to issue BLKCOPY: > > > /* Sample example to copy three source-ranges [0, 8] [16, 8] [32,8] to > > > * [64,24], on the same device */ > > > > > > int main(void) > > > { > > > int ret, fd; > > > struct range_entry source_range[] = {{.src = 0, .len = 8}, > > > {.src = 16, .len = 8}, {.src = 32, .len = 8},}; > > > struct copy_range cr; > > > > > > cr.dest = 64; > > > cr.nr_range = 3; > > > cr.range_list = (__u64)&source_range; > > > > > > fd = open("/dev/nvme0n1", O_RDWR); > > > if (fd < 0) return 1; > > > > > > ret = ioctl(fd, BLKCOPY, &cr); > > > if (ret < 0) printf("copy failure\n"); > > > > > > close(fd); > > > > > > return ret; > > > } > > > > > > Signed-off-by: Nitesh Shetty <nj.shetty@xxxxxxxxxxx> > > > Signed-off-by: SelvaKumar S <selvakuma.s1@xxxxxxxxxxx> > > > Signed-off-by: Kanchan Joshi <joshi.k@xxxxxxxxxxx> > > > --- > > > block/ioctl.c | 33 +++++++++++++++++++++++++++++++++ > > > include/uapi/linux/fs.h | 8 ++++++++ > > > 2 files changed, 41 insertions(+) > > > > > > diff --git a/block/ioctl.c b/block/ioctl.c > > > index eb0491e90b9a..2af56d01e9fe 100644 > > > --- a/block/ioctl.c > > > +++ b/block/ioctl.c > > > @@ -143,6 +143,37 @@ 
static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode, > > > GFP_KERNEL, flags); > > > } > > > > > > +static int blk_ioctl_copy(struct block_device *bdev, fmode_t mode, > > > + unsigned long arg) > > > +{ > > > + struct copy_range crange; > > > + struct range_entry *rlist; > > > + int ret; > > > + > > > + if (!(mode & FMODE_WRITE)) > > > + return -EBADF; > > > + > > > + if (copy_from_user(&crange, (void __user *)arg, sizeof(crange))) > > > + return -EFAULT; > > > + > > > + rlist = kmalloc_array(crange.nr_range, sizeof(*rlist), > > > + GFP_KERNEL); > > > + if (!rlist) > > > + return -ENOMEM; > > > + > > > + if (copy_from_user(rlist, (void __user *)crange.range_list, > > > + sizeof(*rlist) * crange.nr_range)) { > > > + ret = -EFAULT; > > > + goto out; > > > + } > > > + > > > + ret = blkdev_issue_copy(bdev, crange.nr_range, rlist, bdev, crange.dest, > > > + GFP_KERNEL, 0); > > > +out: > > > + kfree(rlist); > > > + return ret; > > > +} > > > + > > > static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode, > > > unsigned long arg) > > > { > > > @@ -468,6 +499,8 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode, > > > case BLKSECDISCARD: > > > return blk_ioctl_discard(bdev, mode, arg, > > > BLKDEV_DISCARD_SECURE); > > > + case BLKCOPY: > > > + return blk_ioctl_copy(bdev, mode, arg); > > > case BLKZEROOUT: > > > return blk_ioctl_zeroout(bdev, mode, arg); > > > case BLKGETDISKSEQ: > > > diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h > > > index 7a97b588d892..4183688ff398 100644 > > > --- a/include/uapi/linux/fs.h > > > +++ b/include/uapi/linux/fs.h > > > @@ -76,6 +76,13 @@ struct range_entry { > > > __u64 len; > > > }; > > > > > > +struct copy_range { > > > + __u64 dest; > > > + __u64 nr_range; > > > > If the maximum number of elements in the range list is 1<<12, there's no > > need for this to be larger than a u16, right? 
> > > > > + __u64 range_list; > > > > Pointers embedded in a structure are /not/ a good idea, because this > > will create a lot of compatibility headaches for 32-bit binaries running > > on 64-bit kernels. Please just make the size of this header structure > > a multiple of 8 bytes and put the range_entry list immediately after it. > > > > struct copy_range { > > __s64 dest_offset; > > __u32 nr_range_entries; > > __u32 flags; > > __u64 reserved[2]; > > }; > > > > struct __user range_entry *re = ((struct range_entry *)(copyhead + 1)); > > > > copy_from_user(&urk, re...); > > > > --D > > > Thanks, this is better. 'Reserved' field was there to be used for > future extension of the interface. > Now that you mentioned 'flags', it seems we can do away with > 'reserved' fields altogether? We still want the reserved-must-be-zero fields so that adding the first field or two doesn't require changes to the pointer arithmetic. Also, I suppose you could make the relationship between copy_range and range_entry more explicit: struct copy_range { __s64 dest_offset; __u32 nr_range_entries; __u32 flags; __u64 reserved[2]; /* must come last */ struct range_entry entries[]; }; struct __user range_entry *re = &copyhead->entries[0]; --D > > Regards, > Nitesh Shetty