On Tue, Aug 25, 2015 at 11:33 PM, Peng Tao <tao.peng@xxxxxxxxxxxxxxx> wrote: > Now that a few file systems are adding clone functionality, namely > btrfs, NFS (later in the series) and XFS > (http://oss.sgi.com/archives/xfs/2015-06/msg00407.html), it makes sense > to pull the ioctl to common code. > > Add vfs_file_clone_range() helper and .clone_range file operation interface > to allow underlying filesystems to clone between regular files. > > The change in do_vfs_ioctl() is deferred to next patch where btrfs > .clone_range is added, just so that we don't break btrfs CLONE ioctl > with this patch. > > Cc: linux-btrfs@xxxxxxxxxxxxxxx > Cc: linux-fsdevel@xxxxxxxxxxxxxxx > Signed-off-by: Peng Tao <tao.peng@xxxxxxxxxxxxxxx> > --- > fs/ioctl.c | 24 ++++++++++++++++++++++++ > fs/read_write.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ > include/linux/fs.h | 4 ++++ > include/uapi/linux/fs.h | 9 +++++++++ > 4 files changed, 82 insertions(+) > > diff --git a/fs/ioctl.c b/fs/ioctl.c > index 5d01d26..726c5d7 100644 > --- a/fs/ioctl.c > +++ b/fs/ioctl.c > @@ -215,6 +215,30 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg) > return error; > } > > +static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd, > + u64 off, u64 olen, u64 destoff) > +{ > + struct fd src_file = fdget(srcfd); > + int ret; > + > + if (!src_file.file) > + return -EBADF; > + ret = vfs_file_clone_range(src_file.file, dst_file, off, olen, destoff); > + > + fdput(src_file); > + return ret; > +} > + > +static long ioctl_file_clone_range(struct file *file, void __user *argp) > +{ > + struct file_clone_range args; > + > + if (copy_from_user(&args, argp, sizeof(args))) > + return -EFAULT; > + return ioctl_file_clone(file, args.src_fd, args.src_offset, > + args.src_length, args.dest_offset); > +} > + > #ifdef CONFIG_BLOCK > > static inline sector_t logical_to_blk(struct inode *inode, loff_t offset) > diff --git a/fs/read_write.c b/fs/read_write.c > index 819ef3f..beaad2c 100644 
> --- a/fs/read_write.c > +++ b/fs/read_write.c > @@ -16,6 +16,7 @@ > #include <linux/pagemap.h> > #include <linux/splice.h> > #include <linux/compat.h> > +#include <linux/mount.h> > #include "internal.h" > > #include <asm/uaccess.h> > @@ -1327,3 +1328,47 @@ COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, > return do_sendfile(out_fd, in_fd, NULL, count, 0); > } > #endif > + > +int vfs_file_clone_range(struct file *src_file, struct file *dst_file, > + loff_t off, size_t len, loff_t dstoff) > +{ > + struct inode *src_ino; > + struct inode *dst_ino; > + ssize_t ret; > + > + if (!(src_file->f_mode & FMODE_READ) || > + !(dst_file->f_mode & FMODE_WRITE) || > + (dst_file->f_flags & O_APPEND) || > + !src_file->f_op || !src_file->f_op->clone_range) > + return -EINVAL; > + > + src_ino = file_inode(src_file); > + dst_ino = file_inode(dst_file); > + > + if (S_ISDIR(src_ino->i_mode) || S_ISDIR(dst_ino->i_mode)) > + return -EISDIR; > + > + /* sanity check on offsets and length */ > + if (off + len < off || dstoff + len < dstoff || > + off + len > i_size_read(src_ino)) > + return -EINVAL; > + > + if (src_ino->i_sb != dst_ino->i_sb || > + src_file->f_path.mnt != dst_file->f_path.mnt) > + return -EXDEV; > + > + ret = mnt_want_write_file(dst_file); > + if (ret) > + return ret; > + > + ret = src_file->f_op->clone_range(src_file, dst_file, off, len, dstoff); > + if (!ret) { > + fsnotify_access(src_file); > + fsnotify_modify(dst_file); > + } > + > + mnt_drop_write_file(dst_file); > + > + return ret; > +} > +EXPORT_SYMBOL(vfs_file_clone_range); > diff --git a/include/linux/fs.h b/include/linux/fs.h > index cc008c3..612d7f4 100644 > --- a/include/linux/fs.h > +++ b/include/linux/fs.h > @@ -1628,6 +1628,8 @@ struct file_operations { > long (*fallocate)(struct file *file, int mode, loff_t offset, > loff_t len); > void (*show_fdinfo)(struct seq_file *m, struct file *f); > + int (*clone_range)(struct file *src_file, struct file *dst_file, > + loff_t off, size_t len, loff_t 
dstoff); One question to btrfs guys... I wanted to give the .clone_range operation explicit semantics such that it does not allow partial success, and returns either 0 for success or a negative failure code, because we do not expect CLONE to succeed partially. Does btrfs CLONE have the same semantics? It looks that way from reading btrfs_clone(), but it would be great if someone working on btrfs could confirm it. Thanks, Tao > #ifndef CONFIG_MMU > unsigned (*mmap_capabilities)(struct file *); > #endif > @@ -2678,6 +2680,8 @@ int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t, > int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *); > #define dax_mkwrite(vma, vmf, gb, iod) dax_fault(vma, vmf, gb, iod) > #define __dax_mkwrite(vma, vmf, gb, iod) __dax_fault(vma, vmf, gb, iod) > +int vfs_file_clone_range(struct file *src_file, struct file *dst_file, > + loff_t off, size_t len, loff_t dstoff); > > #ifdef CONFIG_BLOCK > typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode, > diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h > index 9b964a5..ac7f1c5 100644 > --- a/include/uapi/linux/fs.h > +++ b/include/uapi/linux/fs.h > @@ -39,6 +39,13 @@ > #define RENAME_EXCHANGE (1 << 1) /* Exchange source and dest */ > #define RENAME_WHITEOUT (1 << 2) /* Whiteout source */ > > +struct file_clone_range { > + __s64 src_fd; > + __u64 src_offset; > + __u64 src_length; > + __u64 dest_offset; > +}; > + > struct fstrim_range { > __u64 start; > __u64 len; > @@ -159,6 +166,8 @@ struct inodes_stat_t { > #define FIFREEZE _IOWR('X', 119, int) /* Freeze */ > #define FITHAW _IOWR('X', 120, int) /* Thaw */ > #define FITRIM _IOWR('X', 121, struct fstrim_range) /* Trim */ > +#define FICLONE _IOW(0x94, 9, int) /* Clone */ > +#define FICLONERANGE _IOW(0x94, 13, struct file_clone_range) /* Clone range */ > > #define FS_IOC_GETFLAGS _IOR('f', 1, long) > #define FS_IOC_SETFLAGS _IOW('f', 2, long) > -- > 1.8.3.1 > -- To unsubscribe from this 
list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html