Re: [PATCH] fs: try to clone files first in vfs_copy_file_range

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Nov 25, 2016 at 10:40 AM, Christoph Hellwig <hch@xxxxxx> wrote:
> A clone is a perfectly fine implementation of a file copy, so most
> file systems just implement the copy that way.  Instead of duplicating
> this logic move it to the VFS.  Currently btrfs and XFS implement copies
> the same way as clones and there is no behavior change for them, cifs
> only implements clones and grow support for copy_file_range with this
> patch.  NFS implements both, so this will allow copy_file_range to work
> on servers that only implement CLONE and be lot more efficient on servers
> that implements CLONE and COPY.
>
> Signed-off-by: Christoph Hellwig <hch@xxxxxx>
> ---
>  fs/btrfs/ctree.h  |  3 ---
>  fs/btrfs/file.c   |  1 -
>  fs/btrfs/ioctl.c  | 12 ------------
>  fs/read_write.c   | 27 ++++++++++++++++++++++-----
>  fs/xfs/xfs_file.c | 19 -------------------
>  5 files changed, 22 insertions(+), 40 deletions(-)
>
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index e4e01a9..a49df8b 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -3229,9 +3229,6 @@ int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
>                       loff_t pos, size_t write_bytes,
>                       struct extent_state **cached);
>  int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end);
> -ssize_t btrfs_copy_file_range(struct file *file_in, loff_t pos_in,
> -                             struct file *file_out, loff_t pos_out,
> -                             size_t len, unsigned int flags);
>  int btrfs_clone_file_range(struct file *file_in, loff_t pos_in,
>                            struct file *file_out, loff_t pos_out, u64 len);
>
> diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
> index 3a14c87..991cc99 100644
> --- a/fs/btrfs/file.c
> +++ b/fs/btrfs/file.c
> @@ -2998,7 +2998,6 @@ const struct file_operations btrfs_file_operations = {
>  #ifdef CONFIG_COMPAT
>         .compat_ioctl   = btrfs_compat_ioctl,
>  #endif
> -       .copy_file_range = btrfs_copy_file_range,
>         .clone_file_range = btrfs_clone_file_range,
>         .dedupe_file_range = btrfs_dedupe_file_range,
>  };
> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> index 7acbd2c..dab7462 100644
> --- a/fs/btrfs/ioctl.c
> +++ b/fs/btrfs/ioctl.c
> @@ -3980,18 +3980,6 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
>         return ret;
>  }
>
> -ssize_t btrfs_copy_file_range(struct file *file_in, loff_t pos_in,
> -                             struct file *file_out, loff_t pos_out,
> -                             size_t len, unsigned int flags)
> -{
> -       ssize_t ret;
> -
> -       ret = btrfs_clone_files(file_out, file_in, pos_in, len, pos_out);
> -       if (ret == 0)
> -               ret = len;
> -       return ret;
> -}
> -
>  int btrfs_clone_file_range(struct file *src_file, loff_t off,
>                 struct file *dst_file, loff_t destoff, u64 len)
>  {
> diff --git a/fs/read_write.c b/fs/read_write.c
> index 190e0d36..6674a4b 100644
> --- a/fs/read_write.c
> +++ b/fs/read_write.c
> @@ -1542,20 +1542,37 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
>         if (ret)
>                 return ret;
>
> -       ret = -EOPNOTSUPP;
> -       if (file_out->f_op->copy_file_range)
> +       /*
> +        * Try cloning first, this is supported by more file systems, and
> +        * more efficient if both clone and copy are supported (e.g. NFS).
> +        */

For fs that support both copy and clone, I am not convinced that it up to
VFS to take that decision for the application.
If application 'chose' copy over clone maybe it had a reason.
I suggest to move this to 'clone fallback' after copy_file_range and before
do_splice_direct.

IOWs, for all file systems in question except NFS,
clone_file_range is the 'generic' implementation used by copy_file_range.
So complying to VFS standards, fallback to generic handler if there is no
fs specific handler.

A hypothetical case why copy_range implementation would be preferred
by an application that runs over specific hypothetical fs - copy_file_range
can be more easily implemented as a killable copy loop, returning the length
that was copy before interrupted by a signal.

> +       if (file_in->f_op->clone_file_range) {
> +               ret = file_in->f_op->clone_file_range(file_in, pos_in,
> +                               file_out, pos_out, len);
> +               if (ret == 0) {
> +                       ret = len;
> +                       goto done;
> +               }
> +       }
> +
> +       if (file_out->f_op->copy_file_range) {
>                 ret = file_out->f_op->copy_file_range(file_in, pos_in, file_out,
>                                                       pos_out, len, flags);
> -       if (ret == -EOPNOTSUPP)
> -               ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out,
> -                               len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0);
> +               if (ret != -EOPNOTSUPP)
> +                       goto done;
> +       }
>
> +       ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out,
> +                       len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0);
> +
> +done:
>         if (ret > 0) {
>                 fsnotify_access(file_in);
>                 add_rchar(current, ret);
>                 fsnotify_modify(file_out);
>                 add_wchar(current, ret);
>         }
> +
>         inc_syscr(current);
>         inc_syscw(current);
>
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index 6e4f7f9..86ecc9b 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -909,24 +909,6 @@ xfs_file_fallocate(
>         return error;
>  }
>
> -STATIC ssize_t
> -xfs_file_copy_range(
> -       struct file     *file_in,
> -       loff_t          pos_in,
> -       struct file     *file_out,
> -       loff_t          pos_out,
> -       size_t          len,
> -       unsigned int    flags)
> -{
> -       int             error;
> -
> -       error = xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
> -                                    len, false);
> -       if (error)
> -               return error;
> -       return len;
> -}
> -
>  STATIC int
>  xfs_file_clone_range(
>         struct file     *file_in,
> @@ -1625,7 +1607,6 @@ const struct file_operations xfs_file_operations = {
>         .fsync          = xfs_file_fsync,
>         .get_unmapped_area = thp_get_unmapped_area,
>         .fallocate      = xfs_file_fallocate,
> -       .copy_file_range = xfs_file_copy_range,
>         .clone_file_range = xfs_file_clone_range,
>         .dedupe_file_range = xfs_file_dedupe_range,
>  };
> --
> 2.1.4
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux