Introduces the FOP_CROSS_TYPE_COPYFILE flag, and implements it for overlayfs. This enables copy_file_range between an overlayfs mount and its constituent layers. Signed-off-by: Han-Wen Nienhuys <hanwen@xxxxxxxxxxx> --- fs/overlayfs/file.c | 60 ++++++++++++++++++++++++++++++--------------- fs/read_write.c | 15 ++++++++---- include/linux/fs.h | 4 +++ 3 files changed, 54 insertions(+), 25 deletions(-) diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index 969b458100fe..97b394737251 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -536,6 +536,9 @@ static int ovl_fadvise(struct file *file, loff_t offset, loff_t len, int advice) return ret; } +static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + size_t len, unsigned int flags); enum ovl_copyop { OVL_COPY, OVL_CLONE, @@ -547,30 +550,42 @@ static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in, loff_t len, unsigned int flags, enum ovl_copyop op) { struct inode *inode_out = file_inode(file_out); - struct file *realfile_in, *realfile_out; + struct file *realfile_in = file_in; + struct file *realfile_out = file_out; const struct cred *old_cred; loff_t ret; + bool in_overlay = file_in->f_op->copy_file_range == &ovl_copy_file_range; + bool out_overlay = file_out->f_op->copy_file_range == &ovl_copy_file_range; - inode_lock(inode_out); - if (op != OVL_DEDUPE) { - /* Update mode */ - ovl_copyattr(inode_out); - ret = file_remove_privs(file_out); - if (ret) - goto out_unlock; + if (WARN_ON_ONCE(!in_overlay && !out_overlay)) + return -EXDEV; + + if (in_overlay) { + realfile_in = ovl_real_file(file_in); + ret = PTR_ERR(realfile_in); + if (IS_ERR(realfile_in)) + return ret; } - realfile_out = ovl_real_file(file_out); - ret = PTR_ERR(realfile_out); - if (IS_ERR(realfile_out)) - goto out_unlock; + if (out_overlay) { + inode_lock(inode_out); - realfile_in = ovl_real_file(file_in); - ret = PTR_ERR(realfile_in); - if (IS_ERR(realfile_in)) - goto 
out_unlock; + if (op != OVL_DEDUPE) { + /* Update mode */ + ovl_copyattr(inode_out); + ret = file_remove_privs(file_out); + if (ret) + goto out_unlock; + } + + realfile_out = ovl_real_file(file_out); + ret = PTR_ERR(realfile_out); + if (IS_ERR(realfile_out)) + goto out_unlock; + + old_cred = ovl_override_creds(file_inode(file_out)->i_sb); + } - old_cred = ovl_override_creds(file_inode(file_out)->i_sb); switch (op) { case OVL_COPY: ret = vfs_copy_file_range(realfile_in, pos_in, @@ -588,13 +603,16 @@ static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in, flags); break; } - ovl_revert_creds(old_cred); /* Update size */ - ovl_file_modified(file_out); + if (out_overlay) { + ovl_file_modified(file_out); + ovl_revert_creds(old_cred); + } out_unlock: - inode_unlock(inode_out); + if (out_overlay) + inode_unlock(inode_out); return ret; } @@ -654,6 +672,8 @@ static int ovl_flush(struct file *file, fl_owner_t id) } const struct file_operations ovl_file_operations = { + .fop_flags = FOP_CROSS_TYPE_COPYFILE, + .open = ovl_open, .release = ovl_release, .llseek = ovl_llseek, diff --git a/fs/read_write.c b/fs/read_write.c index a6133241dfb8..93618441a02d 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1489,7 +1489,7 @@ static int generic_copy_file_checks(struct file *file_in, loff_t pos_in, * We allow some filesystems to handle cross sb copy, but passing * a file of the wrong filesystem type to filesystem driver can result * in an attempt to dereference the wrong type of ->private_data, so - * avoid doing that until we really have a good reason. + * avoid doing that unless FOP_CROSS_TYPE_COPYFILE is set. 
* * nfs and cifs define several different file_system_type structures * and several different sets of file_operations, but they all end up @@ -1497,6 +1497,9 @@ static int generic_copy_file_checks(struct file *file_in, loff_t pos_in, */ if (flags & COPY_FILE_SPLICE) { /* cross sb splice is allowed */ + } else if (file_in->f_op->fop_flags & FOP_CROSS_TYPE_COPYFILE || + file_out->f_op->fop_flags & FOP_CROSS_TYPE_COPYFILE) { + /* file system understands how to cross FS types */ } else if (file_out->f_op->copy_file_range) { if (file_in->f_op->copy_file_range != file_out->f_op->copy_file_range) @@ -1576,10 +1579,12 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, * same sb using clone, but for filesystems where both clone and copy * are supported (e.g. nfs,cifs), we only call the copy method. */ - if (!splice && file_out->f_op->copy_file_range) { - ret = file_out->f_op->copy_file_range(file_in, pos_in, - file_out, pos_out, - len, flags); + if (!splice && (file_in->f_op->copy_file_range || file_out->f_op->copy_file_range)) { + ret = (file_in->f_op->copy_file_range ? + file_in->f_op->copy_file_range : + file_out->f_op->copy_file_range)(file_in, pos_in, + file_out, pos_out, + len, flags); } else if (!splice && file_in->f_op->remap_file_range && samesb) { ret = file_in->f_op->remap_file_range(file_in, pos_in, file_out, pos_out, diff --git a/include/linux/fs.h b/include/linux/fs.h index a4af70367f8a..1248a2542758 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2143,6 +2143,10 @@ struct file_operations { #define FOP_ASYNC_LOCK ((__force fop_flags_t)(1 << 6)) /* File system supports uncached read/write buffered IO */ #define FOP_DONTCACHE ((__force fop_flags_t)(1 << 7)) +/* copy_file_range accepts source and destination on different types of file + * system. If set, file_operations.copy_file_range must also be set. 
+ */ +#define FOP_CROSS_TYPE_COPYFILE ((__force fop_flags_t)(1 << 8)) /* Wrap a directory iterator that needs exclusive inode access */ int wrap_directory_iterator(struct file *, struct dir_context *, -- 2.43.0