From: Jan Blunck <j.blunck@xxxxxxxxxxxxx> Subject: In-kernel file copy between union mounted filesystems This patch introduces in-kernel file copy between union mounted filesystems. When a file is opened for writing but resides on a lower (and thus read-only) layer of the union stack, it is first copied to the topmost union layer. This patch uses do_splice_direct() to perform the in-kernel file copy. Signed-off-by: Bharata B Rao <bharata@xxxxxxxxxx> Signed-off-by: Jan Blunck <j.blunck@xxxxxxxxxxxxx> --- fs/namei.c | 46 +++++ fs/union.c | 384 ++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/namei.h | 2 include/linux/union.h | 14 + 4 files changed, 445 insertions(+), 1 deletion(-) --- a/fs/namei.c +++ b/fs/namei.c @@ -830,8 +830,17 @@ done: path->mnt = mnt; path->dentry = dentry; - if (nd->dentry->d_sb != dentry->d_sb) + /* + * This should be checked after the following of unions. + * Otherwise we might run into trouble creating directories + * on mountpoints. :( + * But maybe we shouldn't set the LAST_LOWLEVEL flag here + * at all ... 
*/ + if (nd->dentry->d_sb != dentry->d_sb) { path->mnt = find_mnt(dentry); + UM_DEBUG_UID("Setting LAST_LOWLEVEL for %s\n", name->name); + nd->um_flags |= LAST_LOWLEVEL; + } __follow_mount(path); follow_union_mount(&path->mnt, &path->dentry); @@ -950,6 +959,14 @@ static fastcall int __link_path_walk(con if (err) break; + if ((nd->flags & LOOKUP_TOPMOST) && + (nd->um_flags & LAST_LOWLEVEL)) { + err = union_create_topdir(nd,&next.dentry,&next.mnt); + if (err) + goto out_dput; + nd->um_flags &= ~LAST_LOWLEVEL; + } + err = -ENOENT; inode = next.dentry->d_inode; if (!inode) @@ -1005,6 +1022,15 @@ last_component: err = do_lookup(nd, &this, &next); if (err) break; + + if ((nd->flags & LOOKUP_TOPMOST) && + (nd->um_flags & LAST_LOWLEVEL)) { + err = union_create_topdir(nd,&next.dentry,&next.mnt); + if (err) + goto out_dput; + nd->um_flags &= ~LAST_LOWLEVEL; + } + inode = next.dentry->d_inode; if ((lookup_flags & LOOKUP_FOLLOW) && inode && inode->i_op && inode->i_op->follow_link) { @@ -1177,6 +1203,7 @@ static int fastcall do_path_lookup(int d nd->last_type = LAST_ROOT; /* if there are only slashes... */ nd->flags = flags; + nd->um_flags = 0; nd->depth = 0; if (*name=='/') { @@ -1721,9 +1748,18 @@ int open_namei(int dfd, const char *path nd, flag); if (error) return error; + /* test for WRONLY and RDWR - flag's special lower bits */ + if (flag & 0x2) { + UM_DEBUG_UID("\"%s\" opened for writing\n", pathname); + error = union_copyup(nd, flag); + if (error) + return error; + } goto ok; } + UM_DEBUG_UID("open called with O_CREATE\n"); + /* * Create - we need to know the parent. 
*/ @@ -1740,6 +1776,8 @@ int open_namei(int dfd, const char *path if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len]) goto exit; + UM_DEBUG_UID("do_last now\n"); + dir = nd->dentry; nd->flags &= ~LOOKUP_PARENT; mutex_lock(&dir->d_inode->i_mutex); @@ -1793,6 +1831,12 @@ do_last: error = -EISDIR; if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode)) goto exit; + + if (flag & 0x2) { + error = union_copyup(nd, flag); + if (error) + goto exit; + } ok: error = may_open(nd, acc_mode, flag); if (error) --- a/fs/union.c +++ b/fs/union.c @@ -15,6 +15,11 @@ #include <linux/module.h> #include <linux/mount.h> #include <linux/file.h> +#include <linux/mm.h> +#include <linux/quotaops.h> +#include <linux/dnotify.h> +#include <linux/security.h> +#include <linux/pipe_fs_i.h> void __union_check(struct dentry *dentry) @@ -302,6 +307,53 @@ void __dput_union(struct dentry *dentry) return; } +/* + * union_relookup_topmost - lookup and create the topmost path to dentry + * @nd: pointer to nameidata + * @flags: lookup flags + */ +int union_relookup_topmost(struct nameidata *nd, int flags) +{ + int err; + char *kbuf, *name; + struct nameidata this; + + UM_DEBUG_UID("relookup the topmost dir for %s\n", + nd->dentry->d_name.name); + + kbuf = (char *)__get_free_page(GFP_KERNEL); + if (!kbuf) + return -ENOMEM; + + name = d_path(nd->dentry, nd->mnt, kbuf, PAGE_SIZE); + err = PTR_ERR(name); + if (IS_ERR(name)) + goto free_page; + + err = path_lookup(name, flags|LOOKUP_CREATE|LOOKUP_TOPMOST, &this); + if (err) + goto free_page; + + path_release(nd); + nd->dentry = this.dentry; + nd->mnt = this.mnt; + + /* If we are looking up the parent, copy the child details also */ + if (flags & LOOKUP_PARENT) { + nd->last = this.last; + nd->last_type = this.last_type; + } + + /* + * the nd->flags should be unchanged + */ + BUG_ON(this.um_flags & LAST_LOWLEVEL); + nd->um_flags &= ~LAST_LOWLEVEL; + free_page: + free_page((unsigned long)kbuf); + return err; +} + void attach_mnt_union(struct 
vfsmount *mnt, struct nameidata *nd) { struct dentry *tmp; @@ -1355,3 +1407,335 @@ out: union_cache_free(&cb.list); return res; } + +/* + * Union mount copyup support + */ + +/* + * Just do what vfs_create() would do, but the union mount way + */ +static struct dentry * union_create(struct dentry *parent, struct dentry *old, + struct nameidata *nd) +{ + struct dentry *dentry; + int err, mode; + + dentry = __lookup_hash_single(&old->d_name, parent, NULL); + if (IS_ERR(dentry)) + goto exit; + + err = -EEXIST; + if (dentry->d_inode) { + dput(dentry); + goto error; + } + + err = -ENOENT; + if (IS_DEADDIR(parent->d_inode)) + goto error; + err = -EACCES; /* shouldn't it be ENOSYS? */ + if (!parent->d_inode->i_op || !parent->d_inode->i_op->create) + goto error; + + mode = old->d_inode->i_mode & S_IALLUGO; + mode |= S_IFREG; + + err = security_inode_create(parent->d_inode, dentry, mode); + if (err) + goto error; + + DQUOT_INIT(parent->d_inode); + err = parent->d_inode->i_op->create(parent->d_inode, dentry, mode, nd); + if (err) + goto error; + + dentry->d_inode->i_uid = old->d_inode->i_uid; + dentry->d_inode->i_gid = old->d_inode->i_gid; + mark_inode_dirty(dentry->d_inode); +exit: + return dentry; +error: + return ERR_PTR(err); +} + +/* + * Just do what vfs_mkdir() would do, but the union mount way + */ +static struct dentry * union_mkdir(struct dentry *parent, struct dentry *dir) +{ + struct dentry *dentry; + int err, mode; + + dentry = __lookup_hash_single(&dir->d_name, parent, NULL); + if (IS_ERR(dentry)) + goto exit; + + err = -EEXIST; + if (dentry->d_inode) { + dput(dentry); + goto error; + } + + err = -ENOENT; + if (IS_DEADDIR(parent->d_inode)) + goto error; + err = -EPERM; + if (!parent->d_inode->i_op || !parent->d_inode->i_op->mkdir) + goto error; + + mode = dir->d_inode->i_mode & (S_IRWXUGO|S_ISVTX); + + err = security_inode_mkdir(parent->d_inode, dentry, mode); + if (err) + goto error; + + DQUOT_INIT(parent->d_inode); + err = 
parent->d_inode->i_op->mkdir(parent->d_inode, dentry, mode); + if (err) + goto error; + + dentry->d_inode->i_uid = dir->d_inode->i_uid; + dentry->d_inode->i_gid = dir->d_inode->i_gid; + mark_inode_dirty(dentry->d_inode); +exit: + return dentry; +error: + return ERR_PTR(err); +} + +static void __update_fs_pwd(struct dentry *old, struct dentry *new) +{ + struct dentry *old_pwd = NULL; + struct vfsmount *old_pwdmnt = NULL; + struct vfsmount *new_pwdmnt = find_mnt(new); + + write_lock(&current->fs->lock); + if (current->fs->pwd == old) { + old_pwd = current->fs->pwd; + old_pwdmnt = current->fs->pwdmnt; + current->fs->pwdmnt = mntget(new_pwdmnt); + current->fs->pwd = __dget(new); + UM_DEBUG_UID("replacing fs->pwd\n"); + UM_DEBUG_UID("oldpwd: name=\"%s\", inode=%p, devname=%s\n", + old_pwd->d_name.name, old_pwd->d_inode, + old_pwdmnt->mnt_devname); + UM_DEBUG_UID("newpwd: name=\"%s\", inode=%p, devname=%s\n", + new->d_name.name, new->d_inode, + new_pwdmnt->mnt_devname); + } + write_unlock(&current->fs->lock); + + if (old_pwd) { + __dput(old_pwd); + mntput(old_pwdmnt); + } + + mntput(new_pwdmnt); + + return; +} + +struct dentry * union_create_topmost(struct nameidata *nd, struct dentry *old) +{ + struct dentry *dentry; + struct dentry *parent = nd->dentry; + + UM_DEBUG_UID("dentry=%s\n", old->d_name.name); + + BUG_ON(parent->d_sb == old->d_sb); + if (!S_ISREG(old->d_inode->i_mode)) { + UM_DEBUG("This filetype isn't supported!\n"); + dentry = ERR_PTR(-EINVAL); + goto exit; + } + + /* + * Create the topmost regular file here. 
+ */ + mutex_lock(&parent->d_inode->i_mutex); + dentry = union_create(parent, old, nd); + mutex_unlock(&parent->d_inode->i_mutex); + if (IS_ERR(dentry)) { + UM_DEBUG("some error occurred\n"); + goto exit; + } + +exit: + return dentry; +} + +int union_create_topdir(struct nameidata *nd, + struct dentry **dentry, struct vfsmount **mnt) +{ + struct dentry *topdir; + struct dentry *parent = nd->dentry; + + UM_DEBUG_UID("dentry=%s\n", (*dentry)->d_name.name); + + if (parent->d_sb == (*dentry)->d_sb) + return 0; + + if (!S_ISDIR((*dentry)->d_inode->i_mode)) { + UM_DEBUG("Unsupported filetype!\n"); + BUG(); + } + + /* + * Create the topmost directory here. + */ + spin_lock(&(*dentry)->d_lock); + if (!(*dentry)->d_union) { + UM_DEBUG_LOCK("Allocate lock for \"%s\"\n", + (*dentry)->d_name.name); + (*dentry)->d_union = union_alloc(); + spin_unlock(&(*dentry)->d_lock); + } else { + spin_unlock(&(*dentry)->d_lock); + union_lock(*dentry); + } + mutex_lock(&parent->d_inode->i_mutex); + topdir = union_mkdir(parent, *dentry); + if (IS_ERR(topdir)) { + UM_DEBUG("some error occurred\n"); + mutex_unlock(&parent->d_inode->i_mutex); + union_unlock(*dentry); + return PTR_ERR(topdir); + } + + spin_lock(&topdir->d_lock); + if (topdir->d_union) { + UM_DEBUG("Aaargh! 
topdir \"%s\" already has a lock?!\n", + topdir->d_name.name); + dump_stack(); + } + topdir->d_union = union_get((*dentry)->d_union); + spin_unlock(&topdir->d_lock); + append_to_stack(topdir, *dentry); + __update_fs_pwd(*dentry, topdir); + *dentry = topdir; + mutex_unlock(&parent->d_inode->i_mutex); + union_unlock(*dentry); + + if (nd->mnt != *mnt) { + mntput(*mnt); + *mnt = mntget(nd->mnt); + } + + return 0; +} + +int union_copy_file(struct dentry *old_dentry, struct vfsmount *old_mnt, + struct dentry *new_dentry, struct vfsmount *new_mnt) +{ + int ret; + size_t size; + loff_t offset; + struct file *old_file, *new_file; + + dget(old_dentry); + mntget(old_mnt); + old_file = dentry_open(old_dentry, old_mnt, O_RDONLY); + if (IS_ERR(old_file)) + return PTR_ERR(old_file); + + dget(new_dentry); + mntget(new_mnt); + new_file = dentry_open(new_dentry, new_mnt, O_WRONLY); + ret = PTR_ERR(new_file); + if (IS_ERR(new_file)) + goto fput_old; + + size = i_size_read(old_file->f_path.dentry->d_inode); + if (((size_t)size != size) || ((ssize_t)size != size)) { + ret = -EFBIG; + goto fput_new; + } + + offset = 0; + ret = do_splice_direct(old_file, &offset, new_file, size, + SPLICE_F_MOVE); + if (ret >= 0) + ret = 0; + fput_new: + fput(new_file); + fput_old: + fput(old_file); + return ret; +} + +/** + * union_copyup - copy a file to the topmost layer of the union stack + * @nd: nameidata pointer to the file + * @flags: flags given to open_namei + */ +int union_copyup(struct nameidata *nd, int flags) +{ + struct dentry *dir; + struct dentry *dentry; + int err; + + if (!union_is_member(nd->dentry, nd->mnt)) + return 0; + if (!S_ISREG(nd->dentry->d_inode->i_mode)) + return 0; + + err = union_relookup_topmost(nd, nd->flags|LOOKUP_PARENT); + if (err) + return err; + + dir = nd->dentry; + nd->flags &= ~LOOKUP_PARENT; + union_lock(nd->dentry); + mutex_lock(&dir->d_inode->i_mutex); + dentry = __lookup_hash_union(&nd->last, nd->dentry, nd); + err = PTR_ERR(dentry); + if (IS_ERR(dentry)) { + 
mutex_unlock(&dir->d_inode->i_mutex); + union_unlock(nd->dentry); + return err; + } + + mutex_unlock(&dir->d_inode->i_mutex); + union_unlock(nd->dentry); + + err = -ENOENT; + if (!dentry->d_inode) + goto exit_dput; + + follow_mount(&nd->mnt, &dentry); + + err = -ENOENT; + if (!dentry->d_inode) + goto exit_dput; + + if (dentry->d_parent != dir) { + struct dentry *tmp; + struct vfsmount *old_mnt; + + UM_DEBUG_UID("already exists -> copy file\n"); + tmp = union_create_topmost(nd, dentry); + if (IS_ERR(tmp)) + goto exit_dput; + + old_mnt = find_mnt(dentry); + err = union_copy_file(dentry, old_mnt, tmp, nd->mnt); + if (err) { + int ret = vfs_unlink(tmp->d_inode, tmp); + BUG_ON(ret); + /* FIXME: not sure if there are return value + * we should not BUG() on */ + } + dput(dentry); + dentry = tmp; + mntput(old_mnt); + } + + dput(nd->dentry); + nd->dentry = dentry; + return 0; + +exit_dput: + dput(dentry); + return err; +} --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -58,6 +58,8 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LA #define LOOKUP_PARENT 16 #define LOOKUP_NOALT 32 #define LOOKUP_REVAL 64 +#define LOOKUP_TOPMOST 128 +#define LOOKUP_WHT 256 /* * Intent data */ --- a/include/linux/union.h +++ b/include/linux/union.h @@ -31,11 +31,25 @@ extern struct dentry * __lookup_hash_uni struct nameidata *); extern int readdir_union(struct file *, void *, filldir_t); +/* copy-up support */ +extern struct dentry * union_create_topmost(struct nameidata *, struct dentry *); +extern int union_create_topdir(struct nameidata *, struct dentry **, struct vfsmount **); +extern int union_is_member(struct dentry *, struct vfsmount *); +extern int union_copy_file(struct dentry *, struct vfsmount *, struct dentry *, struct vfsmount *); +extern int union_copyup(struct nameidata *, int); +extern int union_relookup_topmost(struct nameidata *, int); + #else /* CONFIG_UNION_MOUNT */ #define attach_mnt_union(mnt,nd) do { /* empty */ } while (0) #define detach_mnt_union(mnt,nd) do { /* 
empty */ } while (0) #define follow_union_mount(x,y) do { /* empty */ } while (0) +#define union_create_topmost(x,y) ({ BUG(); ERR_PTR(-EINVAL); }) +#define union_create_topdir(x,y,z) ({ (0); }) +#define union_is_member(x,y) ({ (0); }) +#define union_copy_file(dentry1,mnt1,dentry2,mnt2) ({ (0); }) +#define union_copyup(x,y) ({ (0); }) +#define union_relookup_topmost(x,y) ({ (0); }) #endif /* CONFIG_UNION_MOUNT */ - To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html