On Wednesday, November 1, 2017 4:31:00 AM IST Amir Goldstein wrote: > On Tue, Oct 31, 2017 at 3:53 PM, Amir Goldstein <amir73il@xxxxxxxxx> wrote: > > On Tue, Oct 31, 2017 at 3:43 PM, Miklos Szeredi <miklos@xxxxxxxxxx> wrote: > >> On Mon, Oct 30, 2017 at 9:27 PM, Amir Goldstein <amir73il@xxxxxxxxx> wrote: > >>> From: Chandan Rajendra <chandan@xxxxxxxxxxxxxxxxxx> > >>> > >>> For stat(2) on lowerdir non-dir entries in non-samefs case, this commit > >>> provides unique values for st_dev. The unique values are obtained by > >>> allocating anonymous bdevs for each of the lowerdirs in the overlayfs > >>> instance. > >>> > >>> Signed-off-by: Chandan Rajendra <chandan@xxxxxxxxxxxxxxxxxx> > >>> [amir: rename to struct ovl_layer lower_layers and struct ovl_path] > >>> Signed-off-by: Amir Goldstein <amir73il@xxxxxxxxx> > >>> --- > >>> fs/overlayfs/inode.c | 18 +++++++++++ > >>> fs/overlayfs/namei.c | 52 ++++++++++++++++++------------- > >>> fs/overlayfs/overlayfs.h | 4 +-- > >>> fs/overlayfs/ovl_entry.h | 14 +++++++-- > >>> fs/overlayfs/readdir.c | 4 +-- > >>> fs/overlayfs/super.c | 80 ++++++++++++++++++++++++++++++------------------ > >>> fs/overlayfs/util.c | 7 ++++- > >>> 7 files changed, 121 insertions(+), 58 deletions(-) > >>> > >>> diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c > >>> index 50e233ccca53..ec5c3ce91868 100644 > >>> --- a/fs/overlayfs/inode.c > >>> +++ b/fs/overlayfs/inode.c > >>> @@ -8,6 +8,7 @@ > >>> */ > >>> > >>> #include <linux/fs.h> > >>> +#include <linux/mount.h> > >>> #include <linux/slab.h> > >>> #include <linux/cred.h> > >>> #include <linux/xattr.h> > >>> @@ -15,6 +16,21 @@ > >>> #include <linux/ratelimit.h> > >>> #include "overlayfs.h" > >>> > >>> + > >>> +static dev_t ovl_get_pseudo_dev(struct dentry *dentry, dev_t dev) > >>> +{ > >>> + struct ovl_fs *ofs = dentry->d_sb->s_fs_info; > >>> + struct ovl_entry *oe = dentry->d_fsdata; > >>> + > >>> + if (ofs->upper_mnt && ofs->upper_mnt->mnt_sb->s_dev == dev) > >>> + return dev; > >> > >> This doesn't feel right. Okay, we may want to check against upper > >> device number for sanity and WARN if doesn't match, although we can > >> have all sorts of weirdness with btrfs, for example. So the logic > >> should be: > >> > >> if type is upper then use the dev returned by stat > >> else return the pseudo dev > >> > >> > >>> + > >>> + if (oe->numlower) > >>> + return oe->lowerstack[0].layer->pseudo_dev; > >>> + > >>> + return dev; > >>> +} > >>> + > >>> int ovl_setattr(struct dentry *dentry, struct iattr *attr) > >>> { > >>> int err; > >>> @@ -121,6 +137,8 @@ int ovl_getattr(const struct path *path, struct kstat *stat, > >>> */ > >>> stat->dev = dentry->d_sb->s_dev; > >>> stat->ino = dentry->d_inode->i_ino; > >>> + } else { > >>> + stat->dev = ovl_get_pseudo_dev(dentry, stat->dev); > >>> } > >>> > >>> /* > >> > >> The above can be a separate patch. I.e. split this into: > >> > >> 1) add ovl_layer infrastructure > >> 2) use that infrastructure in getattr > >> > >> 1) doesn't change functionality while 2) does. > >> > >>> diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c > >>> index de2dac98e147..78e965527167 100644 > >>> --- a/fs/overlayfs/namei.c > >>> +++ b/fs/overlayfs/namei.c > >>> @@ -285,16 +285,15 @@ static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d, > >>> > >>> > >>> static int ovl_check_origin(struct dentry *upperdentry, > >>> - struct path *lowerstack, unsigned int numlower, > >>> - struct path **stackp, unsigned int *ctrp) > >>> + struct ovl_path *lower, unsigned int numlower, > >>> + struct ovl_path **stackp, unsigned int *ctrp) > >>> { > >>> struct vfsmount *mnt; > >>> struct dentry *origin = NULL; > >>> int i; > >>> > >>> - > >>> for (i = 0; i < numlower; i++) { > >>> - mnt = lowerstack[i].mnt; > >>> + mnt = lower[i].layer->mnt; > >>> origin = ovl_get_origin(upperdentry, mnt); > >>> if (IS_ERR(origin)) > >>> return PTR_ERR(origin); > >>> @@ -308,12 +307,12 @@ static int ovl_check_origin(struct dentry *upperdentry, > >>> > >>> BUG_ON(*ctrp); > >>> if (!*stackp) > >>> - *stackp = kmalloc(sizeof(struct path), GFP_KERNEL); > >>> + *stackp = kmalloc(sizeof(struct ovl_path), GFP_KERNEL); > >>> if (!*stackp) { > >>> dput(origin); > >>> return -ENOMEM; > >>> } > >>> - **stackp = (struct path) { .dentry = origin, .mnt = mnt }; > >>> + **stackp = (struct ovl_path){.dentry = origin, .layer = lower[i].layer}; > >>> *ctrp = 1; > >>> > >>> return 0; > >>> @@ -383,13 +382,13 @@ int ovl_verify_origin(struct dentry *dentry, struct vfsmount *mnt, > >>> * OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path. > >>> * Return 0 on match, -ESTALE on mismatch or stale origin, < 0 on error. > >>> */ > >>> -int ovl_verify_index(struct dentry *index, struct path *lowerstack, > >>> +int ovl_verify_index(struct dentry *index, struct ovl_path *lower, > >>> unsigned int numlower) > >>> { > >>> struct ovl_fh *fh = NULL; > >>> size_t len; > >>> - struct path origin = { }; > >>> - struct path *stack = &origin; > >>> + struct ovl_path origin = { }; > >>> + struct ovl_path *stack = &origin; > >>> unsigned int ctr = 0; > >>> int err; > >>> > >>> @@ -428,7 +427,7 @@ int ovl_verify_index(struct dentry *index, struct path *lowerstack, > >>> if (err) > >>> goto fail; > >>> > >>> - err = ovl_check_origin(index, lowerstack, numlower, &stack, &ctr); > >>> + err = ovl_check_origin(index, lower, numlower, &stack, &ctr); > >>> if (!err && !ctr) > >>> err = -ESTALE; > >>> if (err) > >>> @@ -570,11 +569,24 @@ int ovl_path_next(int idx, struct dentry *dentry, struct path *path, int *idxp) > >>> } > >>> BUG_ON(idx > oe->numlower); > >>> *idxp = idx; > >>> - *path = oe->lowerstack[idx - 1]; > >>> + path->dentry = oe->lowerstack[idx - 1].dentry; > >>> + path->mnt = oe->lowerstack[idx - 1].layer->mnt; > >>> > >>> return (idx < oe->numlower) ? idx + 1 : -1; > >>> } > >>> > >>> +static int ovl_find_layer(struct ovl_fs *ofs, struct ovl_path *path) > >>> +{ > >>> + int i; > >>> + > >>> + for (i = 0; i < ofs->numlower; i++) { > >>> + if (ofs->lower_layers[i].mnt == path->layer->mnt) > >>> + break; > >>> + } > >>> + > >>> + return i; > >>> +} > >>> + > >>> struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, > >>> unsigned int flags) > >>> { > >>> @@ -583,7 +595,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, > >>> struct ovl_fs *ofs = dentry->d_sb->s_fs_info; > >>> struct ovl_entry *poe = dentry->d_parent->d_fsdata; > >>> struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata; > >>> - struct path *stack = NULL; > >>> + struct ovl_path *stack = NULL; > >>> struct dentry *upperdir, *upperdentry = NULL; > >>> struct dentry *index = NULL; > >>> unsigned int ctr = 0; > >>> @@ -648,17 +660,17 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, > >>> > >>> if (!d.stop && poe->numlower) { > >>> err = -ENOMEM; > >>> - stack = kcalloc(ofs->numlower, sizeof(struct path), > >>> + stack = kcalloc(ofs->numlower, sizeof(struct ovl_path), > >>> GFP_KERNEL); > >>> if (!stack) > >>> goto out_put_upper; > >>> } > >>> > >>> for (i = 0; !d.stop && i < poe->numlower; i++) { > >>> - struct path lowerpath = poe->lowerstack[i]; > >>> + struct ovl_path lower = poe->lowerstack[i]; > >>> > >>> d.last = i == poe->numlower - 1; > >>> - err = ovl_lookup_layer(lowerpath.dentry, &d, &this); > >>> + err = ovl_lookup_layer(lower.dentry, &d, &this); > >>> if (err) > >>> goto out_put; > >>> > >>> @@ -666,7 +678,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, > >>> continue; > >>> > >>> stack[ctr].dentry = this; > >>> - stack[ctr].mnt = lowerpath.mnt; > >>> + stack[ctr].layer = lower.layer; > >>> ctr++; > >>> > >>> if (d.stop) > >>> @@ -676,10 +688,8 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, > >>> poe = roe; > >>> > >>> /* Find the current layer on the root dentry */ > >>> - for (i = 0; i < poe->numlower; i++) > >>> - if (poe->lowerstack[i].mnt == lowerpath.mnt) > >>> - break; > >>> - if (WARN_ON(i == poe->numlower)) > >>> + i = ovl_find_layer(ofs, &lower); > >>> + if (WARN_ON(i == ofs->numlower)) > >>> break; > >>> } > >>> } > >>> @@ -702,7 +712,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, > >>> goto out_put; > >>> > >>> oe->opaque = upperopaque; > >>> - memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr); > >>> + memcpy(oe->lowerstack, stack, sizeof(struct ovl_path) * ctr); > >>> dentry->d_fsdata = oe; > >>> > >>> if (upperdentry) > >>> diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h > >>> index 73ef1e850635..335e9a052995 100644 > >>> --- a/fs/overlayfs/overlayfs.h > >>> +++ b/fs/overlayfs/overlayfs.h > >>> @@ -248,7 +248,7 @@ static inline bool ovl_is_impuredir(struct dentry *dentry) > >>> /* namei.c */ > >>> int ovl_verify_origin(struct dentry *dentry, struct vfsmount *mnt, > >>> struct dentry *origin, bool is_upper, bool set); > >>> -int ovl_verify_index(struct dentry *index, struct path *lowerstack, > >>> +int ovl_verify_index(struct dentry *index, struct ovl_path *lower, > >>> unsigned int numlower); > >>> int ovl_get_index_name(struct dentry *origin, struct qstr *name); > >>> int ovl_path_next(int idx, struct dentry *dentry, struct path *path, int *idxp); > >>> @@ -265,7 +265,7 @@ int ovl_check_d_type_supported(struct path *realpath); > >>> void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt, > >>> struct dentry *dentry, int level); > >>> int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt, > >>> - struct path *lowerstack, unsigned int numlower); > >>> + struct ovl_path *lower, unsigned int numlower); > >>> > >>> /* inode.c */ > >>> int ovl_set_nlink_upper(struct dentry *dentry); > >>> diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h > >>> index 25d9b5adcd42..93eb6a044dd2 100644 > >>> --- a/fs/overlayfs/ovl_entry.h > >>> +++ b/fs/overlayfs/ovl_entry.h > >>> @@ -17,11 +17,21 @@ struct ovl_config { > >>> bool index; > >>> }; > >>> > >>> +struct ovl_layer { > >>> + struct vfsmount *mnt; > >>> + dev_t pseudo_dev; > >>> +}; > >>> + > >>> +struct ovl_path { > >>> + struct ovl_layer *layer; > >>> + struct dentry *dentry; > >>> +}; > >>> + > >>> /* private information held for overlayfs's superblock */ > >>> struct ovl_fs { > >>> struct vfsmount *upper_mnt; > >>> unsigned numlower; > >>> - struct vfsmount **lower_mnt; > >>> + struct ovl_layer *lower_layers; > >>> /* workbasedir is the path at workdir= mount option */ > >>> struct dentry *workbasedir; > >>> /* workdir is the 'work' directory under workbasedir */ > >>> @@ -52,7 +62,7 @@ struct ovl_entry { > >>> struct rcu_head rcu; > >>> }; > >>> unsigned numlower; > >>> - struct path lowerstack[]; > >>> + struct ovl_path lowerstack[]; > >>> }; > >>> > >>> struct ovl_entry *ovl_alloc_entry(unsigned int numlower); > >>> diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c > >>> index bcc123c7706c..0ab657f2c1c8 100644 > >>> --- a/fs/overlayfs/readdir.c > >>> +++ b/fs/overlayfs/readdir.c > >>> @@ -1020,7 +1020,7 @@ void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt, > >>> } > >>> > >>> int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt, > >>> - struct path *lowerstack, unsigned int numlower) > >>> + struct ovl_path *lower, unsigned int numlower) > >>> { > >>> int err; > >>> struct dentry *index = NULL; > >>> @@ -1055,7 +1055,7 @@ int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt, > >>> index = NULL; > >>> break; > >>> } > >>> - err = ovl_verify_index(index, lowerstack, numlower); > >>> + err = ovl_verify_index(index, lower, numlower); > >>> /* Cleanup stale and orphan index entries */ > >>> if (err && (err == -ESTALE || err == -ENOENT)) > >>> err = ovl_cleanup(dir, index); > >>> diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c > >>> index 8702803ba328..323dfd7a0236 100644 > >>> --- a/fs/overlayfs/super.c > >>> +++ b/fs/overlayfs/super.c > >>> @@ -219,9 +219,11 @@ static void ovl_put_super(struct super_block *sb) > >>> if (ufs->upper_mnt && ufs->upperdir_locked) > >>> ovl_inuse_unlock(ufs->upper_mnt->mnt_root); > >>> mntput(ufs->upper_mnt); > >>> - for (i = 0; i < ufs->numlower; i++) > >>> - mntput(ufs->lower_mnt[i]); > >>> - kfree(ufs->lower_mnt); > >>> + for (i = 0; i < ufs->numlower; i++) { > >>> + mntput(ufs->lower_layers[i].mnt); > >>> + free_anon_bdev(ufs->lower_layers[i].pseudo_dev); > >>> + } > >>> + kfree(ufs->lower_layers); > >>> > >>> kfree(ufs->config.lowerdir); > >>> kfree(ufs->config.upperdir); > >>> @@ -1026,24 +1028,35 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) > >>> } > >>> > >>> err = -ENOMEM; > >>> - ufs->lower_mnt = kcalloc(numlower, sizeof(struct vfsmount *), GFP_KERNEL); > >>> - if (ufs->lower_mnt == NULL) > >>> + ufs->lower_layers = kcalloc(numlower, sizeof(struct ovl_layer), > >>> + GFP_KERNEL); > >>> + if (ufs->lower_layers == NULL) > >>> goto out_put_workdir; > >>> for (i = 0; i < numlower; i++) { > >>> - struct vfsmount *mnt = clone_private_mount(&stack[i]); > >>> + struct vfsmount *mnt; > >>> + dev_t dev; > >>> + > >>> + err = get_anon_bdev(&dev); > >>> + if (err) { > >>> + pr_err("overlayfs: failed to get anonymous bdev for lowerpath\n"); > >>> + goto out_put_lower_layers; > >>> + } > >>> > >>> + mnt = clone_private_mount(&stack[i]); > >>> err = PTR_ERR(mnt); > >>> if (IS_ERR(mnt)) { > >>> pr_err("overlayfs: failed to clone lowerpath\n"); > >>> - goto out_put_lower_mnt; > >>> + free_anon_bdev(dev); > >>> + goto out_put_lower_layers; > >>> } > >>> /* > >>> - * Make lower_mnt R/O. That way fchmod/fchown on lower file > >>> + * Make lower layers R/O. That way fchmod/fchown on lower file > >>> * will fail instead of modifying lower fs. > >>> */ > >>> mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME; > >>> > >>> - ufs->lower_mnt[ufs->numlower] = mnt; > >>> + ufs->lower_layers[ufs->numlower].mnt = mnt; > >>> + ufs->lower_layers[ufs->numlower].pseudo_dev = dev; > >>> ufs->numlower++; > >>> > >>> /* Check if all lower layers are on same sb */ > >>> @@ -1059,13 +1072,25 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) > >>> else if (ufs->upper_mnt->mnt_sb != ufs->same_sb) > >>> ufs->same_sb = NULL; > >>> > >>> + err = -ENOMEM; > >>> + oe = ovl_alloc_entry(numlower); > >>> + if (!oe) > >>> + goto out_put_lower_layers; > >>> + > >>> + for (i = 0; i < numlower; i++) { > >>> + oe->lowerstack[i].dentry = stack[i].dentry; > >>> + oe->lowerstack[i].layer = &(ufs->lower_layers[i]); > >>> + } > >>> + > >>> if (!(ovl_force_readonly(ufs)) && ufs->config.index) { > >>> /* Verify lower root is upper root origin */ > >>> - err = ovl_verify_origin(upperpath.dentry, ufs->lower_mnt[0], > >>> - stack[0].dentry, false, true); > >>> + err = ovl_verify_origin(upperpath.dentry, > >>> + oe->lowerstack[0].layer->mnt, > >>> + oe->lowerstack[0].dentry, > >>> + false, true); > >>> if (err) { > >>> pr_err("overlayfs: failed to verify upper root origin\n"); > >>> - goto out_put_lower_mnt; > >>> + goto out_free_oe; > >>> } > >>> > >>> ufs->indexdir = ovl_workdir_create(sb, ufs, workpath.dentry, > >>> @@ -1081,7 +1106,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) > >>> if (!err) > >>> err = ovl_indexdir_cleanup(ufs->indexdir, > >>> ufs->upper_mnt, > >>> - stack, numlower); > >>> + oe->lowerstack, > >>> + numlower); > >>> } > >>> if (err || !ufs->indexdir) > >>> pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n"); > >>> @@ -1106,11 +1132,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) > >>> /* Never override disk quota limits or use reserved space */ > >>> cap_lower(cred->cap_effective, CAP_SYS_RESOURCE); > >>> > >>> - err = -ENOMEM; > >>> - oe = ovl_alloc_entry(numlower); > >>> - if (!oe) > >>> - goto out_put_cred; > >>> - > >>> sb->s_magic = OVERLAYFS_SUPER_MAGIC; > >>> sb->s_op = &ovl_super_operations; > >>> sb->s_xattr = ovl_xattr_handlers; > >>> @@ -1119,11 +1140,12 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) > >>> > >>> root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, 0)); > >>> if (!root_dentry) > >>> - goto out_free_oe; > >>> + goto out_put_cred; > >>> > >>> mntput(upperpath.mnt); > >>> for (i = 0; i < numlower; i++) > >>> mntput(stack[i].mnt); > >>> + kfree(stack); > >>> mntput(workpath.mnt); > >>> kfree(lowertmp); > >>> > >>> @@ -1132,11 +1154,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) > >>> if (ovl_is_impuredir(upperpath.dentry)) > >>> ovl_set_flag(OVL_IMPURE, d_inode(root_dentry)); > >>> } > >>> - for (i = 0; i < numlower; i++) { > >>> - oe->lowerstack[i].dentry = stack[i].dentry; > >>> - oe->lowerstack[i].mnt = ufs->lower_mnt[i]; > >>> - } > >>> - kfree(stack); > >>> > >>> root_dentry->d_fsdata = oe; > >>> > >>> @@ -1147,16 +1164,19 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) > >>> > >>> return 0; > >>> > >>> -out_free_oe: > >>> - kfree(oe); > >>> out_put_cred: > >>> put_cred(ufs->creator_cred); > >>> out_put_indexdir: > >>> dput(ufs->indexdir); > >>> -out_put_lower_mnt: > >>> - for (i = 0; i < ufs->numlower; i++) > >>> - mntput(ufs->lower_mnt[i]); > >>> - kfree(ufs->lower_mnt); > >>> +out_free_oe: > >>> + kfree(oe); > >>> +out_put_lower_layers: > >>> + for (i = 0; i < ufs->numlower; i++) { > >>> + if (ufs->lower_layers[i].mnt) > >>> + free_anon_bdev(ufs->lower_layers[i].pseudo_dev); > >>> + mntput(ufs->lower_layers[i].mnt); > >>> + } > >>> + kfree(ufs->lower_layers); > >>> out_put_workdir: > >>> dput(ufs->workdir); > >>> mntput(ufs->upper_mnt); > >> > >> I'm not even attempting to review the ovl_fill_super() changes. We > >> need to do something about that function (it's my fault for letting it > >> grow this big and convoluted). Do you want me to have a go at > >> cleaning it up? Or do you feel an urging desire to do so? > >> > > > > Well, I am up to my neck with the NFS export patches and besides > > I probably won't get the cleanup done the way you would ;-) > > so I prefer you have a go at it and I will rebase the patches on top > > of your cleanup. > > > > Pushed the requested changes to branch: > https://github.com/amir73il/linux/commits/ovl-nonsamefs-v6 > > Currently same tip as overlayfs-devel. > Amir, The changes look fine to me. Thanks for addressing the review comments. -- chandan -- To unsubscribe from this list: send the line "unsubscribe linux-unionfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html