On Tue, Oct 31, 2017 at 3:43 PM, Miklos Szeredi <miklos@xxxxxxxxxx> wrote: > On Mon, Oct 30, 2017 at 9:27 PM, Amir Goldstein <amir73il@xxxxxxxxx> wrote: >> From: Chandan Rajendra <chandan@xxxxxxxxxxxxxxxxxx> >> >> For stat(2) on lowerdir non-dir entries in non-samefs case, this commit >> provides unique values for st_dev. The unique values are obtained by >> allocating anonymous bdevs for each of the lowerdirs in the overlayfs >> instance. >> >> Signed-off-by: Chandan Rajendra <chandan@xxxxxxxxxxxxxxxxxx> >> [amir: rename to struct ovl_layer lower_layers and struct ovl_path] >> Signed-off-by: Amir Goldstein <amir73il@xxxxxxxxx> >> --- >> fs/overlayfs/inode.c | 18 +++++++++++ >> fs/overlayfs/namei.c | 52 ++++++++++++++++++------------- >> fs/overlayfs/overlayfs.h | 4 +-- >> fs/overlayfs/ovl_entry.h | 14 +++++++-- >> fs/overlayfs/readdir.c | 4 +-- >> fs/overlayfs/super.c | 80 ++++++++++++++++++++++++++++++------------------ >> fs/overlayfs/util.c | 7 ++++- >> 7 files changed, 121 insertions(+), 58 deletions(-) >> >> diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c >> index 50e233ccca53..ec5c3ce91868 100644 >> --- a/fs/overlayfs/inode.c >> +++ b/fs/overlayfs/inode.c >> @@ -8,6 +8,7 @@ >> */ >> >> #include <linux/fs.h> >> +#include <linux/mount.h> >> #include <linux/slab.h> >> #include <linux/cred.h> >> #include <linux/xattr.h> >> @@ -15,6 +16,21 @@ >> #include <linux/ratelimit.h> >> #include "overlayfs.h" >> >> + >> +static dev_t ovl_get_pseudo_dev(struct dentry *dentry, dev_t dev) >> +{ >> + struct ovl_fs *ofs = dentry->d_sb->s_fs_info; >> + struct ovl_entry *oe = dentry->d_fsdata; >> + >> + if (ofs->upper_mnt && ofs->upper_mnt->mnt_sb->s_dev == dev) >> + return dev; > > This doesn't feel right. Okay, we may want to check against upper > device number for sanity and WARN if doesn't match, although we can > have all sorts of weirdness with btrfs, for example. So the logic > should be: > > if type is upper then use the dev returned by stat > else return the pseudo dev > > >> + >> + if (oe->numlower) >> + return oe->lowerstack[0].layer->pseudo_dev; >> + >> + return dev; >> +} >> + >> int ovl_setattr(struct dentry *dentry, struct iattr *attr) >> { >> int err; >> @@ -121,6 +137,8 @@ int ovl_getattr(const struct path *path, struct kstat *stat, >> */ >> stat->dev = dentry->d_sb->s_dev; >> stat->ino = dentry->d_inode->i_ino; >> + } else { >> + stat->dev = ovl_get_pseudo_dev(dentry, stat->dev); >> } >> >> /* > > The above can be a separate patch. I.e. split this into: > > 1) add ovl_layer infrastructure > 2) use that infrastructure in getattr > > 1) doesn't change functionality while 2) does. > >> diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c >> index de2dac98e147..78e965527167 100644 >> --- a/fs/overlayfs/namei.c >> +++ b/fs/overlayfs/namei.c >> @@ -285,16 +285,15 @@ static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d, >> >> >> static int ovl_check_origin(struct dentry *upperdentry, >> - struct path *lowerstack, unsigned int numlower, >> - struct path **stackp, unsigned int *ctrp) >> + struct ovl_path *lower, unsigned int numlower, >> + struct ovl_path **stackp, unsigned int *ctrp) >> { >> struct vfsmount *mnt; >> struct dentry *origin = NULL; >> int i; >> >> - >> for (i = 0; i < numlower; i++) { >> - mnt = lowerstack[i].mnt; >> + mnt = lower[i].layer->mnt; >> origin = ovl_get_origin(upperdentry, mnt); >> if (IS_ERR(origin)) >> return PTR_ERR(origin); >> @@ -308,12 +307,12 @@ static int ovl_check_origin(struct dentry *upperdentry, >> >> BUG_ON(*ctrp); >> if (!*stackp) >> - *stackp = kmalloc(sizeof(struct path), GFP_KERNEL); >> + *stackp = kmalloc(sizeof(struct ovl_path), GFP_KERNEL); >> if (!*stackp) { >> dput(origin); >> return -ENOMEM; >> } >> - **stackp = (struct path) { .dentry = origin, .mnt = mnt }; >> + **stackp = (struct ovl_path){.dentry = origin, .layer = lower[i].layer}; >> *ctrp = 1; >> >> return 0; >> @@ -383,13 +382,13 @@ int ovl_verify_origin(struct dentry *dentry, struct vfsmount *mnt, >> * OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path. >> * Return 0 on match, -ESTALE on mismatch or stale origin, < 0 on error. >> */ >> -int ovl_verify_index(struct dentry *index, struct path *lowerstack, >> +int ovl_verify_index(struct dentry *index, struct ovl_path *lower, >> unsigned int numlower) >> { >> struct ovl_fh *fh = NULL; >> size_t len; >> - struct path origin = { }; >> - struct path *stack = &origin; >> + struct ovl_path origin = { }; >> + struct ovl_path *stack = &origin; >> unsigned int ctr = 0; >> int err; >> >> @@ -428,7 +427,7 @@ int ovl_verify_index(struct dentry *index, struct path *lowerstack, >> if (err) >> goto fail; >> >> - err = ovl_check_origin(index, lowerstack, numlower, &stack, &ctr); >> + err = ovl_check_origin(index, lower, numlower, &stack, &ctr); >> if (!err && !ctr) >> err = -ESTALE; >> if (err) >> @@ -570,11 +569,24 @@ int ovl_path_next(int idx, struct dentry *dentry, struct path *path, int *idxp) >> } >> BUG_ON(idx > oe->numlower); >> *idxp = idx; >> - *path = oe->lowerstack[idx - 1]; >> + path->dentry = oe->lowerstack[idx - 1].dentry; >> + path->mnt = oe->lowerstack[idx - 1].layer->mnt; >> >> return (idx < oe->numlower) ? idx + 1 : -1; >> } >> >> +static int ovl_find_layer(struct ovl_fs *ofs, struct ovl_path *path) >> +{ >> + int i; >> + >> + for (i = 0; i < ofs->numlower; i++) { >> + if (ofs->lower_layers[i].mnt == path->layer->mnt) >> + break; >> + } >> + >> + return i; >> +} >> + >> struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, >> unsigned int flags) >> { >> @@ -583,7 +595,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, >> struct ovl_fs *ofs = dentry->d_sb->s_fs_info; >> struct ovl_entry *poe = dentry->d_parent->d_fsdata; >> struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata; >> - struct path *stack = NULL; >> + struct ovl_path *stack = NULL; >> struct dentry *upperdir, *upperdentry = NULL; >> struct dentry *index = NULL; >> unsigned int ctr = 0; >> @@ -648,17 +660,17 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, >> >> if (!d.stop && poe->numlower) { >> err = -ENOMEM; >> - stack = kcalloc(ofs->numlower, sizeof(struct path), >> + stack = kcalloc(ofs->numlower, sizeof(struct ovl_path), >> GFP_KERNEL); >> if (!stack) >> goto out_put_upper; >> } >> >> for (i = 0; !d.stop && i < poe->numlower; i++) { >> - struct path lowerpath = poe->lowerstack[i]; >> + struct ovl_path lower = poe->lowerstack[i]; >> >> d.last = i == poe->numlower - 1; >> - err = ovl_lookup_layer(lowerpath.dentry, &d, &this); >> + err = ovl_lookup_layer(lower.dentry, &d, &this); >> if (err) >> goto out_put; >> >> @@ -666,7 +678,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, >> continue; >> >> stack[ctr].dentry = this; >> - stack[ctr].mnt = lowerpath.mnt; >> + stack[ctr].layer = lower.layer; >> ctr++; >> >> if (d.stop) >> @@ -676,10 +688,8 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, >> poe = roe; >> >> /* Find the current layer on the root dentry */ >> - for (i = 0; i < poe->numlower; i++) >> - if (poe->lowerstack[i].mnt == lowerpath.mnt) >> - break; >> - if (WARN_ON(i == poe->numlower)) >> + i = ovl_find_layer(ofs, &lower); >> + if (WARN_ON(i == ofs->numlower)) >> break; >> } >> } >> @@ -702,7 +712,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, >> goto out_put; >> >> oe->opaque = upperopaque; >> - memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr); >> + memcpy(oe->lowerstack, stack, sizeof(struct ovl_path) * ctr); >> dentry->d_fsdata = oe; >> >> if (upperdentry) >> diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h >> index 73ef1e850635..335e9a052995 100644 >> --- a/fs/overlayfs/overlayfs.h >> +++ b/fs/overlayfs/overlayfs.h >> @@ -248,7 +248,7 @@ static inline bool ovl_is_impuredir(struct dentry *dentry) >> /* namei.c */ >> int ovl_verify_origin(struct dentry *dentry, struct vfsmount *mnt, >> struct dentry *origin, bool is_upper, bool set); >> -int ovl_verify_index(struct dentry *index, struct path *lowerstack, >> +int ovl_verify_index(struct dentry *index, struct ovl_path *lower, >> unsigned int numlower); >> int ovl_get_index_name(struct dentry *origin, struct qstr *name); >> int ovl_path_next(int idx, struct dentry *dentry, struct path *path, int *idxp); >> @@ -265,7 +265,7 @@ int ovl_check_d_type_supported(struct path *realpath); >> void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt, >> struct dentry *dentry, int level); >> int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt, >> - struct path *lowerstack, unsigned int numlower); >> + struct ovl_path *lower, unsigned int numlower); >> >> /* inode.c */ >> int ovl_set_nlink_upper(struct dentry *dentry); >> diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h >> index 25d9b5adcd42..93eb6a044dd2 100644 >> --- a/fs/overlayfs/ovl_entry.h >> +++ b/fs/overlayfs/ovl_entry.h >> @@ -17,11 +17,21 @@ struct ovl_config { >> bool index; >> }; >> >> +struct ovl_layer { >> + struct vfsmount *mnt; >> + dev_t pseudo_dev; >> +}; >> + >> +struct ovl_path { >> + struct ovl_layer *layer; >> + struct dentry *dentry; >> +}; >> + >> /* private information held for overlayfs's superblock */ >> struct ovl_fs { >> struct vfsmount *upper_mnt; >> unsigned numlower; >> - struct vfsmount **lower_mnt; >> + struct ovl_layer *lower_layers; >> /* workbasedir is the path at workdir= mount option */ >> struct dentry *workbasedir; >> /* workdir is the 'work' directory under workbasedir */ >> @@ -52,7 +62,7 @@ struct ovl_entry { >> struct rcu_head rcu; >> }; >> unsigned numlower; >> - struct path lowerstack[]; >> + struct ovl_path lowerstack[]; >> }; >> >> struct ovl_entry *ovl_alloc_entry(unsigned int numlower); >> diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c >> index bcc123c7706c..0ab657f2c1c8 100644 >> --- a/fs/overlayfs/readdir.c >> +++ b/fs/overlayfs/readdir.c >> @@ -1020,7 +1020,7 @@ void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt, >> } >> >> int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt, >> - struct path *lowerstack, unsigned int numlower) >> + struct ovl_path *lower, unsigned int numlower) >> { >> int err; >> struct dentry *index = NULL; >> @@ -1055,7 +1055,7 @@ int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt, >> index = NULL; >> break; >> } >> - err = ovl_verify_index(index, lowerstack, numlower); >> + err = ovl_verify_index(index, lower, numlower); >> /* Cleanup stale and orphan index entries */ >> if (err && (err == -ESTALE || err == -ENOENT)) >> err = ovl_cleanup(dir, index); >> diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c >> index 8702803ba328..323dfd7a0236 100644 >> --- a/fs/overlayfs/super.c >> +++ b/fs/overlayfs/super.c >> @@ -219,9 +219,11 @@ static void ovl_put_super(struct super_block *sb) >> if (ufs->upper_mnt && ufs->upperdir_locked) >> ovl_inuse_unlock(ufs->upper_mnt->mnt_root); >> mntput(ufs->upper_mnt); >> - for (i = 0; i < ufs->numlower; i++) >> - mntput(ufs->lower_mnt[i]); >> - kfree(ufs->lower_mnt); >> + for (i = 0; i < ufs->numlower; i++) { >> + mntput(ufs->lower_layers[i].mnt); >> + free_anon_bdev(ufs->lower_layers[i].pseudo_dev); >> + } >> + kfree(ufs->lower_layers); >> >> kfree(ufs->config.lowerdir); >> kfree(ufs->config.upperdir); >> @@ -1026,24 +1028,35 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) >> } >> >> err = -ENOMEM; >> - ufs->lower_mnt = kcalloc(numlower, sizeof(struct vfsmount *), GFP_KERNEL); >> - if (ufs->lower_mnt == NULL) >> + ufs->lower_layers = kcalloc(numlower, sizeof(struct ovl_layer), >> + GFP_KERNEL); >> + if (ufs->lower_layers == NULL) >> goto out_put_workdir; >> for (i = 0; i < numlower; i++) { >> - struct vfsmount *mnt = clone_private_mount(&stack[i]); >> + struct vfsmount *mnt; >> + dev_t dev; >> + >> + err = get_anon_bdev(&dev); >> + if (err) { >> + pr_err("overlayfs: failed to get anonymous bdev for lowerpath\n"); >> + goto out_put_lower_layers; >> + } >> >> + mnt = clone_private_mount(&stack[i]); >> err = PTR_ERR(mnt); >> if (IS_ERR(mnt)) { >> pr_err("overlayfs: failed to clone lowerpath\n"); >> - goto out_put_lower_mnt; >> + free_anon_bdev(dev); >> + goto out_put_lower_layers; >> } >> /* >> - * Make lower_mnt R/O. That way fchmod/fchown on lower file >> + * Make lower layers R/O. That way fchmod/fchown on lower file >> * will fail instead of modifying lower fs. >> */ >> mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME; >> >> - ufs->lower_mnt[ufs->numlower] = mnt; >> + ufs->lower_layers[ufs->numlower].mnt = mnt; >> + ufs->lower_layers[ufs->numlower].pseudo_dev = dev; >> ufs->numlower++; >> >> /* Check if all lower layers are on same sb */ >> @@ -1059,13 +1072,25 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) >> else if (ufs->upper_mnt->mnt_sb != ufs->same_sb) >> ufs->same_sb = NULL; >> >> + err = -ENOMEM; >> + oe = ovl_alloc_entry(numlower); >> + if (!oe) >> + goto out_put_lower_layers; >> + >> + for (i = 0; i < numlower; i++) { >> + oe->lowerstack[i].dentry = stack[i].dentry; >> + oe->lowerstack[i].layer = &(ufs->lower_layers[i]); >> + } >> + >> if (!(ovl_force_readonly(ufs)) && ufs->config.index) { >> /* Verify lower root is upper root origin */ >> - err = ovl_verify_origin(upperpath.dentry, ufs->lower_mnt[0], >> - stack[0].dentry, false, true); >> + err = ovl_verify_origin(upperpath.dentry, >> + oe->lowerstack[0].layer->mnt, >> + oe->lowerstack[0].dentry, >> + false, true); >> if (err) { >> pr_err("overlayfs: failed to verify upper root origin\n"); >> - goto out_put_lower_mnt; >> + goto out_free_oe; >> } >> >> ufs->indexdir = ovl_workdir_create(sb, ufs, workpath.dentry, >> @@ -1081,7 +1106,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) >> if (!err) >> err = ovl_indexdir_cleanup(ufs->indexdir, >> ufs->upper_mnt, >> - stack, numlower); >> + oe->lowerstack, >> + numlower); >> } >> if (err || !ufs->indexdir) >> pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n"); >> @@ -1106,11 +1132,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) >> /* Never override disk quota limits or use reserved space */ >> cap_lower(cred->cap_effective, CAP_SYS_RESOURCE); >> >> - err = -ENOMEM; >> - oe = ovl_alloc_entry(numlower); >> - if (!oe) >> - goto out_put_cred; >> - >> sb->s_magic = OVERLAYFS_SUPER_MAGIC; >> sb->s_op = &ovl_super_operations; >> sb->s_xattr = ovl_xattr_handlers; >> @@ -1119,11 +1140,12 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) >> >> root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, 0)); >> if (!root_dentry) >> - goto out_free_oe; >> + goto out_put_cred; >> >> mntput(upperpath.mnt); >> for (i = 0; i < numlower; i++) >> mntput(stack[i].mnt); >> + kfree(stack); >> mntput(workpath.mnt); >> kfree(lowertmp); >> >> @@ -1132,11 +1154,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) >> if (ovl_is_impuredir(upperpath.dentry)) >> ovl_set_flag(OVL_IMPURE, d_inode(root_dentry)); >> } >> - for (i = 0; i < numlower; i++) { >> - oe->lowerstack[i].dentry = stack[i].dentry; >> - oe->lowerstack[i].mnt = ufs->lower_mnt[i]; >> - } >> - kfree(stack); >> >> root_dentry->d_fsdata = oe; >> >> @@ -1147,16 +1164,19 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) >> >> return 0; >> >> -out_free_oe: >> - kfree(oe); >> out_put_cred: >> put_cred(ufs->creator_cred); >> out_put_indexdir: >> dput(ufs->indexdir); >> -out_put_lower_mnt: >> - for (i = 0; i < ufs->numlower; i++) >> - mntput(ufs->lower_mnt[i]); >> - kfree(ufs->lower_mnt); >> +out_free_oe: >> + kfree(oe); >> +out_put_lower_layers: >> + for (i = 0; i < ufs->numlower; i++) { >> + if (ufs->lower_layers[i].mnt) >> + free_anon_bdev(ufs->lower_layers[i].pseudo_dev); >> + mntput(ufs->lower_layers[i].mnt); >> + } >> + kfree(ufs->lower_layers); >> out_put_workdir: >> dput(ufs->workdir); >> mntput(ufs->upper_mnt); > > I'm not even attempting to review the ovl_fill_super() changes. We > need to do something about that function (it's my fault for letting it > grow this big and convoluted). Do you want me to have a go at > cleaning it up? Or do you feel an urging desire to do so? > Well, I am up to my neck with the NFS export patches and besides I probably won't get the cleanup done the way you would ;-) so I prefer you have a go at it and I will rebase the patches on top of your cleanup. Thanks, Amir. -- To unsubscribe from this list: send the line "unsubscribe linux-unionfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html