On Wed, Jul 28, 2021 at 08:37:45AM +1000, NeilBrown wrote: > When a filesystem has internal mounts, it controls the filehandles > across all those mounts (subvols) in the filesystem. So it is useful to > be able to look up a filehandle again one mount, and get a result which > is in a different mount (part of the same overall file system). > > This patch makes that possible by changing export_decode_fh() and > export_decode_fh_raw() to take a vfsmount pointer by reference, and > possibly change the vfsmount pointed to before returning. > > The core of the change is in reconnect_path() which now not only checks > that the dentry is fully connected, but also that the vfsmnt reported > has the same 'dev' (reported by vfs_getattr) as the dentry. > If it doesn't, we walk up the dparent() chain to find the highest place > where the dev changes without there being a mount point, and trigger an > automount there. > > As no filesystems yet provide local-mounts, this does not yet change any > behaviour. > > In exportfs_decode_fh_raw() we previously tested for DCACHE_DISCONNECT > before calling reconnect_path(). That test is dropped. It was only a > minor optimisation and is now inconvenient. > > The change in overlayfs needs more careful thought than I have yet given > it. > > Signed-off-by: NeilBrown <neilb@xxxxxxx> > --- > fs/exportfs/expfs.c | 100 +++++++++++++++++++++++++++++++++++++++------- > fs/fhandle.c | 2 - > fs/nfsd/nfsfh.c | 9 +++- > fs/overlayfs/namei.c | 5 ++ > fs/xfs/xfs_ioctl.c | 12 ++++-- > include/linux/exportfs.h | 4 +- > 6 files changed, 106 insertions(+), 26 deletions(-) > > diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c > index 0106eba46d5a..2d7c42137b49 100644 > --- a/fs/exportfs/expfs.c > +++ b/fs/exportfs/expfs.c > @@ -207,11 +207,18 @@ static struct dentry *reconnect_one(struct vfsmount *mnt, > * that case reconnect_path may still succeed with target_dir fully > * connected, but further operations using the filehandle will fail when > * necessary (due to S_DEAD being set on the directory). > + * > + * If the filesystem supports multiple subvols, then *mntp may be updated > + * to a subordinate mount point on the same filesystem. > */ > static int > -reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf) > +reconnect_path(struct vfsmount **mntp, struct dentry *target_dir, char *nbuf) > { > + struct vfsmount *mnt = *mntp; > + struct path path; > struct dentry *dentry, *parent; > + struct kstat stat; > + dev_t target_dev; > > dentry = dget(target_dir); > > @@ -232,6 +239,68 @@ reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf) > } > dput(dentry); > clear_disconnected(target_dir); Minor nit--I'd prefer the following in a separate function. --b. > + > + /* Need to find appropriate vfsmount, which might not exist yet. > + * We may need to trigger automount points. > + */ > + path.mnt = mnt; > + path.dentry = target_dir; > + vfs_getattr_nosec(&path, &stat, 0, AT_STATX_DONT_SYNC); > + target_dev = stat.dev; > + > + path.dentry = mnt->mnt_root; > + vfs_getattr_nosec(&path, &stat, 0, AT_STATX_DONT_SYNC); > + > + while (stat.dev != target_dev) { > + /* walk up the dcache tree from target_dir, recording the > + * location of the most recent change in dev number, > + * until we find a mountpoint. > + * If there was no change in show_dev result before the > + * mountpount, the vfsmount at the mountpoint is what we want. > + * If there was, we need to trigger an automount where the > + * show_dev() result changed. > + */ > + struct dentry *last_change = NULL; > + dev_t last_dev = target_dev; > + > + dentry = dget(target_dir); > + while ((parent = dget_parent(dentry)) != dentry) { > + path.dentry = parent; > + vfs_getattr_nosec(&path, &stat, 0, AT_STATX_DONT_SYNC); > + if (stat.dev != last_dev) { > + path.dentry = dentry; > + mnt = lookup_mnt(&path); > + if (mnt) { > + mntput(path.mnt); > + path.mnt = mnt; > + break; > + } > + dput(last_change); > + last_change = dget(dentry); > + last_dev = stat.dev; > + } > + dput(dentry); > + dentry = parent; > + } > + dput(dentry); dput(parent); > + > + if (!last_change) > + break; > + > + mnt = path.mnt; > + path.dentry = last_change; > + follow_down(&path, LOOKUP_AUTOMOUNT); > + dput(path.dentry); > + if (path.mnt == mnt) > + /* There should have been a mount-trap there, > + * but there wasn't. Just give up. > + */ > + break; > + > + path.dentry = mnt->mnt_root; > + vfs_getattr_nosec(&path, &stat, 0, AT_STATX_DONT_SYNC); > + } > + *mntp = path.mnt; > return 0; > } > > @@ -418,12 +487,13 @@ int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, int *max_len, > EXPORT_SYMBOL_GPL(exportfs_encode_fh); > > struct dentry * > -exportfs_decode_fh_raw(struct vfsmount *mnt, struct fid *fid, int fh_len, > +exportfs_decode_fh_raw(struct vfsmount **mntp, struct fid *fid, int fh_len, > int fileid_type, > int (*acceptable)(void *, struct dentry *), > void *context) > { > - const struct export_operations *nop = mnt->mnt_sb->s_export_op; > + struct super_block *sb = (*mntp)->mnt_sb; > + const struct export_operations *nop = sb->s_export_op; > struct dentry *result, *alias; > char nbuf[NAME_MAX+1]; > int err; > @@ -433,7 +503,7 @@ exportfs_decode_fh_raw(struct vfsmount *mnt, struct fid *fid, int fh_len, > */ > if (!nop || !nop->fh_to_dentry) > return ERR_PTR(-ESTALE); > - result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type); > + result = nop->fh_to_dentry(sb, fid, fh_len, fileid_type); > if (IS_ERR_OR_NULL(result)) > return result; > > @@ -452,14 +522,12 @@ exportfs_decode_fh_raw(struct vfsmount *mnt, struct fid *fid, int fh_len, > * > * On the positive side there is only one dentry for each > * directory inode. On the negative side this implies that we > - * to ensure our dentry is connected all the way up to the > + * need to ensure our dentry is connected all the way up to the > * filesystem root. > */ > - if (result->d_flags & DCACHE_DISCONNECTED) { > - err = reconnect_path(mnt, result, nbuf); > - if (err) > - goto err_result; > - } > + err = reconnect_path(mntp, result, nbuf); > + if (err) > + goto err_result; > > if (!acceptable(context, result)) { > err = -EACCES; > @@ -494,7 +562,7 @@ exportfs_decode_fh_raw(struct vfsmount *mnt, struct fid *fid, int fh_len, > if (!nop->fh_to_parent) > goto err_result; > > - target_dir = nop->fh_to_parent(mnt->mnt_sb, fid, > + target_dir = nop->fh_to_parent(sb, fid, > fh_len, fileid_type); > if (!target_dir) > goto err_result; > @@ -507,7 +575,7 @@ exportfs_decode_fh_raw(struct vfsmount *mnt, struct fid *fid, int fh_len, > * connected to the filesystem root. The VFS really doesn't > * like disconnected directories.. > */ > - err = reconnect_path(mnt, target_dir, nbuf); > + err = reconnect_path(mntp, target_dir, nbuf); > if (err) { > dput(target_dir); > goto err_result; > @@ -518,7 +586,7 @@ exportfs_decode_fh_raw(struct vfsmount *mnt, struct fid *fid, int fh_len, > * dentry for the inode we're after, make sure that our > * inode is actually connected to the parent. > */ > - err = exportfs_get_name(mnt, target_dir, nbuf, result); > + err = exportfs_get_name(*mntp, target_dir, nbuf, result); > if (err) { > dput(target_dir); > goto err_result; > @@ -556,7 +624,7 @@ exportfs_decode_fh_raw(struct vfsmount *mnt, struct fid *fid, int fh_len, > goto err_result; > } > > - return alias; > + return result; > } > > err_result: > @@ -565,14 +633,14 @@ exportfs_decode_fh_raw(struct vfsmount *mnt, struct fid *fid, int fh_len, > } > EXPORT_SYMBOL_GPL(exportfs_decode_fh_raw); > > -struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, > +struct dentry *exportfs_decode_fh(struct vfsmount **mntp, struct fid *fid, > int fh_len, int fileid_type, > int (*acceptable)(void *, struct dentry *), > void *context) > { > struct dentry *ret; > > - ret = exportfs_decode_fh_raw(mnt, fid, fh_len, fileid_type, > + ret = exportfs_decode_fh_raw(mntp, fid, fh_len, fileid_type, > acceptable, context); > if (IS_ERR_OR_NULL(ret)) { > if (ret == ERR_PTR(-ENOMEM)) > diff --git a/fs/fhandle.c b/fs/fhandle.c > index 6630c69c23a2..b47c7696469f 100644 > --- a/fs/fhandle.c > +++ b/fs/fhandle.c > @@ -149,7 +149,7 @@ static int do_handle_to_path(int mountdirfd, struct file_handle *handle, > } > /* change the handle size to multiple of sizeof(u32) */ > handle_dwords = handle->handle_bytes >> 2; > - path->dentry = exportfs_decode_fh(path->mnt, > + path->dentry = exportfs_decode_fh(&path->mnt, > (struct fid *)handle->f_handle, > handle_dwords, handle->handle_type, > vfs_dentry_acceptable, NULL); > diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c > index 0bf7ac13ae50..4023046f63e2 100644 > --- a/fs/nfsd/nfsfh.c > +++ b/fs/nfsd/nfsfh.c > @@ -157,6 +157,7 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) > struct fid *fid = NULL, sfid; > struct svc_export *exp; > struct dentry *dentry; > + struct vfsmount *mnt = NULL; > int fileid_type; > int data_left = fh->fh_size/4; > __be32 error; > @@ -253,6 +254,8 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) > if (rqstp->rq_vers > 2) > error = nfserr_badhandle; > > + mnt = mntget(exp->ex_path.mnt); > + > if (fh->fh_version != 1) { > sfid.i32.ino = fh->ofh_ino; > sfid.i32.gen = fh->ofh_generation; > @@ -269,7 +272,7 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) > if (fileid_type == FILEID_ROOT) > dentry = dget(exp->ex_path.dentry); > else { > - dentry = exportfs_decode_fh_raw(exp->ex_path.mnt, fid, > + dentry = exportfs_decode_fh_raw(&mnt, fid, > data_left, fileid_type, > nfsd_acceptable, exp); > if (IS_ERR_OR_NULL(dentry)) { > @@ -299,7 +302,7 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) > } > > fhp->fh_dentry = dentry; > - fhp->fh_mnt = mntget(exp->ex_path.mnt); > + fhp->fh_mnt = mnt; > fhp->fh_export = exp; > > switch (rqstp->rq_vers) { > @@ -317,6 +320,7 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) > > return 0; > out: > + mntput(mnt); > exp_put(exp); > return error; > } > @@ -428,7 +432,6 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access) > return error; > } > > - > /* > * Compose a file handle for an NFS reply. > * > diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c > index 210cd6f66e28..0bca19f6df54 100644 > --- a/fs/overlayfs/namei.c > +++ b/fs/overlayfs/namei.c > @@ -155,6 +155,7 @@ struct dentry *ovl_decode_real_fh(struct ovl_fs *ofs, struct ovl_fh *fh, > { > struct dentry *real; > int bytes; > + struct vfsmount *mnt2; > > if (!capable(CAP_DAC_READ_SEARCH)) > return NULL; > @@ -169,9 +170,11 @@ struct dentry *ovl_decode_real_fh(struct ovl_fs *ofs, struct ovl_fh *fh, > return NULL; > > bytes = (fh->fb.len - offsetof(struct ovl_fb, fid)); > - real = exportfs_decode_fh(mnt, (struct fid *)fh->fb.fid, > + mnt2 = mntget(mnt); > + real = exportfs_decode_fh(&mnt2, (struct fid *)fh->fb.fid, > bytes >> 2, (int)fh->fb.type, > connected ? ovl_acceptable : NULL, mnt); > + mntput(mnt2); > if (IS_ERR(real)) { > /* > * Treat stale file handle to lower file as "origin unknown". > diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c > index 16039ea10ac9..76eb7d540811 100644 > --- a/fs/xfs/xfs_ioctl.c > +++ b/fs/xfs/xfs_ioctl.c > @@ -149,6 +149,8 @@ xfs_handle_to_dentry( > { > xfs_handle_t handle; > struct xfs_fid64 fid; > + struct dentry *ret; > + struct vfsmount *mnt; > > /* > * Only allow handle opens under a directory. > @@ -168,9 +170,13 @@ xfs_handle_to_dentry( > fid.ino = handle.ha_fid.fid_ino; > fid.gen = handle.ha_fid.fid_gen; > > - return exportfs_decode_fh(parfilp->f_path.mnt, (struct fid *)&fid, 3, > - FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG, > - xfs_handle_acceptable, NULL); > + mnt = mntget(parfilp->f_path.mnt); > + ret = exportfs_decode_fh(&mnt, (struct fid *)&fid, 3, > + FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG, > + xfs_handle_acceptable, NULL); > + WARN_ON(mnt != parfilp->f_path.mnt); > + mntput(mnt); > + return ret; > } > > STATIC struct dentry * > diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h > index fe848901fcc3..9a8c5434a5cf 100644 > --- a/include/linux/exportfs.h > +++ b/include/linux/exportfs.h > @@ -228,12 +228,12 @@ extern int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid, > int *max_len, struct inode *parent); > extern int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, > int *max_len, int connectable); > -extern struct dentry *exportfs_decode_fh_raw(struct vfsmount *mnt, > +extern struct dentry *exportfs_decode_fh_raw(struct vfsmount **mntp, > struct fid *fid, int fh_len, > int fileid_type, > int (*acceptable)(void *, struct dentry *), > void *context); > -extern struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, > +extern struct dentry *exportfs_decode_fh(struct vfsmount **mnt, struct fid *fid, > int fh_len, int fileid_type, int (*acceptable)(void *, struct dentry *), > void *context); > >