On Wed, Jul 28, 2021 at 08:37:45AM +1000, NeilBrown wrote: > Enhance nfsd to detect internal mounts and to cross them without > requiring a new export. Why don't we want a new export? (Honest question, it's not obvious to me what the best behavior is.) --b. > > Also ensure the fsid reported is different for different submounts. We > do this by xoring in the ino of the mounted-on directory. This makes > sense for btrfs at least. > > Signed-off-by: NeilBrown <neilb@xxxxxxx> > --- > fs/nfsd/nfs3xdr.c | 28 +++++++++++++++++++++------- > fs/nfsd/nfs4xdr.c | 34 +++++++++++++++++++++++----------- > fs/nfsd/nfsfh.c | 7 ++++++- > fs/nfsd/vfs.c | 11 +++++++++-- > 4 files changed, 59 insertions(+), 21 deletions(-) > > diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c > index 67af0c5c1543..80b1cc0334fa 100644 > --- a/fs/nfsd/nfs3xdr.c > +++ b/fs/nfsd/nfs3xdr.c > @@ -370,6 +370,8 @@ svcxdr_encode_fattr3(struct svc_rqst *rqstp, struct xdr_stream *xdr, > case FSIDSOURCE_UUID: > fsid = ((u64 *)fhp->fh_export->ex_uuid)[0]; > fsid ^= ((u64 *)fhp->fh_export->ex_uuid)[1]; > + if (fhp->fh_mnt != fhp->fh_export->ex_path.mnt) > + fsid ^= nfsd_get_mounted_on(fhp->fh_mnt); > break; > default: > fsid = (u64)huge_encode_dev(fhp->fh_dentry->d_sb->s_dev); > @@ -1094,8 +1096,8 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp, > __be32 rv = nfserr_noent; > > dparent = cd->fh.fh_dentry; > - exp = cd->fh.fh_export; > - child.mnt = cd->fh.fh_mnt; > + exp = exp_get(cd->fh.fh_export); > + child.mnt = mntget(cd->fh.fh_mnt); > > if (isdotent(name, namlen)) { > if (namlen == 2) { > @@ -1112,15 +1114,27 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp, > child.dentry = dget(dparent); > } else > child.dentry = lookup_positive_unlocked(name, dparent, namlen); > - if (IS_ERR(child.dentry)) > + if (IS_ERR(child.dentry)) { > + mntput(child.mnt); > + exp_put(exp); > return rv; > - if (d_mountpoint(child.dentry)) > - goto out; > - if (child.dentry->d_inode->i_ino != ino) > + } > + /* If child is a mountpoint, then we want to expose the fact > + * so client can create a mountpoint. If not, then a different > + * ino number probably means a race with rename, so avoid providing > + * too much detail. > + */ > + if (nfsd_mountpoint(child.dentry, exp)) { > + int err; > + err = nfsd_cross_mnt(cd->rqstp, &child, &exp); > + if (err) > + goto out; > + } else if (child.dentry->d_inode->i_ino != ino) > goto out; > rv = fh_compose(fhp, exp, &child, &cd->fh); > out: > - dput(child.dentry); > + path_put(&child); > + exp_put(exp); > return rv; > } > > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c > index d5683b6a74b2..4dbc99ed2c8b 100644 > --- a/fs/nfsd/nfs4xdr.c > +++ b/fs/nfsd/nfs4xdr.c > @@ -2817,6 +2817,8 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, > struct kstat stat; > struct svc_fh *tempfh = NULL; > struct kstatfs statfs; > + u64 mounted_on_ino; > + u64 sub_fsid; > __be32 *p; > int starting_len = xdr->buf->len; > int attrlen_offset; > @@ -2871,6 +2873,24 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, > goto out; > fhp = tempfh; > } > + if ((bmval0 & FATTR4_WORD0_FSID) || > + (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID)) { > + mounted_on_ino = stat.ino; > + sub_fsid = 0; > + /* > + * The inode number that the current mnt is mounted on is > + * used for MOUNTED_ON_FILED if we are at the root, > + * and for sub_fsid if mnt is not the export mnt. > + */ > + if (ignore_crossmnt == 0) { > + u64 moi = nfsd_get_mounted_on(mnt); > + > + if (dentry == mnt->mnt_root && moi) > + mounted_on_ino = moi; > + if (mnt != exp->ex_path.mnt) > + sub_fsid = moi; > + } > + } > if (bmval0 & FATTR4_WORD0_ACL) { > err = nfsd4_get_nfs4_acl(rqstp, dentry, &acl); > if (err == -EOPNOTSUPP) > @@ -3008,6 +3028,8 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, > case FSIDSOURCE_UUID: > p = xdr_encode_opaque_fixed(p, exp->ex_uuid, > EX_UUID_LEN); > + if (mnt != exp->ex_path.mnt) > + *(u64*)(p-2) ^= sub_fsid; > break; > } > } > @@ -3253,20 +3275,10 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, > *p++ = cpu_to_be32(stat.mtime.tv_nsec); > } > if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) { > - u64 ino; > - > p = xdr_reserve_space(xdr, 8); > if (!p) > goto out_resource; > - /* > - * Get parent's attributes if not ignoring crossmount > - * and this is the root of a cross-mounted filesystem. > - */ > - if (ignore_crossmnt == 0 && dentry == mnt->mnt_root) > - ino = nfsd_get_mounted_on(mnt); > - if (!ino) > - ino = stat.ino; > - p = xdr_encode_hyper(p, ino); > + p = xdr_encode_hyper(p, mounted_on_ino); > } > #ifdef CONFIG_NFSD_PNFS > if (bmval1 & FATTR4_WORD1_FS_LAYOUT_TYPES) { > diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c > index 4023046f63e2..4b53838bca89 100644 > --- a/fs/nfsd/nfsfh.c > +++ b/fs/nfsd/nfsfh.c > @@ -9,7 +9,7 @@ > */ > > #include <linux/exportfs.h> > - > +#include <linux/namei.h> > #include <linux/sunrpc/svcauth_gss.h> > #include "nfsd.h" > #include "vfs.h" > @@ -285,6 +285,11 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) > default: > dentry = ERR_PTR(-ESTALE); > } > + } else if (nfsd_mountpoint(dentry, exp)) { > + struct path path = { .mnt = mnt, .dentry = dentry }; > + follow_down(&path, LOOKUP_AUTOMOUNT); > + mnt = path.mnt; > + dentry = path.dentry; > } > } > if (dentry == NULL) > diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c > index baa12ac36ece..22523e1cd478 100644 > --- a/fs/nfsd/vfs.c > +++ b/fs/nfsd/vfs.c > @@ -64,7 +64,7 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct path *path_parent, > .dentry = dget(path_parent->dentry)}; > int err = 0; > > - err = follow_down(&path, 0); > + err = follow_down(&path, LOOKUP_AUTOMOUNT); > if (err < 0) > goto out; > if (path.mnt == path_parent->mnt && path.dentry == path_parent->dentry && > @@ -73,6 +73,13 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct path *path_parent, > path_put(&path); > goto out; > } > + if (mount_is_internal(path.mnt)) { > + /* Use the new path, but don't look for a new export */ > + /* FIXME should I check NOHIDE in this case?? */ > + path_put(path_parent); > + *path_parent = path; > + goto out; > + } > > exp2 = rqst_exp_get_by_name(rqstp, &path); > if (IS_ERR(exp2)) { > @@ -157,7 +164,7 @@ int nfsd_mountpoint(struct dentry *dentry, struct svc_export *exp) > return 1; > if (nfsd4_is_junction(dentry)) > return 1; > - if (d_mountpoint(dentry)) > + if (d_managed(dentry)) > /* > * Might only be a mountpoint in a different namespace, > * but we need to check. >