> On Jun 13, 2022, at 7:18 PM, NeilBrown <neilb@xxxxxxx> wrote: > > Rather than getting write access, locking the directory, and performing > a lookup, use > filename_create_one_len() for create, or > lookup_hash_update_len() for delete > which combines all these steps and handles shared locking for concurrent > updates where appropriate. > > As we don't use fh_lock() we need to call fh_fill_pre_attrs() > explicitly. However if we only get a shared lock, then the pre/post > attributes won't be atomic, so we need to ensure that fh know that, > and doesn't try to encode wcc attrs either. > > Note that there is only one filesystem that allows shared locks for > create/unlink and that is NFS (for re-export). NFS does support atomic > pre/post attributes, so there is no loss in not providing them. Should this be "NFS does /not/ support atomic pre/post ..." ? > When some other filesystem supports concurrent updates, we might need to > consider if the pre/post attributes are more important than the > parallelism. My impression is that pre/post attributes in NFSv3 have not turned out to be as useful as their inventors predicted. I think this is mostly a "switch to the new API" patch, so Reviewed-by: Chuck Lever <chuck.lever@xxxxxxxxxx> > Signed-off-by: NeilBrown <neilb@xxxxxxx> > --- > fs/nfsd/nfs3proc.c | 31 +++++------- > fs/nfsd/nfs4proc.c | 32 +++++------- > fs/nfsd/nfsfh.c | 7 ++- > fs/nfsd/nfsfh.h | 4 +- > fs/nfsd/nfsproc.c | 29 +++++------ > fs/nfsd/vfs.c | 134 +++++++++++++++++++++++----------------------------- > 6 files changed, 105 insertions(+), 132 deletions(-) > > diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c > index 981a3a7a6e16..0fdbb9504a87 100644 > --- a/fs/nfsd/nfs3proc.c > +++ b/fs/nfsd/nfs3proc.c > @@ -231,12 +231,14 @@ static __be32 > nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, > struct svc_fh *resfhp, struct nfsd3_createargs *argp) > { > + struct path path; > struct iattr *iap = &argp->attrs; > - struct dentry *parent, *child; > + struct dentry *child; > __u32 v_mtime, v_atime; > struct inode *inode; > __be32 status; > int host_err; > + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); Nit: I'd prefer sticking with the reverse christmas tree style for variable declarations. Same in other patches that touch NFSD. > if (isdotent(argp->name, argp->len)) > return nfserr_exist; > @@ -247,20 +249,15 @@ nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, > if (status != nfs_ok) > return status; > > - parent = fhp->fh_dentry; > - inode = d_inode(parent); > + path.dentry = fhp->fh_dentry; > + path.mnt = fhp->fh_export->ex_path.mnt; > + inode = d_inode(path.dentry); > > - host_err = fh_want_write(fhp); > - if (host_err) > - return nfserrno(host_err); > + child = filename_create_one_len(argp->name, argp->len, > + &path, 0, &wq); > > - fh_lock_nested(fhp, I_MUTEX_PARENT); > - > - child = lookup_one_len(argp->name, parent, argp->len); > - if (IS_ERR(child)) { > - status = nfserrno(PTR_ERR(child)); > - goto out; > - } > + if (IS_ERR(child)) > + return nfserrno(PTR_ERR(child)); > > if (d_really_is_negative(child)) { > status = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE); > @@ -311,6 +308,7 @@ nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, > > if (!IS_POSIXACL(inode)) > iap->ia_mode &= ~current_umask(); > + fh_fill_pre_attrs(fhp, (child->d_flags & DCACHE_PAR_UPDATE) == 0); > > host_err = vfs_create(&init_user_ns, inode, child, iap->ia_mode, true); > if (host_err < 0) { > @@ -332,12 +330,9 @@ nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, > > set_attr: > status = nfsd_create_setattr(rqstp, fhp, resfhp, iap); > - > + fh_fill_post_attrs(fhp); > out: > - fh_unlock(fhp); > - if (child && !IS_ERR(child)) > - dput(child); > - fh_drop_write(fhp); > + done_path_update(&path, child, &wq); > return status; > } > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c > index 3895eb52d2b1..71a4b8ef77f0 100644 > --- a/fs/nfsd/nfs4proc.c > +++ b/fs/nfsd/nfs4proc.c > @@ -285,12 +285,13 @@ static __be32 > nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, > struct svc_fh *resfhp, struct nfsd4_open *open) > { > + struct path path; > struct iattr *iap = &open->op_iattr; > - struct dentry *parent, *child; > + struct dentry *child; > __u32 v_mtime, v_atime; > struct inode *inode; > __be32 status; > - int host_err; > + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); Ditto. > if (isdotent(open->op_fname, open->op_fnamelen)) > return nfserr_exist; > @@ -300,20 +301,17 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, > status = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC); > if (status != nfs_ok) > return status; > - parent = fhp->fh_dentry; > - inode = d_inode(parent); > > - host_err = fh_want_write(fhp); > - if (host_err) > - return nfserrno(host_err); > + path.dentry = fhp->fh_dentry; > + path.mnt = fhp->fh_export->ex_path.mnt; > + inode = d_inode(path.dentry); > > - fh_lock_nested(fhp, I_MUTEX_PARENT); > + child = filename_create_one_len(open->op_fname, > + open->op_fnamelen, > + &path, 0, &wq); > > - child = lookup_one_len(open->op_fname, parent, open->op_fnamelen); > - if (IS_ERR(child)) { > - status = nfserrno(PTR_ERR(child)); > - goto out; > - } > + if (IS_ERR(child)) > + return nfserrno(PTR_ERR(child)); > > if (d_really_is_negative(child)) { > status = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE); > @@ -386,6 +384,7 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, > if (!IS_POSIXACL(inode)) > iap->ia_mode &= ~current_umask(); > > + fh_fill_pre_attrs(fhp, (child->d_flags & DCACHE_PAR_UPDATE) == 0); > status = nfsd4_vfs_create(fhp, child, open); > if (status != nfs_ok) > goto out; > @@ -405,12 +404,9 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, > > set_attr: > status = nfsd_create_setattr(rqstp, fhp, resfhp, iap); > - > + fh_fill_post_attrs(fhp); > out: > - fh_unlock(fhp); > - if (child && !IS_ERR(child)) > - dput(child); > - fh_drop_write(fhp); > + done_path_update(&path, child, &wq); > return status; > } > > diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c > index c29baa03dfaf..a50db688c60d 100644 > --- a/fs/nfsd/nfsfh.c > +++ b/fs/nfsd/nfsfh.c > @@ -616,7 +616,7 @@ fh_update(struct svc_fh *fhp) > * @fhp: file handle to be updated > * > */ > -void fh_fill_pre_attrs(struct svc_fh *fhp) > +void fh_fill_pre_attrs(struct svc_fh *fhp, bool atomic) > { > bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE); > struct inode *inode; > @@ -626,6 +626,11 @@ void fh_fill_pre_attrs(struct svc_fh *fhp) > if (fhp->fh_no_wcc || fhp->fh_pre_saved) > return; > > + if (!atomic) { > + fhp->fh_no_atomic_attr = true; > + fhp->fh_no_wcc = true; > + } > + > inode = d_inode(fhp->fh_dentry); > err = fh_getattr(fhp, &stat); > if (err) { > diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h > index fb9d358a267e..ecc57fd3fd67 100644 > --- a/fs/nfsd/nfsfh.h > +++ b/fs/nfsd/nfsfh.h > @@ -320,7 +320,7 @@ static inline u64 nfsd4_change_attribute(struct kstat *stat, > return time_to_chattr(&stat->ctime); > } > > -extern void fh_fill_pre_attrs(struct svc_fh *fhp); > +extern void fh_fill_pre_attrs(struct svc_fh *fhp, bool atomic); > extern void fh_fill_post_attrs(struct svc_fh *fhp); > > > @@ -347,7 +347,7 @@ fh_lock_nested(struct svc_fh *fhp, unsigned int subclass) > > inode = d_inode(dentry); > inode_lock_nested(inode, subclass); > - fh_fill_pre_attrs(fhp); > + fh_fill_pre_attrs(fhp, true); > fhp->fh_locked = true; > } > > diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c > index fcdab8a8a41f..2dccf77634e8 100644 > --- a/fs/nfsd/nfsproc.c > +++ b/fs/nfsd/nfsproc.c > @@ -255,6 +255,7 @@ nfsd_proc_write(struct svc_rqst *rqstp) > static __be32 > nfsd_proc_create(struct svc_rqst *rqstp) > { > + struct path path; > struct nfsd_createargs *argp = rqstp->rq_argp; > struct nfsd_diropres *resp = rqstp->rq_resp; > svc_fh *dirfhp = &argp->fh; > @@ -263,8 +264,8 @@ nfsd_proc_create(struct svc_rqst *rqstp) > struct inode *inode; > struct dentry *dchild; > int type, mode; > - int hosterr; > dev_t rdev = 0, wanted = new_decode_dev(attr->ia_size); > + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); Ditto. > dprintk("nfsd: CREATE %s %.*s\n", > SVCFH_fmt(dirfhp), argp->len, argp->name); > @@ -279,17 +280,13 @@ nfsd_proc_create(struct svc_rqst *rqstp) > resp->status = nfserr_exist; > if (isdotent(argp->name, argp->len)) > goto done; > - hosterr = fh_want_write(dirfhp); > - if (hosterr) { > - resp->status = nfserrno(hosterr); > - goto done; > - } > > - fh_lock_nested(dirfhp, I_MUTEX_PARENT); > - dchild = lookup_one_len(argp->name, dirfhp->fh_dentry, argp->len); > + path.dentry = dirfhp->fh_dentry; > + path.mnt = dirfhp->fh_export->ex_path.mnt; > + dchild = filename_create_one_len(argp->name, argp->len, &path, 0, &wq); > if (IS_ERR(dchild)) { > resp->status = nfserrno(PTR_ERR(dchild)); > - goto out_unlock; > + goto out_done; > } > fh_init(newfhp, NFS_FHSIZE); > resp->status = fh_compose(newfhp, dirfhp->fh_export, dchild, dirfhp); > @@ -298,7 +295,7 @@ nfsd_proc_create(struct svc_rqst *rqstp) > dput(dchild); > if (resp->status) { > if (resp->status != nfserr_noent) > - goto out_unlock; > + goto out_done; > /* > * If the new file handle wasn't verified, we can't tell > * whether the file exists or not. Time to bail ... > @@ -307,7 +304,7 @@ nfsd_proc_create(struct svc_rqst *rqstp) > if (!newfhp->fh_dentry) { > printk(KERN_WARNING > "nfsd_proc_create: file handle not verified\n"); > - goto out_unlock; > + goto out_done; > } > } > > @@ -341,7 +338,7 @@ nfsd_proc_create(struct svc_rqst *rqstp) > newfhp->fh_dentry, > NFSD_MAY_WRITE|NFSD_MAY_LOCAL_ACCESS); > if (resp->status && resp->status != nfserr_rofs) > - goto out_unlock; > + goto out_done; > } > } else > type = S_IFREG; > @@ -378,7 +375,7 @@ nfsd_proc_create(struct svc_rqst *rqstp) > /* Make sure the type and device matches */ > resp->status = nfserr_exist; > if (inode && inode_wrong_type(inode, type)) > - goto out_unlock; > + goto out_done; > } > > resp->status = nfs_ok; > @@ -400,10 +397,8 @@ nfsd_proc_create(struct svc_rqst *rqstp) > (time64_t)0); > } > > -out_unlock: > - /* We don't really need to unlock, as fh_put does it. */ > - fh_unlock(dirfhp); > - fh_drop_write(dirfhp); > +out_done: > + done_path_update(&path, dchild, &wq); > done: > fh_put(dirfhp); > if (resp->status != nfs_ok) > diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c > index 840e3af63a6f..6cdd5e407600 100644 > --- a/fs/nfsd/vfs.c > +++ b/fs/nfsd/vfs.c > @@ -1274,12 +1274,6 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, > dirp = d_inode(dentry); > > dchild = dget(resfhp->fh_dentry); > - if (!fhp->fh_locked) { > - WARN_ONCE(1, "nfsd_create: parent %pd2 not locked!\n", > - dentry); > - err = nfserr_io; > - goto out; > - } > > err = nfsd_permission(rqstp, fhp->fh_export, dentry, NFSD_MAY_CREATE); > if (err) > @@ -1362,9 +1356,11 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, > char *fname, int flen, struct iattr *iap, > int type, dev_t rdev, struct svc_fh *resfhp) > { > - struct dentry *dentry, *dchild = NULL; > + struct path path; > + struct dentry *dchild = NULL; > __be32 err; > int host_err; > + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); > > if (isdotent(fname, flen)) > return nfserr_exist; > @@ -1373,27 +1369,22 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, > if (err) > return err; > > - dentry = fhp->fh_dentry; > - > - host_err = fh_want_write(fhp); > - if (host_err) > - return nfserrno(host_err); > + path.dentry = fhp->fh_dentry; > + path.mnt = fhp->fh_export->ex_path.mnt; > > - fh_lock_nested(fhp, I_MUTEX_PARENT); > - dchild = lookup_one_len(fname, dentry, flen); > + dchild = filename_create_one_len(fname, flen, &path, 0, &wq); > host_err = PTR_ERR(dchild); > if (IS_ERR(dchild)) > return nfserrno(host_err); > err = fh_compose(resfhp, fhp->fh_export, dchild, fhp); > - /* > - * We unconditionally drop our ref to dchild as fh_compose will have > - * already grabbed its own ref for it. > - */ > - dput(dchild); > - if (err) > - return err; > - return nfsd_create_locked(rqstp, fhp, fname, flen, iap, type, > - rdev, resfhp); > + if (!err) { > + fh_fill_pre_attrs(fhp, (dchild->d_flags & DCACHE_PAR_UPDATE) == 0); > + err = nfsd_create_locked(rqstp, fhp, fname, flen, iap, type, > + rdev, resfhp); > + fh_fill_post_attrs(fhp); > + } > + done_path_update(&path, dchild, &wq); > + return err; > } > > /* > @@ -1441,15 +1432,17 @@ nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp) > __be32 > nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, > char *fname, int flen, > - char *path, > + char *lpath, > struct svc_fh *resfhp) > { > - struct dentry *dentry, *dnew; > + struct path path; > + struct dentry *dnew; > __be32 err, cerr; > int host_err; > + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); Ditto. > err = nfserr_noent; > - if (!flen || path[0] == '\0') > + if (!flen || lpath[0] == '\0') > goto out; > err = nfserr_exist; > if (isdotent(fname, flen)) > @@ -1459,28 +1452,28 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, > if (err) > goto out; > > - host_err = fh_want_write(fhp); > - if (host_err) > - goto out_nfserr; > + path.dentry = fhp->fh_dentry; > + path.mnt = fhp->fh_export->ex_path.mnt; > > - fh_lock(fhp); > - dentry = fhp->fh_dentry; > - dnew = lookup_one_len(fname, dentry, flen); > + dnew = filename_create_one_len(fname, flen, &path, 0, &wq); > host_err = PTR_ERR(dnew); > if (IS_ERR(dnew)) > goto out_nfserr; > > - host_err = vfs_symlink(&init_user_ns, d_inode(dentry), dnew, path); > + fh_fill_pre_attrs(fhp, (dnew->d_flags & DCACHE_PAR_UPDATE) == 0); > + host_err = vfs_symlink(mnt_user_ns(path.mnt), d_inode(path.dentry), > + dnew, lpath); > err = nfserrno(host_err); > - fh_unlock(fhp); > if (!err) > err = nfserrno(commit_metadata(fhp)); > > - fh_drop_write(fhp); > + fh_fill_post_attrs(fhp); > > cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp); > - dput(dnew); > - if (err==0) err = cerr; > + if (err==0) > + err = cerr; > + > + done_path_update(&path, dnew, &wq); > out: > return err; > > @@ -1497,10 +1490,12 @@ __be32 > nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, > char *name, int len, struct svc_fh *tfhp) > { > - struct dentry *ddir, *dnew, *dold; > + struct path path; > + struct dentry *dold, *dnew; > struct inode *dirp; > __be32 err; > int host_err; > + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); Ditto. > err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_CREATE); > if (err) > @@ -1518,17 +1513,11 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, > if (isdotent(name, len)) > goto out; > > - host_err = fh_want_write(tfhp); > - if (host_err) { > - err = nfserrno(host_err); > - goto out; > - } > - > - fh_lock_nested(ffhp, I_MUTEX_PARENT); > - ddir = ffhp->fh_dentry; > - dirp = d_inode(ddir); > + path.dentry = ffhp->fh_dentry; > + path.mnt = ffhp->fh_export->ex_path.mnt; > + dirp = d_inode(path.dentry); > > - dnew = lookup_one_len(name, ddir, len); > + dnew = filename_create_one_len(name, len, &path, 0, &wq); > host_err = PTR_ERR(dnew); > if (IS_ERR(dnew)) > goto out_nfserr; > @@ -1537,9 +1526,10 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, > > err = nfserr_noent; > if (d_really_is_negative(dold)) > - goto out_dput; > + goto out_done; > + fh_fill_pre_attrs(ffhp, (dnew->d_flags & DCACHE_PAR_UPDATE) == 0); > host_err = vfs_link(dold, &init_user_ns, dirp, dnew, NULL); > - fh_unlock(ffhp); > + > if (!host_err) { > err = nfserrno(commit_metadata(ffhp)); > if (!err) > @@ -1550,17 +1540,15 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, > else > err = nfserrno(host_err); > } > -out_dput: > - dput(dnew); > -out_unlock: > - fh_unlock(ffhp); > - fh_drop_write(tfhp); > +out_done: > + fh_fill_post_attrs(ffhp); > + done_path_update(&path, dnew, &wq); > out: > return err; > > out_nfserr: > err = nfserrno(host_err); > - goto out_unlock; > + goto out; > } > > static void > @@ -1625,8 +1613,8 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, > * so do it by hand */ > trap = lock_rename(tdentry, fdentry); > ffhp->fh_locked = tfhp->fh_locked = true; > - fh_fill_pre_attrs(ffhp); > - fh_fill_pre_attrs(tfhp); > + fh_fill_pre_attrs(ffhp, true); > + fh_fill_pre_attrs(tfhp, true); > > odentry = lookup_one_len(fname, fdentry, flen); > host_err = PTR_ERR(odentry); > @@ -1717,11 +1705,13 @@ __be32 > nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, > char *fname, int flen) > { > - struct dentry *dentry, *rdentry; > + struct dentry *rdentry; > struct inode *dirp; > struct inode *rinode; > __be32 err; > int host_err; > + struct path path; > + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); Ditto. > err = nfserr_acces; > if (!flen || isdotent(fname, flen)) > @@ -1730,24 +1720,18 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, > if (err) > goto out; > > - host_err = fh_want_write(fhp); > - if (host_err) > - goto out_nfserr; > + path.mnt = fhp->fh_export->ex_path.mnt; > + path.dentry = fhp->fh_dentry; > > - fh_lock_nested(fhp, I_MUTEX_PARENT); > - dentry = fhp->fh_dentry; > - dirp = d_inode(dentry); > + rdentry = lookup_hash_update_len(fname, flen, &path, 0, &wq); > + dirp = d_inode(path.dentry); > > - rdentry = lookup_one_len(fname, dentry, flen); > host_err = PTR_ERR(rdentry); > if (IS_ERR(rdentry)) > - goto out_drop_write; > + goto out_nfserr; > + > + fh_fill_pre_attrs(fhp, (rdentry->d_flags & DCACHE_PAR_UPDATE) == 0); > > - if (d_really_is_negative(rdentry)) { > - dput(rdentry); > - host_err = -ENOENT; > - goto out_drop_write; > - } > rinode = d_inode(rdentry); > ihold(rinode); > > @@ -1761,15 +1745,13 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, > } else { > host_err = vfs_rmdir(&init_user_ns, dirp, rdentry); > } > + fh_fill_post_attrs(fhp); > > - fh_unlock(fhp); > + done_path_update(&path, rdentry, &wq); > if (!host_err) > host_err = commit_metadata(fhp); > - dput(rdentry); > iput(rinode); /* truncate the inode here */ > > -out_drop_write: > - fh_drop_write(fhp); > out_nfserr: > if (host_err == -EBUSY) { > /* name is mounted-on. There is no perfect > > -- Chuck Lever