NFS can easily support parallel updates as the locking is done on the server, so this patch enables parallel updates for NFS. NFS unlink needs to block concurrent open()s once it decides to actually unlink the file, rather than rename it to .nfsXXXX (a.k.a. silly-rename). It currently does this by temporarily unhashing the dentry and relying on the exclusive lock on the directory to block a ->lookup(). That doesn't work now that unlink uses a shared lock, so an alternate approach is needed. __nfs_lookup_revalidate (->d_revalidate) now blocks if DCACHE_PAR_UPDATE is set, and if nfs_unlink() happens to be called with an exclusive lock and DCACHE_PAR_UPDATE is not set, the flag gets set for the duration of the potential race window. I'd rather use some other indicator in the dentry to tell __nfs_lookup_revalidate() to wait, but we are nearly out of d_flags bits, and NFS doesn't have a general-purpose d_fsdata. NFS "silly-rename" may now be called with only a shared lock on the directory, so it needs a bit of extra care to get exclusive access to the new name. d_lock_update_nested() and d_unlock_update() help here.
Signed-off-by: NeilBrown <neilb@xxxxxxx> --- fs/nfs/dir.c | 29 +++++++++++++++++++++++------ fs/nfs/inode.c | 2 ++ fs/nfs/unlink.c | 5 ++++- 3 files changed, 29 insertions(+), 7 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index a8ecdd527662..54c2c7adcd56 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1778,6 +1778,9 @@ __nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags, int ret; if (flags & LOOKUP_RCU) { + if (dentry->d_flags & DCACHE_PAR_UPDATE) + /* Pending unlink */ + return -ECHILD; parent = READ_ONCE(dentry->d_parent); dir = d_inode_rcu(parent); if (!dir) @@ -1786,6 +1789,9 @@ __nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags, if (parent != READ_ONCE(dentry->d_parent)) return -ECHILD; } else { + /* Wait for unlink to complete */ + wait_var_event(&dentry->d_flags, + !(dentry->d_flags & DCACHE_PAR_UPDATE)); parent = dget_parent(dentry); ret = reval(d_inode(parent), dentry, flags); dput(parent); @@ -2453,7 +2459,7 @@ static int nfs_safe_remove(struct dentry *dentry) int nfs_unlink(struct inode *dir, struct dentry *dentry) { int error; - int need_rehash = 0; + bool did_set_par_update = false; dfprintk(VFS, "NFS: unlink(%s/%lu, %pd)\n", dir->i_sb->s_id, dir->i_ino, dentry); @@ -2468,15 +2474,26 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry) error = nfs_sillyrename(dir, dentry); goto out; } - if (!d_unhashed(dentry)) { - __d_drop(dentry); - need_rehash = 1; + /* We must prevent any concurrent open until the unlink + * completes. ->d_revalidate will wait for DCACHE_PAR_UPDATE + * to clear, but if this happens to a non-parallel update, we + * still want to block opens. So set DCACHE_PAR_UPDATE + * temporarily. 
+ */ + if (!(dentry->d_flags & DCACHE_PAR_UPDATE)) { + /* Must have exclusive lock on parent */ + did_set_par_update = true; + dentry->d_flags |= DCACHE_PAR_UPDATE; } + spin_unlock(&dentry->d_lock); error = nfs_safe_remove(dentry); nfs_dentry_remove_handle_error(dir, dentry, error); - if (need_rehash) - d_rehash(dentry); + if (did_set_par_update) { + spin_lock(&dentry->d_lock); + dentry->d_flags &= ~DCACHE_PAR_UPDATE; + spin_unlock(&dentry->d_lock); + } out: trace_nfs_unlink_exit(dir, dentry, error); return error; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index b4e46b0ffa2d..cea2554710d2 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -481,6 +481,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) /* We can't support update_atime(), since the server will reset it */ inode->i_flags |= S_NOATIME|S_NOCMTIME; + /* Parallel updates to directories are trivial */ + inode->i_flags |= S_PAR_UPDATE; inode->i_mode = fattr->mode; nfsi->cache_validity = 0; if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0 diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 9697cd5d2561..52a20eb6131c 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -462,6 +462,7 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) sdentry = NULL; do { int slen; + d_unlock_update(sdentry); dput(sdentry); sillycounter++; slen = scnprintf(silly, sizeof(silly), @@ -479,7 +480,8 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) */ if (IS_ERR(sdentry)) goto out; - } while (d_inode(sdentry) != NULL); /* need negative lookup */ + } while (!d_lock_update_nested(sdentry, NULL, NULL, + SINGLE_DEPTH_NESTING)); ihold(inode); @@ -524,6 +526,7 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) rpc_put_task(task); out_dput: iput(inode); + d_unlock_update(sdentry); dput(sdentry); out: return error;