On Wed, 2011-01-26 at 15:14 -0500, Trond Myklebust wrote: > The alternative would be to add a callback that can be called after > dentry_iput() if DCACHE_NFSFS_RENAMED is true, and that takes the parent > and (negative) dentry as the arguments. > sillyrename doesn't need the inode as an argument, but it definitely > needs the parent dentry so that it can check for races with > ->lookup()... The following (compile tested only!) patch illustrates what I mean. Cheers Trond 8<-------------------------------------------------------------------------- >From 1f55305d8cc82d2cb4ed37d3782f62bd378d7fd4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust <Trond.Myklebust@xxxxxxxxxx> Date: Wed, 26 Jan 2011 15:41:56 -0500 Subject: [PATCH] NFS/VFS: Fix the nfs sillyrename regression Commit 949854d0 (fs: Use rename lock and RCU for multi-step operations) broke sillyrename on NFS because it NULLs out the dentry->d_parent prior to calling dentry->d_op->iput() The following patch adds a new callback that is used by d_kill() to notify the filesystem that the sillyrenamed dentry is no longer in use, and that it may be unlinked. It takes the parent dentry and negative dentry as its argument, and is used by the NFS layer to complete the sillyrename. Signed-off-by: Trond Myklebust <Trond.Myklebust@xxxxxxxxxx> --- Documentation/filesystems/Locking | 2 ++ Documentation/filesystems/vfs.txt | 9 +++++++++ fs/dcache.c | 3 +++ fs/nfs/dir.c | 12 ++++++++---- fs/nfs/unlink.c | 22 ++++++++-------------- include/linux/dcache.h | 1 + include/linux/nfs_fs.h | 2 +- 7 files changed, 32 insertions(+), 19 deletions(-) diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 4471a41..5362714 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -21,6 +21,7 @@ prototypes: char *(*d_dname)((struct dentry *dentry, char *buffer, int buflen); struct vfsmount *(*d_automount)(struct path *path); int (*d_manage)(struct dentry *, bool); + void (*d_unlink)(struct dentry *, struct dentry *); locking rules: rename_lock ->d_lock may block rcu-walk @@ -33,6 +34,7 @@ d_iput: no no yes no d_dname: no no no no d_automount: no no yes no d_manage: no no yes (ref-walk) maybe +d_unlink no no yes no --------------------------- inode_operations --------------------------- prototypes: diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 94cf97b..a69d978 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -866,6 +866,7 @@ struct dentry_operations { char *(*d_dname)(struct dentry *, char *, int); struct vfsmount *(*d_automount)(struct path *); int (*d_manage)(struct dentry *, bool, bool); + void (*d_unlink)(struct dentry *, struct dentry *); }; d_revalidate: called when the VFS needs to revalidate a dentry. This @@ -973,6 +974,14 @@ struct dentry_operations { This function is only used if DCACHE_MANAGE_TRANSIT is set on the dentry being transited from. + d_unlink: called to allow the filesystem to unlink the dentry after final + use. It is only called when DCACHE_NFSFS_RENAMED is set, and is + designed for use by 'sillyrename' schemes that are commonly + implemented on distributed filesystems such as NFS. + + Note that the filesystem is still responsible for protecting against + races with other lookups. + Example : static char *pipefs_dname(struct dentry *dent, char *buffer, int buflen) diff --git a/fs/dcache.c b/fs/dcache.c index 2a6bd9a..f4dea50 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -301,6 +301,9 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) if (parent) spin_unlock(&parent->d_lock); dentry_iput(dentry); + + if (dentry->d_flags & DCACHE_NFSFS_RENAMED) + dentry->d_op->d_unlink(parent, dentry); /* * dentry_iput drops the locks, at which point nobody (except * transient RCU lookups) can reach this dentry. diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 2c3eb33..322c882 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1161,19 +1161,22 @@ static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode) if (S_ISDIR(inode->i_mode)) /* drop any readdir cache as it could easily be old */ NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA; - - if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { + if (dentry->d_flags & DCACHE_NFSFS_RENAMED) drop_nlink(inode); - nfs_complete_unlink(dentry, inode); - } iput(inode); } +static void nfs_d_unlink(struct dentry *parent, struct dentry *dentry) +{ + nfs_complete_unlink(parent, dentry); +} + const struct dentry_operations nfs_dentry_operations = { .d_revalidate = nfs_lookup_revalidate, .d_delete = nfs_dentry_delete, .d_iput = nfs_dentry_iput, .d_automount = nfs_d_automount, + .d_unlink = nfs_d_unlink, }; static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) @@ -1248,6 +1251,7 @@ const struct dentry_operations nfs4_dentry_operations = { .d_delete = nfs_dentry_delete, .d_iput = nfs_dentry_iput, .d_automount = nfs_d_automount, + .d_unlink = nfs_d_unlink, }; /* diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index e313a51..9160cad 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -184,19 +184,17 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n return 1; } -static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data) +static int nfs_call_unlink(struct dentry *parent, struct dentry *dentry, struct nfs_unlinkdata *data) { - struct dentry *parent; struct inode *dir; int ret = 0; - parent = dget_parent(dentry); if (parent == NULL) - goto out_free; + goto out; dir = parent->d_inode; if (nfs_copy_dname(dentry, data) != 0) - goto out_dput; + goto out; /* Non-exclusive lock protects against concurrent lookup() calls */ spin_lock(&dir->i_lock); if (atomic_inc_not_zero(&NFS_I(dir)->silly_count) == 0) { @@ -204,13 +202,11 @@ static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data) hlist_add_head(&data->list, &NFS_I(dir)->silly_list); spin_unlock(&dir->i_lock); ret = 1; - goto out_dput; + goto out; } spin_unlock(&dir->i_lock); ret = nfs_do_call_unlink(parent, dir, data); -out_dput: - dput(parent); -out_free: +out: return ret; } @@ -283,26 +279,24 @@ out: /** * nfs_complete_unlink - Initialize completion of the sillydelete + * @parent: parent directory * @dentry: dentry to delete - * @inode: inode * * Since we're most likely to be called by dentry_iput(), we * only use the dentry to find the sillydelete. We then copy the name * into the qstr. */ void -nfs_complete_unlink(struct dentry *dentry, struct inode *inode) +nfs_complete_unlink(struct dentry *parent, struct dentry *dentry) { struct nfs_unlinkdata *data = NULL; - spin_lock(&dentry->d_lock); if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { dentry->d_flags &= ~DCACHE_NFSFS_RENAMED; data = dentry->d_fsdata; } - spin_unlock(&dentry->d_lock); - if (data != NULL && (NFS_STALE(inode) || !nfs_call_unlink(dentry, data))) + if (data != NULL && !nfs_call_unlink(parent, dentry, data)) nfs_free_unlinkdata(data); } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index f958c19..a3fd768 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -169,6 +169,7 @@ struct dentry_operations { char *(*d_dname)(struct dentry *, char *, int); struct vfsmount *(*d_automount)(struct path *); int (*d_manage)(struct dentry *, bool, bool); + void (*d_unlink)(struct dentry *, struct dentry *); } ____cacheline_aligned; /* diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 6023efa..11d4b40 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -488,7 +488,7 @@ extern void nfs_release_automount_timer(void); /* * linux/fs/nfs/unlink.c */ -extern void nfs_complete_unlink(struct dentry *dentry, struct inode *); +extern void nfs_complete_unlink(struct dentry *dentry, struct dentry *); extern void nfs_block_sillyrename(struct dentry *dentry); extern void nfs_unblock_sillyrename(struct dentry *dentry); extern int nfs_sillyrename(struct inode *dir, struct dentry *dentry); -- 1.7.3.5 -- Trond Myklebust Linux NFS client maintainer NetApp Trond.Myklebust@xxxxxxxxxx www.netapp.com -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html