On Thu, 8 Aug 2013 17:24:42 +0200 Miklos Szeredi <miklos@xxxxxxxxxx> wrote: > From: Miklos Szeredi <mszeredi@xxxxxxx> > > We check submounts before doing d_drop() on a non-empty directory dentry in > NFS (have_submounts()), but we do not exclude a racing mount. > > Process A: have_submounts() -> returns false > Process B: mount() -> success > Process A: d_drop() > > This patch prepares the ground for the fix by doing the following > operations all under the same rename lock: > > have_submounts() > shrink_dcache_parent() > d_drop() > > This is actually an optimization since have_submounts() and > shrink_dcache_parent() both traverse the same dentry tree separately. > > Signed-off-by: Miklos Szeredi <mszeredi@xxxxxxx> > CC: David Howells <dhowells@xxxxxxxxxx> > CC: Steven Whitehouse <swhiteho@xxxxxxxxxx> > CC: Trond Myklebust <Trond.Myklebust@xxxxxxxxxx> > CC: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> > --- > fs/dcache.c | 157 +++++++++++++++++++++++++++++++++++++++++++++++++ > include/linux/dcache.h | 1 + > 2 files changed, 158 insertions(+) > > diff --git a/fs/dcache.c b/fs/dcache.c > index 87bdb53..020004d 100644 > --- a/fs/dcache.c > +++ b/fs/dcache.c > @@ -1224,6 +1224,163 @@ void shrink_dcache_parent(struct dentry * parent) > } > EXPORT_SYMBOL(shrink_dcache_parent); > > +static int __check_submounts_and_drop(struct dentry *parent, > + struct list_head *dispose) > +{ > + struct dentry *this_parent; > + struct list_head *next; > + unsigned seq; > + int found = 0; > + int locked = 0; > + > + seq = read_seqbegin(&rename_lock); > +again: > + this_parent = parent; > + spin_lock(&this_parent->d_lock); > +repeat: > + next = this_parent->d_subdirs.next; > +resume: > + while (next != &this_parent->d_subdirs) { > + struct list_head *tmp = next; > + struct dentry *dentry; > + > + dentry = list_entry(tmp, struct dentry, d_u.d_child); > + next = tmp->next; > + > + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); > + if (d_mountpoint(dentry)) { > + spin_unlock(&dentry->d_lock); > + found = -EBUSY; > + goto out; > + } > + > + /* > + * move only zero ref count dentries to the dispose list. > + * > + * Those which are presently on the shrink list, being processed > + * by shrink_dentry_list(), shouldn't be moved. Otherwise the > + * loop in shrink_dcache_parent() might not make any progress > + * and loop forever. > + */ > + if (dentry->d_count) { > + dentry_lru_del(dentry); > + } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) { > + dentry_lru_move_list(dentry, dispose); > + dentry->d_flags |= DCACHE_SHRINK_LIST; > + found++; > + } > + /* > + * We can return to the caller if we have found some (this > + * ensures forward progress). We'll be coming back to find > + * the rest. > + */ > + if (found && need_resched()) { > + spin_unlock(&dentry->d_lock); > + goto out; > + } > + > + /* > + * Descend a level if the d_subdirs list is non-empty. > + */ > + if (!list_empty(&dentry->d_subdirs)) { > + spin_unlock(&this_parent->d_lock); > + spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_); > + this_parent = dentry; > + spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_); > + goto repeat; > + } > + > + spin_unlock(&dentry->d_lock); > + } > + /* > + * All done at this level ... ascend and resume the search. > + */ > + if (this_parent != parent) { > + struct dentry *child = this_parent; > + this_parent = try_to_ascend(this_parent, locked, seq); > + if (!this_parent) > + goto rename_retry; > + next = child->d_u.d_child.next; > + goto resume; > + } > + if (!locked && read_seqretry(&rename_lock, seq)) { > + spin_unlock(&this_parent->d_lock); > + goto rename_retry; > + } > + if (d_mountpoint(this_parent)) > + found = -EBUSY; > + if (!found) > + __d_drop(this_parent); > +out: > + spin_unlock(&this_parent->d_lock); > + > + if (locked) > + write_sequnlock(&rename_lock); > + return found; > + > +rename_retry: > + if (found) > + return found; > + if (locked) > + goto again; > + locked = 1; > + write_seqlock(&rename_lock); > + goto again; > +} > + > +/** > + * check_submounts_and_drop - prune dcache, check for submounts and drop > + * > + * All done as a single atomic operation relative to has_unlinked_ancestor(). > + * Returns 0 if successfully unhashed @parent. If there were submounts then > + * return -EBUSY. > + * > + * @dentry: dentry to prune and drop > + */ > +int check_submounts_and_drop(struct dentry *dentry) > +{ > + int ret = 0; > + > + /* Negative dentries can be dropped without further checks */ > + if (!dentry->d_inode) { > + d_drop(dentry); > + goto out; > + } > + > + spin_lock(&dentry->d_lock); > + if (d_unhashed(dentry)) > + goto out_unlock; > + if (list_empty(&dentry->d_subdirs)) { > + if (d_mountpoint(dentry)) { > + ret = -EBUSY; > + goto out_unlock; > + } > + __d_drop(dentry); > + goto out_unlock; > + } > + spin_unlock(&dentry->d_lock); > + > + for (;;) { > + LIST_HEAD(dispose); > + ret = __check_submounts_and_drop(dentry, &dispose); > + if (!list_empty(&dispose)) > + shrink_dentry_list(&dispose); > + > + if (ret <= 0) > + break; > + > + cond_resched(); > + } > + > +out: > + return ret; > + > +out_unlock: > + spin_unlock(&dentry->d_lock); > + goto out; > +} > +EXPORT_SYMBOL(check_submounts_and_drop); > + > /** > * __d_alloc - allocate a dcache entry > * @sb: filesystem it will belong to > diff --git a/include/linux/dcache.h b/include/linux/dcache.h > index b90337c..41b21ca 100644 > --- a/include/linux/dcache.h > +++ b/include/linux/dcache.h > @@ -251,6 +251,7 @@ extern void d_prune_aliases(struct inode *); > > /* test whether we have any submounts in a subdir tree */ > extern int have_submounts(struct dentry *); > +extern int check_submounts_and_drop(struct dentry *); > > /* > * This adds the entry to the hash queues. Nice work... Looks reasonable overall. The only (extremely minor) nit I have is that it might be nice to replace some of the direct struct list_head accesses with helper macros. Acked-by: Jeff Layton <jlayton@xxxxxxxxxx> -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html