[PATCH 1/4] vfs: check submounts and drop atomically

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Miklos Szeredi <mszeredi@xxxxxxx>

We check for submounts (have_submounts()) before doing d_drop() on a
non-empty directory dentry in NFS (and likewise in AFS, GFS2 and sysfs), but
nothing excludes a mount racing in between the check and the drop.

 Process A: have_submounts() -> returns false
 Process B: mount() -> success
 Process A: d_drop()

This patch prepares the ground for the fix by doing the following
operations all under the same rename lock:

  have_submounts()
  shrink_dcache_parent()
  d_drop()

This is actually an optimization since have_submounts() and
shrink_dcache_parent() both traverse the same dentry tree separately.

Signed-off-by: Miklos Szeredi <mszeredi@xxxxxxx>
---
 fs/afs/dir.c           |   6 +--
 fs/dcache.c            | 131 +++++++++++++++++++++++++++++++++++++++++++++++++
 fs/gfs2/dentry.c       |   6 +--
 fs/nfs/dir.c           |  10 ++--
 fs/sysfs/dir.c         |   6 +--
 include/linux/dcache.h |   1 +
 6 files changed, 146 insertions(+), 14 deletions(-)

diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 34494fb..968f50d 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -687,14 +687,14 @@ not_found:
 out_bad:
 	if (dentry->d_inode) {
 		/* don't unhash if we have submounts */
-		if (have_submounts(dentry))
+		if (check_submounts_and_drop(dentry) != 0)
 			goto out_skip;
+	} else {
+		d_drop(dentry);
 	}
 
 	_debug("dropping dentry %s/%s",
 	       parent->d_name.name, dentry->d_name.name);
-	shrink_dcache_parent(dentry);
-	d_drop(dentry);
 	dput(parent);
 	key_put(key);
 
diff --git a/fs/dcache.c b/fs/dcache.c
index 87bdb53..ba429d9 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1224,6 +1224,137 @@ void shrink_dcache_parent(struct dentry * parent)
 }
 EXPORT_SYMBOL(shrink_dcache_parent);
 
+/*
+ * Walk @parent's subtree: -EBUSY on a mountpoint, else move unused dentries
+ * to @dispose and return their count; unhash @parent only if it returns 0.
+ */
+static int __check_submounts_and_drop(struct dentry *parent,
+					     struct list_head *dispose)
+{
+	struct dentry *this_parent;
+	struct list_head *next;
+	unsigned seq;
+	int found = 0;
+	int locked = 0;
+
+	seq = read_seqbegin(&rename_lock);
+again:
+	this_parent = parent;
+	spin_lock(&this_parent->d_lock);
+repeat:
+	next = this_parent->d_subdirs.next;
+resume:
+	while (next != &this_parent->d_subdirs) {
+		struct list_head *tmp = next;
+		struct dentry *dentry;
+
+		dentry = list_entry(tmp, struct dentry, d_u.d_child);
+		next = tmp->next;
+
+		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
+		if (d_mountpoint(dentry)) {
+			spin_unlock(&dentry->d_lock);
+			found = -EBUSY;
+			goto out;
+		}
+
+		/*
+		 * Move only zero ref count dentries to the dispose list.
+		 * Those which are presently on the shrink list, being processed
+		 * by shrink_dentry_list(), shouldn't be moved.  Otherwise the
+		 * loop in check_submounts_and_drop() might not make any
+		 * progress and loop forever.
+		 */
+		if (dentry->d_count) {
+			dentry_lru_del(dentry);
+		} else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) {
+			dentry_lru_move_list(dentry, dispose);
+			dentry->d_flags |= DCACHE_SHRINK_LIST;
+			found++;
+		}
+		/*
+		 * We can return to the caller if we have found some (this
+		 * ensures forward progress). We'll be coming back to find
+		 * the rest.
+		 */
+		if (found && need_resched()) {
+			spin_unlock(&dentry->d_lock);
+			goto out;
+		}
+
+		/* Descend a level if the d_subdirs list is non-empty. */
+		if (!list_empty(&dentry->d_subdirs)) {
+			spin_unlock(&this_parent->d_lock);
+			spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
+			this_parent = dentry;
+			spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
+			goto repeat;
+		}
+
+		spin_unlock(&dentry->d_lock);
+	}
+	/* All done at this level ... ascend and resume the search. */
+	if (this_parent != parent) {
+		struct dentry *child = this_parent;
+		this_parent = try_to_ascend(this_parent, locked, seq);
+		if (!this_parent)
+			goto rename_retry;
+		next = child->d_u.d_child.next;
+		goto resume;
+	}
+out:
+	if (!locked && read_seqretry(&rename_lock, seq)) {
+		spin_unlock(&this_parent->d_lock);
+		goto rename_retry;
+	}
+	/* Unhash only after a complete walk that found nothing. */
+	if (!found)
+		__d_drop(this_parent);
+	spin_unlock(&this_parent->d_lock);
+
+	if (locked)
+		write_sequnlock(&rename_lock);
+	return found;
+
+rename_retry:
+	if (found)
+		return found;
+	if (locked)
+		goto again;
+	locked = 1;
+	write_seqlock(&rename_lock);
+	goto again;
+}
+
+/**
+ * check_submounts_and_drop - prune dcache, check for submounts and drop
+ * @dentry: dentry to prune and drop
+ *
+ * All done as a single atomic operation relative to has_unlinked_ancestor().
+ * Unused descendants collected on the way go to shrink_dentry_list().
+ * Returns 0 if successfully unhashed @dentry.  If there were submounts then
+ * return -EBUSY.
+ */
+int check_submounts_and_drop(struct dentry *dentry)
+{
+	int found;
+
+	for (;;) {
+		LIST_HEAD(dispose);
+		found = __check_submounts_and_drop(dentry, &dispose);
+		if (!list_empty(&dispose))
+			shrink_dentry_list(&dispose);
+
+		if (found <= 0)
+			break;
+
+		cond_resched();
+	}
+
+	return found;
+}
+EXPORT_SYMBOL(check_submounts_and_drop);
+
 /**
  * __d_alloc	-	allocate a dcache entry
  * @sb: filesystem it will belong to
diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c
index f2448ab..6964725 100644
--- a/fs/gfs2/dentry.c
+++ b/fs/gfs2/dentry.c
@@ -94,11 +94,11 @@ invalid_gunlock:
 		gfs2_glock_dq_uninit(&d_gh);
 invalid:
 	if (inode && S_ISDIR(inode->i_mode)) {
-		if (have_submounts(dentry))
+		if (check_submounts_and_drop(dentry) != 0)
 			goto valid;
-		shrink_dcache_parent(dentry);
+	} else {
+		d_drop(dentry);
 	}
-	d_drop(dentry);
 	dput(parent);
 	return 0;
 
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index e474ca2b..a2fd681 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1135,14 +1135,14 @@ out_zap_parent:
 	if (inode && S_ISDIR(inode->i_mode)) {
 		/* Purge readdir caches. */
 		nfs_zap_caches(inode);
-		/* If we have submounts, don't unhash ! */
-		if (have_submounts(dentry))
-			goto out_valid;
 		if (dentry->d_flags & DCACHE_DISCONNECTED)
 			goto out_valid;
-		shrink_dcache_parent(dentry);
+		/* If we have submounts, don't unhash ! */
+		if (check_submounts_and_drop(dentry) != 0)
+			goto out_valid;
+	} else {
+		d_drop(dentry);
 	}
-	d_drop(dentry);
 	dput(parent);
 	dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n",
 			__func__, dentry->d_parent->d_name.name,
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index e068e74..1778320 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -348,11 +348,11 @@ out_bad:
 		 * to lie about the state of the filesystem to prevent
 		 * leaks and other nasty things.
 		 */
-		if (have_submounts(dentry))
+		if (check_submounts_and_drop(dentry) != 0)
 			goto out_valid;
-		shrink_dcache_parent(dentry);
+	} else {
+		d_drop(dentry);
 	}
-	d_drop(dentry);
 	return 0;
 }
 
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index b90337c..41b21ca 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -251,6 +251,7 @@ extern void d_prune_aliases(struct inode *);
 
 /* test whether we have any submounts in a subdir tree */
 extern int have_submounts(struct dentry *);
+extern int check_submounts_and_drop(struct dentry *);
 
 /*
  * This adds the entry to the hash queues.
-- 
1.8.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux