[PATCH review 3/8] dcache: Clearly separate the two directory rename cases in d_splice_alias

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



There are two scenarios that can result in an alias being found in
d_splice_alias.  The first scenario is a disconnected dentry being
looked up and reconnected (common with nfs file handles, and
open_by_handle_at).  The second scenario is a lookup on a directory
lazily discovering that it was renamed on the remote filesystem.

The locking challenge for handling these two scenarios is that
the instance of lookup calling d_splice_alias has already
taken the dentry->d_parent->d_inode->i_mutex.

If the mutex was not held the locking we could just do:
	rename_mutex = &inode->i_sb->s_vfs_rename_mutex;
	mutex_lock(rename_mutex);
	if (d_ancestor(alias, dentry)) {
		mutex_unlock(rename_mutex);
		pr_warn_ratelimited(...);
		return -ELOOP;
	}
	m1 = &dentry->d_parent->d_inode->i_mutex;
	mutex_lock_nested(m1, I_MUTEX_PARENT);
	if (!IS_ROOT(alias) && (alias->d_parent != dentry->d_parent)) {
		m2 = &alias->d_parent->d_inode->i_mutex;
		mutex_lock_nested(m2, I_MUTEX_PARENT2);
	}
	d_move(alias, dentry);
	if (m2)
		mutex_unlock(m2);
	mutex_unlock(m1);
	mutex_unlock(rename_mutex);

Which is essentially lock_rename and unlock_reaname with added
handling of the IS_ROOT case.

The problem is that as the lookup locking stands today grabbing the
s_vfs_rename_mutex must use mutex_trylock which can fail, so for reliability
reasons we need to avoid using the rename_mutex as much as possible.

For the case where a disconnected dentry is being connected (aka the
IS_ROOT case) the d_ancestor test can be placed under the rename_lock
removing any chance that taking locks will cause a failure.

For the lazy rename case things are trickier because when
dentry->d_parent and alias->d_parent are not equal the code need to
take the s_vfs_rename_mutex, or risk the d_ancestor call in
lock_rename being inaccurate.  As s_vfs_rename_mutex is take first
there are no locking ordering issues with taking
alias->d_parent->d_inode->i_mutex.  Furthermore as games with
rename_lock are unnecessary d_move instead of __d_move can be called.

Sleeping in d_splice_alias is not something new as
security_d_instantiate is a sleeping function.

Compared to the current implementation this change introduces a
function for each case __d_rename_alias and __d_connect_alias and
moves the acquisition of rename_lock down into those functions.

In the case of __d_rename_alias which was __d_unalias this allows the
existing lock ordering rules to be followed much more closely removing
the need for a trylock when acquiring
alias->d_parent->d_inode->i_mutex, and allowing the use of d_move.

A common helper that prints the warning message when a loop is
detected is factored out into d_alias_is_ancestor so that code does not
need to be duplicated in both cases.

Signed-off-by: "Eric W. Biederman" <ebiederm@xxxxxxxxxxxx>
---
 fs/dcache.c | 111 ++++++++++++++++++++++++++++++++++++------------------------
 1 file changed, 67 insertions(+), 44 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index 53b7f1e63beb..c1eece74621f 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2711,41 +2711,77 @@ struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
 	return NULL;
 }
 
+static struct dentry *d_alias_is_ancestor(struct dentry *alias,
+					  struct dentry *dentry)
+{
+	struct dentry *ancestor = d_ancestor(alias, dentry);
+	if (ancestor) {
+		pr_warn_ratelimited(
+			"VFS: Lookup of '%s' in %s %s would have caused loop\n",
+			dentry->d_name.name,
+			dentry->d_sb->s_type->name,
+			dentry->d_sb->s_id);
+	}
+	return ancestor;
+}
+
 /*
  * This helper attempts to cope with remotely renamed directories
  *
  * It assumes that the caller is already holding
- * dentry->d_parent->d_inode->i_mutex, and rename_lock
+ * dentry->d_parent->d_inode->i_mutex
  *
  * Note: If ever the locking in lock_rename() changes, then please
  * remember to update this too...
  */
-static int __d_unalias(struct inode *inode,
-		struct dentry *dentry, struct dentry *alias)
+static int __d_rename_alias(struct dentry *alias, struct dentry *dentry)
 {
-	struct mutex *m1 = NULL, *m2 = NULL;
-	int ret = -ESTALE;
+	struct mutex *rename_mutex = NULL, *parent_mutex = NULL;
 
-	/* If alias and dentry share a parent, then no extra locks required */
-	if (alias->d_parent == dentry->d_parent)
-		goto out_unalias;
+	/* Holding dentry->d_parent->d_inode->i_mutex guarantees this
+	 * that the equality or inequality alias->d_parent and
+	 * dentry->d_parent remains stable.
+	 */
+	if (alias->d_parent != dentry->d_parent) {
+		/* See lock_rename() */
+		rename_mutex = &dentry->d_sb->s_vfs_rename_mutex;
+		if (!mutex_trylock(rename_mutex))
+			return -ESTALE;
+
+		if (d_alias_is_ancestor(alias, dentry)) {
+			mutex_unlock(rename_mutex);
+			return -ELOOP;
+		}
+		parent_mutex = &alias->d_parent->d_inode->i_mutex;
+		mutex_lock_nested(parent_mutex, I_MUTEX_PARENT2);
+	}
+	d_move(alias, dentry);
+	if (parent_mutex) {
+		mutex_unlock(parent_mutex);
+		mutex_unlock(rename_mutex);
+	}
+	return 0;
+}
 
-	/* See lock_rename() */
-	if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
-		goto out_err;
-	m1 = &dentry->d_sb->s_vfs_rename_mutex;
-	if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex))
-		goto out_err;
-	m2 = &alias->d_parent->d_inode->i_mutex;
-out_unalias:
+/*
+ * This helper connects disconnected dentries.
+ *
+ * It assumes that the caller is already holding
+ * dentry->d_parent->d_inode->i_mutex
+ *
+ */
+static int __d_connect_alias(struct inode *inode,
+			     struct dentry *alias, struct dentry *dentry)
+{
+	write_seqlock(&rename_lock);
+	if (unlikely(d_alias_is_ancestor(alias, dentry))) {
+		write_sequnlock(&rename_lock);
+		return -ELOOP;
+	}
 	__d_move(alias, dentry, false);
-	ret = 0;
-out_err:
-	if (m2)
-		mutex_unlock(m2);
-	if (m1)
-		mutex_unlock(m1);
-	return ret;
+	write_sequnlock(&rename_lock);
+	security_d_instantiate(alias, inode);
+	return 0;
 }
 
 /**
@@ -2786,30 +2822,17 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
 	if (S_ISDIR(inode->i_mode)) {
 		struct dentry *new = __d_find_any_alias(inode);
 		if (unlikely(new)) {
+			int err;
 			/* The reference to new ensures it remains an alias */
 			spin_unlock(&inode->i_lock);
-			write_seqlock(&rename_lock);
-			if (unlikely(d_ancestor(new, dentry))) {
-				write_sequnlock(&rename_lock);
+
+			if (!IS_ROOT(new))
+				err = __d_rename_alias(new, dentry);
+			else
+				err = __d_connect_alias(inode, new, dentry);
+			if (err) {
 				dput(new);
-				new = ERR_PTR(-ELOOP);
-				pr_warn_ratelimited(
-					"VFS: Lookup of '%s' in %s %s"
-					" would have caused loop\n",
-					dentry->d_name.name,
-					inode->i_sb->s_type->name,
-					inode->i_sb->s_id);
-			} else if (!IS_ROOT(new)) {
-				int err = __d_unalias(inode, dentry, new);
-				write_sequnlock(&rename_lock);
-				if (err) {
-					dput(new);
-					new = ERR_PTR(err);
-				}
-			} else {
-				__d_move(new, dentry, false);
-				write_sequnlock(&rename_lock);
-				security_d_instantiate(new, inode);
+				new = ERR_PTR(err);
 			}
 			iput(inode);
 			return new;
-- 
2.2.1

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux