[PATCH 39/46] fs: d_revalidate_rcu for rcu-walk

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This seems to be the best way to get filesystems aware of RCU walking
in their d_revalidate routines. Needs more documentation if it works
out OK.

Signed-off-by: Nick Piggin <npiggin@xxxxxxxxx>
---
 fs/dcache.c            |    2 +
 fs/ecryptfs/dentry.c   |   15 ++++++++----
 fs/fat/namei_vfat.c    |   14 ++++++------
 fs/fuse/dir.c          |    7 ++++-
 fs/namei.c             |   56 ++++++++++++++++++++++++++++++++++-------------
 fs/sysfs/dir.c         |   18 ++++++++++++--
 include/linux/dcache.h |    7 +++++-
 7 files changed, 85 insertions(+), 34 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index fa6e7a5..67a08d4 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1243,6 +1243,8 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
 		dentry->d_flags |= DCACHE_OP_COMPARE;
 	if (op->d_revalidate)
 		dentry->d_flags |= DCACHE_OP_REVALIDATE;
+	if (op->d_revalidate_rcu)
+		dentry->d_flags |= DCACHE_OP_REVALIDATE_RCU;
 	if (op->d_delete)
 		dentry->d_flags |= DCACHE_OP_DELETE;
 
diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index 906e803..f9a8be8 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -30,7 +30,7 @@
 #include "ecryptfs_kernel.h"
 
 /**
- * ecryptfs_d_revalidate - revalidate an ecryptfs dentry
+ * ecryptfs_d_revalidate_rcu - revalidate an ecryptfs dentry
  * @dentry: The ecryptfs dentry
  * @nd: The associated nameidata
  *
@@ -42,7 +42,7 @@
  * Returns 1 if valid, 0 otherwise.
  *
  */
-static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
+static int ecryptfs_d_revalidate_rcu(struct dentry *dentry, struct nameidata *nd)
 {
 	struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
 	struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
@@ -50,13 +50,18 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
 	struct vfsmount *vfsmount_save;
 	int rc = 1;
 
-	if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate)
+	if (!(lower_dentry->d_flags & DCACHE_OP_REVALIDATE_EITHER))
 		goto out;
+	if (nd->flags & LOOKUP_RCU)
+		return -ECHILD;
 	dentry_save = nd->path.dentry;
 	vfsmount_save = nd->path.mnt;
 	nd->path.dentry = lower_dentry;
 	nd->path.mnt = lower_mnt;
-	rc = lower_dentry->d_op->d_revalidate(lower_dentry, nd);
+	if (lower_dentry->d_flags & DCACHE_OP_REVALIDATE)
+		rc = lower_dentry->d_op->d_revalidate(lower_dentry, nd);
+	else
+		rc = lower_dentry->d_op->d_revalidate_rcu(lower_dentry, nd);
 	nd->path.dentry = dentry_save;
 	nd->path.mnt = vfsmount_save;
 	if (dentry->d_inode) {
@@ -91,6 +96,6 @@ static void ecryptfs_d_release(struct dentry *dentry)
 }
 
 const struct dentry_operations ecryptfs_dops = {
-	.d_revalidate = ecryptfs_d_revalidate,
+	.d_revalidate_rcu = ecryptfs_d_revalidate_rcu,
 	.d_release = ecryptfs_d_release,
 };
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index b721715..a169f24 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -31,7 +31,7 @@
  * If it happened, the negative dentry isn't actually negative
  * anymore.  So, drop it.
  */
-static int vfat_revalidate_shortname(struct dentry *dentry)
+static int vfat_revalidate_rcu_shortname(struct dentry *dentry)
 {
 	int ret = 1;
 	spin_lock(&dentry->d_lock);
@@ -41,15 +41,15 @@ static int vfat_revalidate_shortname(struct dentry *dentry)
 	return ret;
 }
 
-static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd)
+static int vfat_revalidate_rcu(struct dentry *dentry, struct nameidata *nd)
 {
 	/* This is not negative dentry. Always valid. */
 	if (dentry->d_inode)
 		return 1;
-	return vfat_revalidate_shortname(dentry);
+	return vfat_revalidate_rcu_shortname(dentry);
 }
 
-static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd)
+static int vfat_revalidate_rcu_ci(struct dentry *dentry, struct nameidata *nd)
 {
 	/*
 	 * This is not negative dentry. Always valid.
@@ -81,7 +81,7 @@ static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd)
 			return 0;
 	}
 
-	return vfat_revalidate_shortname(dentry);
+	return vfat_revalidate_rcu_shortname(dentry);
 }
 
 /* returns the length of a struct qstr, ignoring trailing dots */
@@ -175,13 +175,13 @@ static int vfat_cmp(const struct dentry *parent,
 }
 
 static const struct dentry_operations vfat_ci_dentry_ops = {
-	.d_revalidate	= vfat_revalidate_ci,
+	.d_revalidate	= vfat_revalidate_rcu_ci,
 	.d_hash		= vfat_hashi,
 	.d_compare	= vfat_cmpi,
 };
 
 static const struct dentry_operations vfat_dentry_ops = {
-	.d_revalidate	= vfat_revalidate,
+	.d_revalidate	= vfat_revalidate_rcu,
 	.d_hash		= vfat_hash,
 	.d_compare	= vfat_cmp,
 };
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index c9a8a42..e409185 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -154,7 +154,7 @@ u64 fuse_get_attr_version(struct fuse_conn *fc)
  * the lookup once more.  If the lookup results in the same inode,
  * then refresh the attributes, timeouts and mark the dentry valid.
  */
-static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
+static int fuse_dentry_revalidate_rcu(struct dentry *entry, struct nameidata *nd)
 {
 	struct inode *inode = entry->d_inode;
 
@@ -169,6 +169,9 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
 		struct dentry *parent;
 		u64 attr_version;
 
+		if (nd->flags & LOOKUP_RCU)
+			return -ECHILD;
+
 		/* For negative dentries, always do a fresh lookup */
 		if (!inode)
 			return 0;
@@ -225,7 +228,7 @@ static int invalid_nodeid(u64 nodeid)
 }
 
 const struct dentry_operations fuse_dentry_operations = {
-	.d_revalidate	= fuse_dentry_revalidate,
+	.d_revalidate_rcu = fuse_dentry_revalidate_rcu,
 };
 
 int fuse_valid_type(int m)
diff --git a/fs/namei.c b/fs/namei.c
index 717ab13..d8f7ece 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -519,10 +519,30 @@ void release_open_intent(struct nameidata *nd)
 		fput(nd->intent.open.file);
 }
 
+static int d_revalidate(struct dentry *dentry, struct nameidata *nd)
+{
+	if (dentry->d_op->d_revalidate_rcu) {
+		int status;
+		status = dentry->d_op->d_revalidate_rcu(dentry, nd);
+		if (!status && (nd->flags & LOOKUP_RCU))
+			status = -ECHILD;
+		if (status == -ECHILD) {
+			BUG_ON(!(nd->flags & LOOKUP_RCU));
+			if (nameidata_dentry_drop_rcu(nd, dentry))
+				return status;
+			status = dentry->d_op->d_revalidate_rcu(dentry, nd);
+		}
+		return status;
+	} else
+		return dentry->d_op->d_revalidate(dentry, nd);
+}
+
 static inline struct dentry *
 do_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
-	int status = dentry->d_op->d_revalidate(dentry, nd);
+	int status;
+
+	status = d_revalidate(dentry, nd);
 	if (unlikely(status <= 0)) {
 		/*
 		 * The dentry failed validation.
@@ -545,7 +565,7 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd)
 
 static inline int need_reval_dot(struct dentry *dentry)
 {
-	if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
+	if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE_EITHER)))
 		return 0;
 
 	if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)))
@@ -582,7 +602,7 @@ force_reval_path(struct path *path, struct nameidata *nd)
 	if (!need_reval_dot(dentry))
 		return 0;
 
-	status = dentry->d_op->d_revalidate(dentry, nd);
+	status = d_revalidate(dentry, nd);
 	if (status > 0)
 		return 0;
 
@@ -994,10 +1014,11 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
 			return -ECHILD;
 
 		nd->seq = seq;
-		if (dentry->d_flags & DCACHE_OP_REVALIDATE) {
-			/* XXX: RCU chokes here */
-			if (nameidata_dentry_drop_rcu(nd, dentry))
-				return -ECHILD;
+		if (dentry->d_flags & DCACHE_OP_REVALIDATE_EITHER) {
+			if (dentry->d_flags & DCACHE_OP_REVALIDATE) {
+				if (nameidata_dentry_drop_rcu(nd, dentry))
+					return -ECHILD;
+			}
 			goto need_revalidate;
 		}
 		path->mnt = mnt;
@@ -1050,8 +1071,11 @@ need_lookup:
 
 need_revalidate:
 	dentry = do_revalidate(dentry, nd);
-	if (!dentry)
+	if (!dentry) {
+		if (try_nameidata_drop_rcu(nd))
+			return -ECHILD;
 		goto need_lookup;
+	}
 	if (IS_ERR(dentry))
 		goto fail;
 	goto done;
@@ -1247,12 +1271,11 @@ return_reval:
 		 * We may need to check the cached dentry for staleness.
 		 */
 		if (need_reval_dot(nd->path.dentry)) {
-			if (try_nameidata_drop_rcu(nd))
-				return -ECHILD;
-			err = -ESTALE;
 			/* Note: we do not d_invalidate() */
-			if (!nd->path.dentry->d_op->d_revalidate(
-					nd->path.dentry, nd))
+			err = d_revalidate(nd->path.dentry, nd);
+			if (!err)
+				err = -ESTALE;
+			if (err < 0)
 				break;
 		}
 return_base:
@@ -1566,7 +1589,7 @@ static struct dentry *__lookup_hash(struct qstr *name,
 	 */
 	dentry = d_lookup(base, name);
 
-	if (dentry && (dentry->d_flags & DCACHE_OP_REVALIDATE))
+	if (dentry && (dentry->d_flags & DCACHE_OP_REVALIDATE_EITHER))
 		dentry = do_revalidate(dentry, nd);
 
 	if (!dentry)
@@ -2021,10 +2044,11 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 		dir = nd->path.dentry;
 	case LAST_DOT:
 		if (need_reval_dot(dir)) {
-			if (!dir->d_op->d_revalidate(dir, nd)) {
+			error = d_revalidate(nd->path.dentry, nd);
+			if (!error)
 				error = -ESTALE;
+			if (error < 0)
 				goto exit;
-			}
 		}
 		/* fallthrough */
 	case LAST_ROOT:
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 3e076ca..c3fff57 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -237,12 +237,21 @@ static int sysfs_dentry_delete(const struct dentry *dentry)
 	return !!(sd->s_flags & SYSFS_FLAG_REMOVED);
 }
 
-static int sysfs_dentry_revalidate(struct dentry *dentry, struct nameidata *nd)
+static int sysfs_dentry_revalidate_rcu(struct dentry *dentry, struct nameidata *nd)
 {
 	struct sysfs_dirent *sd = dentry->d_fsdata;
 	int is_dir;
 
-	mutex_lock(&sysfs_mutex);
+	if (nd->flags & LOOKUP_RCU) {
+		if (!mutex_trylock(&sysfs_mutex))
+			return -ECHILD;
+		/* ensure dentry still exists, now under the sysfs_mutex */
+		if (read_seqcount_retry(&dentry->d_seq, nd->seq)) {
+			mutex_unlock(&sysfs_mutex);
+			return -ECHILD;
+		}
+	} else
+		mutex_lock(&sysfs_mutex);
 
 	/* The sysfs dirent has been deleted */
 	if (sd->s_flags & SYSFS_FLAG_REMOVED)
@@ -272,6 +281,9 @@ out_bad:
 	 */
 	is_dir = (sysfs_type(sd) == SYSFS_DIR);
 	mutex_unlock(&sysfs_mutex);
+	if (nd->flags & LOOKUP_RCU)
+		return -ECHILD;
+
 	if (is_dir) {
 		/* If we have submounts we must allow the vfs caches
 		 * to lie about the state of the filesystem to prevent
@@ -294,7 +306,7 @@ static void sysfs_dentry_iput(struct dentry *dentry, struct inode *inode)
 }
 
 static const struct dentry_operations sysfs_dentry_ops = {
-	.d_revalidate	= sysfs_dentry_revalidate,
+	.d_revalidate_rcu = sysfs_dentry_revalidate_rcu,
 	.d_delete	= sysfs_dentry_delete,
 	.d_iput		= sysfs_dentry_iput,
 };
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 3c5cafc..72f5f32 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -132,6 +132,7 @@ enum dentry_d_lock_class
 
 struct dentry_operations {
 	int (*d_revalidate)(struct dentry *, struct nameidata *);
+	int (*d_revalidate_rcu)(struct dentry *, struct nameidata *);
 	int (*d_hash)(const struct dentry *, const struct inode *,
 			struct qstr *);
 	int (*d_compare)(const struct dentry *,
@@ -184,8 +185,12 @@ struct dentry_operations {
 #define DCACHE_OP_HASH		0x1000
 #define DCACHE_OP_COMPARE	0x2000
 #define DCACHE_OP_REVALIDATE	0x4000
-#define DCACHE_OP_DELETE	0x8000
+#define DCACHE_OP_REVALIDATE_RCU 0x8000
 
+#define DCACHE_OP_DELETE	0x10000
+
+#define DCACHE_OP_REVALIDATE_EITHER	\
+	(DCACHE_OP_REVALIDATE|DCACHE_OP_REVALIDATE_RCU)
 
 extern spinlock_t dcache_inode_lock;
 extern spinlock_t dcache_hash_lock;
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux