[PATCH] Fix a race in put_mountpoint.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This can cause a panic when simultaneous callers of put_mountpoint
attempt to free the same mountpoint.  This occurs because some callers
hold the mount_hash_lock, while others hold the namespace lock.  Some
even hold both.

In this submitter's case, the panic manifested itself as a GP fault in
put_mountpoint() when it called hlist_del() and attempted to dereference
a m_hash.pprev that had been poisioned by another thread.

Instead of trying to force all mountpoint hash users back under the
namespace lock, add locks that cover just the mountpoint hash.  This
uses hlist_bl to protect against simlultaneous additions and removals,
and RCU for lookups.

Signed-off-by: Krister Johansen <kjlx@xxxxxxxxxxxxxxxxxx>
---
 fs/mount.h     |  6 ++++--
 fs/namespace.c | 62 ++++++++++++++++++++++++++++++++++++++++------------------
 2 files changed, 47 insertions(+), 21 deletions(-)

diff --git a/fs/mount.h b/fs/mount.h
index 2c856fc..1a2f41a 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -3,6 +3,7 @@
 #include <linux/poll.h>
 #include <linux/ns_common.h>
 #include <linux/fs_pin.h>
+#include <linux/rculist_bl.h>
 
 struct mnt_namespace {
 	atomic_t		count;
@@ -24,10 +25,11 @@ struct mnt_pcp {
 };
 
 struct mountpoint {
-	struct hlist_node m_hash;
+	struct hlist_bl_node m_hash;
 	struct dentry *m_dentry;
 	struct hlist_head m_list;
-	int m_count;
+	atomic_t m_count;
+	struct rcu_head m_rcu;
 };
 
 struct mount {
diff --git a/fs/namespace.c b/fs/namespace.c
index b5b1259..7c29420 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -63,7 +63,7 @@ static int mnt_id_start = 0;
 static int mnt_group_start = 1;
 
 static struct hlist_head *mount_hashtable __read_mostly;
-static struct hlist_head *mountpoint_hashtable __read_mostly;
+static struct hlist_bl_head *mountpoint_hashtable __read_mostly;
 static struct kmem_cache *mnt_cache __read_mostly;
 static DECLARE_RWSEM(namespace_sem);
 
@@ -89,7 +89,7 @@ static inline struct hlist_head *m_hash(struct vfsmount *mnt, struct dentry *den
 	return &mount_hashtable[tmp & m_hash_mask];
 }
 
-static inline struct hlist_head *mp_hash(struct dentry *dentry)
+static inline struct hlist_bl_head *mp_hash(struct dentry *dentry)
 {
 	unsigned long tmp = ((unsigned long)dentry / L1_CACHE_BYTES);
 	tmp = tmp + (tmp >> mp_hash_shift);
@@ -727,27 +727,36 @@ bool __is_local_mountpoint(struct dentry *dentry)
 
 static struct mountpoint *lookup_mountpoint(struct dentry *dentry)
 {
-	struct hlist_head *chain = mp_hash(dentry);
+	struct hlist_bl_head *chain = mp_hash(dentry);
+	struct hlist_bl_node *node;
 	struct mountpoint *mp;
 
-	hlist_for_each_entry(mp, chain, m_hash) {
+	rcu_read_lock();
+	hlist_bl_for_each_entry_rcu(mp, node, chain, m_hash) {
 		if (mp->m_dentry == dentry) {
 			/* might be worth a WARN_ON() */
-			if (d_unlinked(dentry))
-				return ERR_PTR(-ENOENT);
-			mp->m_count++;
-			return mp;
+			if (d_unlinked(dentry)) {
+				mp = ERR_PTR(-ENOENT);
+				goto out;
+			}
+			if (atomic_inc_not_zero(&mp->m_count))
+				goto out;
 		}
 	}
-	return NULL;
+	mp = NULL;
+out:
+	rcu_read_unlock();
+	return mp;
 }
 
 static struct mountpoint *new_mountpoint(struct dentry *dentry)
 {
-	struct hlist_head *chain = mp_hash(dentry);
+	struct hlist_bl_head *chain = mp_hash(dentry);
 	struct mountpoint *mp;
 	int ret;
 
+	WARN_ON(!rwsem_is_locked(&namespace_sem));
+
 	mp = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
 	if (!mp)
 		return ERR_PTR(-ENOMEM);
@@ -759,22 +768,37 @@ static struct mountpoint *new_mountpoint(struct dentry *dentry)
 	}
 
 	mp->m_dentry = dentry;
-	mp->m_count = 1;
-	hlist_add_head(&mp->m_hash, chain);
+	init_rcu_head(&mp->m_rcu);
+	atomic_set(&mp->m_count, 1);
+	hlist_bl_lock(chain);
+	hlist_bl_add_head_rcu(&mp->m_hash, chain);
+	hlist_bl_unlock(chain);
 	INIT_HLIST_HEAD(&mp->m_list);
 	return mp;
 }
 
+static void free_mountpoint(struct rcu_head *head)
+{
+	struct mountpoint *mp;
+
+	mp = container_of(head, struct mountpoint, m_rcu);
+	kfree(mp);
+}
+
 static void put_mountpoint(struct mountpoint *mp)
 {
-	if (!--mp->m_count) {
+	if (atomic_dec_and_test(&mp->m_count)) {
 		struct dentry *dentry = mp->m_dentry;
+		struct hlist_bl_head *chain = mp_hash(dentry);
+
 		BUG_ON(!hlist_empty(&mp->m_list));
 		spin_lock(&dentry->d_lock);
 		dentry->d_flags &= ~DCACHE_MOUNTED;
 		spin_unlock(&dentry->d_lock);
-		hlist_del(&mp->m_hash);
-		kfree(mp);
+		hlist_bl_lock(chain);
+		hlist_bl_del_rcu(&mp->m_hash);
+		hlist_bl_unlock(chain);
+		call_rcu(&mp->m_rcu, free_mountpoint);
 	}
 }
 
@@ -846,7 +870,7 @@ void mnt_set_mountpoint(struct mount *mnt,
 			struct mountpoint *mp,
 			struct mount *child_mnt)
 {
-	mp->m_count++;
+	atomic_inc(&mp->m_count);
 	mnt_add_count(mnt, 1);	/* essentially, that's mntget */
 	child_mnt->mnt_mountpoint = dget(mp->m_dentry);
 	child_mnt->mnt_parent = mnt;
@@ -3120,7 +3144,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
 	/* make certain new is below the root */
 	if (!is_path_reachable(new_mnt, new.dentry, &root))
 		goto out4;
-	root_mp->m_count++; /* pin it so it won't go away */
+	atomic_inc(&root_mp->m_count); /* pin it so it won't go away */
 	lock_mount_hash();
 	detach_mnt(new_mnt, &parent_path);
 	detach_mnt(root_mnt, &root_parent);
@@ -3199,7 +3223,7 @@ void __init mnt_init(void)
 				0,
 				&m_hash_shift, &m_hash_mask, 0, 0);
 	mountpoint_hashtable = alloc_large_system_hash("Mountpoint-cache",
-				sizeof(struct hlist_head),
+				sizeof(struct hlist_bl_head),
 				mphash_entries, 19,
 				0,
 				&mp_hash_shift, &mp_hash_mask, 0, 0);
@@ -3210,7 +3234,7 @@ void __init mnt_init(void)
 	for (u = 0; u <= m_hash_mask; u++)
 		INIT_HLIST_HEAD(&mount_hashtable[u]);
 	for (u = 0; u <= mp_hash_mask; u++)
-		INIT_HLIST_HEAD(&mountpoint_hashtable[u]);
+		INIT_HLIST_BL_HEAD(&mountpoint_hashtable[u]);
 
 	kernfs_init();
 
-- 
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux