On Tue, Jan 03, 2017 at 04:17:14PM +1300, Eric W. Biederman wrote:
> Al Viro <viro@xxxxxxxxxxxxxxxxxx> writes:
>
> > On Tue, Jan 03, 2017 at 01:51:36PM +1300, Eric W. Biederman wrote:
> >
> >> The only significant thing I see is that you have not taken the
> >> mount_lock on the path where new_mountpoint adds the new struct
> >> mountpoint into the mountpoint hash table.
> >
> > Umm... Point, but I really don't like that bouncing mount_lock up
> > and down there. It's not going to cause any serious overhead,
> > but it just looks ugly... ;-/
> >
> > Let me think for a while...
>
> The other possibility is to grab namespace_sem in mntput_no_expire
> around the call of umount_mnt. That is the only path where
> put_mountpoint can be called where we are not holding namespace_sem.
> That works in the small but I haven't traced the callers of mntput and
> mntput_no_expire yet to see if it works in practice.

No, that's a really bad idea.  Final mntput should _not_ happen under
namespace_lock, but I don't want to be grabbing it in that place.  How
about this instead:

diff --git a/fs/namespace.c b/fs/namespace.c
index b5b1259e064f..20fc797277f8 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -742,29 +742,6 @@ static struct mountpoint *lookup_mountpoint(struct dentry *dentry)
 	return NULL;
 }
 
-static struct mountpoint *new_mountpoint(struct dentry *dentry)
-{
-	struct hlist_head *chain = mp_hash(dentry);
-	struct mountpoint *mp;
-	int ret;
-
-	mp = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
-	if (!mp)
-		return ERR_PTR(-ENOMEM);
-
-	ret = d_set_mounted(dentry);
-	if (ret) {
-		kfree(mp);
-		return ERR_PTR(ret);
-	}
-
-	mp->m_dentry = dentry;
-	mp->m_count = 1;
-	hlist_add_head(&mp->m_hash, chain);
-	INIT_HLIST_HEAD(&mp->m_list);
-	return mp;
-}
-
 static void put_mountpoint(struct mountpoint *mp)
 {
 	if (!--mp->m_count) {
@@ -1595,11 +1572,11 @@ void __detach_mounts(struct dentry *dentry)
 	struct mount *mnt;
 
 	namespace_lock();
+	lock_mount_hash();
 	mp = lookup_mountpoint(dentry);
 	if (IS_ERR_OR_NULL(mp))
 		goto out_unlock;
 
-	lock_mount_hash();
 	event++;
 	while (!hlist_empty(&mp->m_list)) {
 		mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list);
@@ -1609,9 +1586,9 @@ void __detach_mounts(struct dentry *dentry)
 		}
 		else umount_tree(mnt, UMOUNT_CONNECTED);
 	}
-	unlock_mount_hash();
 	put_mountpoint(mp);
 out_unlock:
+	unlock_mount_hash();
 	namespace_unlock();
 }
 
@@ -2027,8 +2004,11 @@ static int attach_recursive_mnt(struct mount *source_mnt,
 
 static struct mountpoint *lock_mount(struct path *path)
 {
+	struct mountpoint *mp = NULL;
 	struct vfsmount *mnt;
 	struct dentry *dentry = path->dentry;
+	int err;
+
 retry:
 	inode_lock(dentry->d_inode);
 	if (unlikely(cant_mount(dentry))) {
@@ -2037,29 +2017,60 @@ static struct mountpoint *lock_mount(struct path *path)
 	}
 	namespace_lock();
 	mnt = lookup_mnt(path);
-	if (likely(!mnt)) {
-		struct mountpoint *mp = lookup_mountpoint(dentry);
-		if (!mp)
-			mp = new_mountpoint(dentry);
-		if (IS_ERR(mp)) {
+	if (unlikely(mnt)) {
+		namespace_unlock();
+		inode_unlock(path->dentry->d_inode);
+		path_put(path);
+		path->mnt = mnt;
+		dentry = path->dentry = dget(mnt->mnt_root);
+		goto retry;
+	}
+
+	/*
+	 * OK, we have namespace_lock held, nothing is overmounting
+	 * *path and inode of mountpoint to be is locked.
+	 */
+	if (likely(!d_mountpoint(dentry)))
+		mp = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
+	read_seqlock_excl(&mount_lock);
+	if (!mp && !d_mountpoint(dentry)) {
+		read_sequnlock_excl(&mount_lock);
+		mp = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
+		read_seqlock_excl(&mount_lock);
+	}
+	if (d_mountpoint(dentry)) {
+		kfree(mp);
+		mp = lookup_mountpoint(dentry);
+	} else {
+		if (unlikely(!mp)) {
+			read_sequnlock_excl(&mount_lock);
 			namespace_unlock();
 			inode_unlock(dentry->d_inode);
-			return mp;
+			return ERR_PTR(-ENOMEM);
 		}
-		return mp;
+		err = d_set_mounted(dentry);
+		if (unlikely(err)) {
+			kfree(mp);
+			read_sequnlock_excl(&mount_lock);
+			namespace_unlock();
+			inode_unlock(dentry->d_inode);
+			return ERR_PTR(err);
+		}
+		mp->m_dentry = dentry;
+		mp->m_count = 1;
+		hlist_add_head(&mp->m_hash, mp_hash(dentry));
+		INIT_HLIST_HEAD(&mp->m_list);
 	}
-	namespace_unlock();
-	inode_unlock(path->dentry->d_inode);
-	path_put(path);
-	path->mnt = mnt;
-	dentry = path->dentry = dget(mnt->mnt_root);
-	goto retry;
+	read_sequnlock_excl(&mount_lock);
+	return mp;
 }
 
 static void unlock_mount(struct mountpoint *where)
 {
 	struct dentry *dentry = where->m_dentry;
+	read_seqlock_excl(&mount_lock);
 	put_mountpoint(where);
+	read_sequnlock_excl(&mount_lock);
 	namespace_unlock();
 	inode_unlock(dentry->d_inode);
 }
@@ -3137,7 +3148,9 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
 	list_del_init(&new_mnt->mnt_expire);
 	unlock_mount_hash();
 	chroot_fs_refs(&root, &new);
+	read_seqlock_excl(&mount_lock);
 	put_mountpoint(root_mp);
+	read_sequnlock_excl(&mount_lock);
 	error = 0;
 out4:
 	unlock_mount(old_mp);
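
For illustration only (nothing below is part of the patch, and all names in it
are made up): the lock_mount() rework above is the usual "preallocate outside
the lock, re-check under it" shape, since kmalloc(GFP_KERNEL) may sleep while
the exclusive side of mount_lock is a spinlock and cannot be held across a
sleep.  A minimal user-space sketch of that shape, with a pthread mutex
standing in for mount_lock and the opportunistic d_mountpoint() checks reduced
to a single unconditional preallocation:

/*
 * User-space sketch (hypothetical names, not kernel code): allocate with a
 * possibly-blocking allocator before taking the non-sleeping lock, then
 * re-check under the lock and either insert the preallocated node or throw
 * it away if an entry already exists.
 */
#include <pthread.h>
#include <stdlib.h>
#include <string.h>

struct point {
	struct point *next;
	char key[32];
	int count;		/* reference count, like mp->m_count */
};

static struct point *points;	/* single list standing in for the hash table */
static pthread_mutex_t points_lock = PTHREAD_MUTEX_INITIALIZER;

/* Caller must hold points_lock. */
static struct point *lookup_point(const char *key)
{
	struct point *p;

	for (p = points; p; p = p->next) {
		if (!strcmp(p->key, key)) {
			p->count++;
			return p;
		}
	}
	return NULL;
}

/* Returns an existing or freshly inserted entry; NULL only if malloc failed. */
struct point *get_point(const char *key)
{
	struct point *spare, *p;

	/* May block; done before taking points_lock, like kmalloc(GFP_KERNEL). */
	spare = malloc(sizeof(*spare));

	pthread_mutex_lock(&points_lock);
	p = lookup_point(key);
	if (p) {
		free(spare);	/* somebody beat us to it; drop the spare */
	} else if (spare) {
		strncpy(spare->key, key, sizeof(spare->key) - 1);
		spare->key[sizeof(spare->key) - 1] = '\0';
		spare->count = 1;
		spare->next = points;
		points = spare;
		p = spare;
	}
	pthread_mutex_unlock(&points_lock);
	return p;
}

When the lookup under the lock finds an existing entry, the spare allocation
is simply freed; the patch does the same with kfree(mp) once d_mountpoint(dentry)
turns out to be true.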