Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx> writes:

> On Thu, Dec 18, 2014 at 9:07 AM, Linus Torvalds
> <torvalds@xxxxxxxxxxxxxxxxxxxx> wrote:
>> Why is this piece of code using its own made up and buggy list handling in
>> the first place? We have list functions for these things, exactly so that
>> people shouldn't write buggy stuff by hand.
>
> Oh. Ok, I see what's going on. We have "list_splice()", but we don't
> have the equivalent "hlist_splice()". So it's doing that by hand, and
> did it badly.
>
> Al, this is your bug. I guess I can take the "manual hlist_splice" fix
> from Eric, but I'm not really happy with it. There's a few other
> places in that same commit where the list splice operation has been
> open-coded.
>
> Mind taking a look?

It looks like we can pretty easily use mnt_list instead of mnt_hash;
see below (note: the code is only compile tested).

While converting this to ordinary list helpers I found something
strange. In __propagate_umount we currently add the child to be
unmounted at a different location in the list than we did before
mnt_hash was converted to an hlist for the benefit of rcu accesses.
Maybe propagate_next handles this (I still need to read and understand
that code); if not, it looks like I may have found another bug, as it
appears that today we can add a node to our list without propagating
the unmount from that node.

From: "Eric W. Biederman" <ebiederm@xxxxxxxxxxxx>
Date: Thu, 18 Dec 2014 13:10:48 -0600
Subject: [PATCH] mnt: In umount_tree reuse mnt_list instead of mnt_hash

umount_tree builds a list of mounts that need to be unmounted.
Utilize mnt_list for this purpose instead of mnt_hash, as mnt_list is
an ordinary list_head, allowing the use of list_splice and list_move
instead of rolling our own.

Signed-off-by: "Eric W. Biederman" <ebiederm@xxxxxxxxxxxx>
---
 fs/namespace.c | 45 +++++++++++++++++++--------------------------
 fs/pnode.c     |  7 ++++---
 fs/pnode.h     |  2 +-
 3 files changed, 24 insertions(+), 30 deletions(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index 6afbd7bb79f3..2f21a973d7bc 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1285,23 +1285,22 @@ int may_umount(struct vfsmount *mnt)
 
 EXPORT_SYMBOL(may_umount);
 
-static HLIST_HEAD(unmounted);	/* protected by namespace_sem */
+static LIST_HEAD(unmounted);	/* protected by namespace_sem */
 
 static void namespace_unlock(void)
 {
 	struct mount *mnt;
-	struct hlist_head head = unmounted;
+	LIST_HEAD(head);
 
-	if (likely(hlist_empty(&head))) {
+	if (likely(list_empty(&unmounted))) {
 		up_write(&namespace_sem);
 		return;
 	}
 
-	head.first->pprev = &head.first;
-	INIT_HLIST_HEAD(&unmounted);
+	list_splice_init(&unmounted, &head);
 
 	/* undo decrements we'd done in umount_tree() */
-	hlist_for_each_entry(mnt, &head, mnt_hash)
+	list_for_each_entry(mnt, &head, mnt_list)
 		if (mnt->mnt_ex_mountpoint.mnt)
 			mntget(mnt->mnt_ex_mountpoint.mnt);
 
@@ -1309,9 +1308,9 @@ static void namespace_unlock(void)
 
 	synchronize_rcu();
 
-	while (!hlist_empty(&head)) {
-		mnt = hlist_entry(head.first, struct mount, mnt_hash);
-		hlist_del_init(&mnt->mnt_hash);
+	while (!list_empty(&head)) {
+		mnt = list_first_entry(&head, struct mount, mnt_list);
+		list_del_init(&mnt->mnt_list);
 		if (mnt->mnt_ex_mountpoint.mnt)
 			path_put(&mnt->mnt_ex_mountpoint);
 		mntput(&mnt->mnt);
@@ -1332,24 +1331,25 @@ static inline void namespace_lock(void)
  */
 void umount_tree(struct mount *mnt, int how)
 {
-	HLIST_HEAD(tmp_list);
+	LIST_HEAD(tmp_list);
 	struct mount *p;
-	struct mount *last = NULL;
 
-	for (p = mnt; p; p = next_mnt(p, mnt)) {
-		hlist_del_init_rcu(&p->mnt_hash);
-		hlist_add_head(&p->mnt_hash, &tmp_list);
-	}
+	/* Gather the mounts to umount */
+	for (p = mnt; p; p = next_mnt(p, mnt))
+		list_move(&p->mnt_list, &tmp_list);
 
-	hlist_for_each_entry(p, &tmp_list, mnt_hash)
+	/* Hide the mounts from lookup_mnt and mnt_mounts */
+	list_for_each_entry(p, &tmp_list, mnt_list) {
+		hlist_del_init_rcu(&p->mnt_hash);
 		list_del_init(&p->mnt_child);
+	}
 
+	/* Add propagated mounts to the tmp_list */
 	if (how)
 		propagate_umount(&tmp_list);
 
-	hlist_for_each_entry(p, &tmp_list, mnt_hash) {
+	list_for_each_entry(p, &tmp_list, mnt_list) {
 		list_del_init(&p->mnt_expire);
-		list_del_init(&p->mnt_list);
 		__touch_mnt_namespace(p->mnt_ns);
 		p->mnt_ns = NULL;
 		if (how < 2)
@@ -1366,15 +1366,8 @@ void umount_tree(struct mount *mnt, int how)
 			p->mnt_mp = NULL;
 		}
 		change_mnt_propagation(p, MS_PRIVATE);
-		last = p;
-	}
-	if (last) {
-		last->mnt_hash.next = unmounted.first;
-		if (unmounted.first)
-			unmounted.first->pprev = &last->mnt_hash.next;
-		unmounted.first = tmp_list.first;
-		unmounted.first->pprev = &unmounted.first;
 	}
+	list_splice(&tmp_list, &unmounted);
 }
 
 static void shrink_submounts(struct mount *mnt);
diff --git a/fs/pnode.c b/fs/pnode.c
index 260ac8f898a4..c4319520d884 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -384,7 +384,8 @@ static void __propagate_umount(struct mount *mnt)
 		if (child && list_empty(&child->mnt_mounts)) {
 			list_del_init(&child->mnt_child);
 			hlist_del_init_rcu(&child->mnt_hash);
-			hlist_add_before_rcu(&child->mnt_hash, &mnt->mnt_hash);
+			/* list_move or list_move_tail? */
+			list_move(&child->mnt_list, &mnt->mnt_list);
 		}
 	}
 }
@@ -396,11 +397,11 @@ static void __propagate_umount(struct mount *mnt)
  *
  * vfsmount lock must be held for write
  */
-int propagate_umount(struct hlist_head *list)
+int propagate_umount(struct list_head *list)
 {
 	struct mount *mnt;
 
-	hlist_for_each_entry(mnt, list, mnt_hash)
+	list_for_each_entry(mnt, list, mnt_list)
 		__propagate_umount(mnt);
 	return 0;
 }
diff --git a/fs/pnode.h b/fs/pnode.h
index 4a246358b031..997596a5d31b 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -40,7 +40,7 @@ static inline void set_mnt_shared(struct mount *mnt)
 void change_mnt_propagation(struct mount *, int);
 int propagate_mnt(struct mount *, struct mountpoint *, struct mount *,
 		struct hlist_head *);
-int propagate_umount(struct hlist_head *);
+int propagate_umount(struct list_head *);
 int propagate_mount_busy(struct mount *, int);
 void mnt_release_group_id(struct mount *);
 int get_dominating_id(struct mount *mnt, const struct path *root);
-- 
2.1.3
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html