Re: [RFC][PATCH] fix move/migrate_pages() race on task struct

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, 23 Feb 2012, Eric W. Biederman wrote:

> > The bug in migrate_pages() is that we do a rcu_unlock and a rcu_lock. If
> > we drop those then we should be safe if the use of a task pointer within a
> > rcu section is safe without taking a refcount.
>
> Yes the user of a task_struct pointer found via a userspace pid is valid
> for the life of an rcu critical section, and the bug is indeed that we
> drop the rcu_lock and somehow expect the task to remain valid.
>
> The guarantee comes from release_task.  In release_task we call
> __exit_signal which calls __unhash_process, and then we call
> delayed_put_task to guarantee that the task lives until the end of the
> rcu interval.

Ah. Ok. Great.

> In migrate_pages we have a lot of task accesses outside of the rcu
> critical section, and without a reference count on task.

Yes but that is only of interesting for setup and verification of
permissions. What matters during migration is that the mm_struct does not
go away and we take a refcount on that one.

> I tell you the truth trying to figure out what that code needs to be
> correct if task != current makes my head hurt.

Hmm...

> I think we need to grab a reference on task_struct, to stop the task
> from going away, and in addition we need to hold task_lock.  To keep
> task->mm from changing (see exec_mmap).  But we can't do that and sleep
> so I think the entire function needs to be rewritten, and the need for
> task deep in the migrate_pages path needs to be removed as even with the
> reference count held we can race with someone calling exec.

We dont need the task during migration. We only need the mm. The task
is safe until rcu_read_unlock therefore maybe the following should fix
migrate pages:


Subject: migration: Do not do rcu_read_unlock until the last time we need the task_struct pointer

Migration functions perform the rcu_read_unlock too early. As a result the
task pointed to may change. Bugs were introduced when adding security checks
because rcu_unlock/lock sequences were inserted. Plus the security checks
and do_move_pages used the task_struct pointer after rcu_unlock.

Fix those issues by removing the unlock/lock sequences and moving the
rcu_read_unlock after the last use of the task struct pointer.

Signed-off-by: Christoph Lameter <cl@xxxxxxxxx>


---
 mm/mempolicy.c |   22 +++++++++++-----------
 mm/migrate.c   |   28 +++++++++++++++-------------
 2 files changed, 26 insertions(+), 24 deletions(-)

Index: linux-2.6/mm/mempolicy.c
===================================================================
--- linux-2.6.orig/mm/mempolicy.c	2012-01-13 04:04:36.229807226 -0600
+++ linux-2.6/mm/mempolicy.c	2012-02-24 03:11:44.913710625 -0600
@@ -1318,16 +1318,14 @@ SYSCALL_DEFINE4(migrate_pages, pid_t, pi
 	rcu_read_lock();
 	task = pid ? find_task_by_vpid(pid) : current;
 	if (!task) {
-		rcu_read_unlock();
 		err = -ESRCH;
-		goto out;
+		goto unlock_out;
 	}
 	mm = get_task_mm(task);
-	rcu_read_unlock();

 	err = -EINVAL;
 	if (!mm)
-		goto out;
+		goto unlock_out;

 	/*
 	 * Check if this process has the right to modify the specified
@@ -1335,33 +1333,31 @@ SYSCALL_DEFINE4(migrate_pages, pid_t, pi
 	 * capabilities, superuser privileges or the same
 	 * userid as the target process.
 	 */
-	rcu_read_lock();
 	tcred = __task_cred(task);
 	if (cred->euid != tcred->suid && cred->euid != tcred->uid &&
 	    cred->uid  != tcred->suid && cred->uid  != tcred->uid &&
 	    !capable(CAP_SYS_NICE)) {
-		rcu_read_unlock();
 		err = -EPERM;
-		goto out;
+		goto unlock_out;
 	}
-	rcu_read_unlock();

 	task_nodes = cpuset_mems_allowed(task);
 	/* Is the user allowed to access the target nodes? */
 	if (!nodes_subset(*new, task_nodes) && !capable(CAP_SYS_NICE)) {
 		err = -EPERM;
-		goto out;
+		goto unlock_out;
 	}

 	if (!nodes_subset(*new, node_states[N_HIGH_MEMORY])) {
 		err = -EINVAL;
-		goto out;
+		goto unlock_out;
 	}

 	err = security_task_movememory(task);
 	if (err)
-		goto out;
+		goto unlock_out;

+	rcu_read_unlock();
 	err = do_migrate_pages(mm, old, new,
 		capable(CAP_SYS_NICE) ? MPOL_MF_MOVE_ALL : MPOL_MF_MOVE);
 out:
@@ -1370,6 +1366,10 @@ out:
 	NODEMASK_SCRATCH_FREE(scratch);

 	return err;
+
+unlock_out:
+	rcu_read_unlock();
+	goto out;
 }


Index: linux-2.6/mm/migrate.c
===================================================================
--- linux-2.6.orig/mm/migrate.c	2012-02-06 04:25:35.857094372 -0600
+++ linux-2.6/mm/migrate.c	2012-02-24 03:18:55.569698851 -0600
@@ -1176,20 +1176,17 @@ set_status:
  * Migrate an array of page address onto an array of nodes and fill
  * the corresponding array of status.
  */
-static int do_pages_move(struct mm_struct *mm, struct task_struct *task,
+static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
 			 unsigned long nr_pages,
 			 const void __user * __user *pages,
 			 const int __user *nodes,
 			 int __user *status, int flags)
 {
 	struct page_to_node *pm;
-	nodemask_t task_nodes;
 	unsigned long chunk_nr_pages;
 	unsigned long chunk_start;
 	int err;

-	task_nodes = cpuset_mems_allowed(task);
-
 	err = -ENOMEM;
 	pm = (struct page_to_node *)__get_free_page(GFP_KERNEL);
 	if (!pm)
@@ -1351,6 +1348,7 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid,
 	struct task_struct *task;
 	struct mm_struct *mm;
 	int err;
+	nodemask_t task_nodes;

 	/* Check flags */
 	if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL))
@@ -1367,10 +1365,11 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid,
 		return -ESRCH;
 	}
 	mm = get_task_mm(task);
-	rcu_read_unlock();

-	if (!mm)
+	if (!mm) {
+		rcu_read_unlock();
 		return -EINVAL;
+	}

 	/*
 	 * Check if this process has the right to modify the specified
@@ -1378,24 +1377,23 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid,
 	 * capabilities, superuser privileges or the same
 	 * userid as the target process.
 	 */
-	rcu_read_lock();
 	tcred = __task_cred(task);
 	if (cred->euid != tcred->suid && cred->euid != tcred->uid &&
 	    cred->uid  != tcred->suid && cred->uid  != tcred->uid &&
 	    !capable(CAP_SYS_NICE)) {
-		rcu_read_unlock();
 		err = -EPERM;
-		goto out;
+		goto unlock_out;
 	}
-	rcu_read_unlock();

  	err = security_task_movememory(task);
  	if (err)
-		goto out;
+		goto unlock_out;

+	task_nodes = cpuset_mems_allowed(task);
+	rcu_read_unlock();
 	if (nodes) {
-		err = do_pages_move(mm, task, nr_pages, pages, nodes, status,
-				    flags);
+		err = do_pages_move(mm, task_nodes, nr_pages, pages, nodes,
+				status, flags);
 	} else {
 		err = do_pages_stat(mm, nr_pages, pages, status);
 	}
@@ -1403,6 +1401,10 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid,
 out:
 	mmput(mm);
 	return err;
+
+unlock_out:
+	rcu_read_unlock();
+	goto out;
 }

 /*

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@xxxxxxxxx.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@xxxxxxxxx";> email@xxxxxxxxx </a>


[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux]     [Linux OMAP]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]