Re: [RFC][PATCH] fix move/migrate_pages() race on task struct

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Christoph Lameter <cl@xxxxxxxxx> writes:

> On Thu, 23 Feb 2012, Eric W. Biederman wrote:
>
>> > The bug in migrate_pages() is that we do a rcu_unlock and a rcu_lock. If
>> > we drop those then we should be safe if the use of a task pointer within a
>> > rcu section is safe without taking a refcount.
>>
>> Yes the user of a task_struct pointer found via a userspace pid is valid
>> for the life of an rcu critical section, and the bug is indeed that we
>> drop the rcu_lock and somehow expect the task to remain valid.
>>
>> The guarantee comes from release_task.  In release_task we call
>> __exit_signal which calls __unhash_process, and then we call
>> delayed_put_task to guarantee that the task lives until the end of the
>> rcu interval.
>
> Ah. Ok. Great.
>
>> In migrate_pages we have a lot of task accesses outside of the rcu
>> critical section, and without a reference count on task.
>
> Yes but that is only of interesting for setup and verification of
> permissions. What matters during migration is that the mm_struct does not
> go away and we take a refcount on that one.
>
>> I tell you the truth trying to figure out what that code needs to be
>> correct if task != current makes my head hurt.
>
> Hmm...

Especially because I was looking at move_pages...

>> I think we need to grab a reference on task_struct, to stop the task
>> from going away, and in addition we need to hold task_lock.  To keep
>> task->mm from changing (see exec_mmap).  But we can't do that and sleep
>> so I think the entire function needs to be rewritten, and the need for
>> task deep in the migrate_pages path needs to be removed as even with the
>> reference count held we can race with someone calling exec.
>
> We dont need the task during migration. We only need the mm. The task
> is safe until rcu_read_unlock therefore maybe the following should fix
> migrate pages:
>
>
> Subject: migration: Do not do rcu_read_unlock until the last time we need the task_struct pointer
>
> Migration functions perform the rcu_read_unlock too early. As a result the
> task pointed to may change. Bugs were introduced when adding security checks
> because rcu_unlock/lock sequences were inserted. Plus the security checks
> and do_move_pages used the task_struct pointer after rcu_unlock.
>
> Fix those issues by removing the unlock/lock sequences and moving the
> rcu_read_unlock after the last use of the task struct pointer.
>
> Signed-off-by: Christoph Lameter <cl@xxxxxxxxx>

Acked-by: "Eric W. Biederman" <ebiederm@xxxxxxxxxxxx>

Somehow in my skimming through the code earlier I thought the
situation was worse.  On the assumption there are not any
sleeping functions along the path this looks like a good fix.

> ---
>  mm/mempolicy.c |   22 +++++++++++-----------
>  mm/migrate.c   |   28 +++++++++++++++-------------
>  2 files changed, 26 insertions(+), 24 deletions(-)
>
> Index: linux-2.6/mm/mempolicy.c
> ===================================================================
> --- linux-2.6.orig/mm/mempolicy.c	2012-01-13 04:04:36.229807226 -0600
> +++ linux-2.6/mm/mempolicy.c	2012-02-24 03:11:44.913710625 -0600
> @@ -1318,16 +1318,14 @@ SYSCALL_DEFINE4(migrate_pages, pid_t, pi
>  	rcu_read_lock();
>  	task = pid ? find_task_by_vpid(pid) : current;
>  	if (!task) {
> -		rcu_read_unlock();
>  		err = -ESRCH;
> -		goto out;
> +		goto unlock_out;
>  	}
>  	mm = get_task_mm(task);
> -	rcu_read_unlock();
>
>  	err = -EINVAL;
>  	if (!mm)
> -		goto out;
> +		goto unlock_out;
>
>  	/*
>  	 * Check if this process has the right to modify the specified
> @@ -1335,33 +1333,31 @@ SYSCALL_DEFINE4(migrate_pages, pid_t, pi
>  	 * capabilities, superuser privileges or the same
>  	 * userid as the target process.
>  	 */
> -	rcu_read_lock();
>  	tcred = __task_cred(task);
>  	if (cred->euid != tcred->suid && cred->euid != tcred->uid &&
>  	    cred->uid  != tcred->suid && cred->uid  != tcred->uid &&
>  	    !capable(CAP_SYS_NICE)) {
> -		rcu_read_unlock();
>  		err = -EPERM;
> -		goto out;
> +		goto unlock_out;
>  	}
> -	rcu_read_unlock();
>
>  	task_nodes = cpuset_mems_allowed(task);
>  	/* Is the user allowed to access the target nodes? */
>  	if (!nodes_subset(*new, task_nodes) && !capable(CAP_SYS_NICE)) {
>  		err = -EPERM;
> -		goto out;
> +		goto unlock_out;
>  	}
>
>  	if (!nodes_subset(*new, node_states[N_HIGH_MEMORY])) {
>  		err = -EINVAL;
> -		goto out;
> +		goto unlock_out;
>  	}
>
>  	err = security_task_movememory(task);
>  	if (err)
> -		goto out;
> +		goto unlock_out;
>
> +	rcu_read_unlock();
>  	err = do_migrate_pages(mm, old, new,
>  		capable(CAP_SYS_NICE) ? MPOL_MF_MOVE_ALL : MPOL_MF_MOVE);
>  out:
> @@ -1370,6 +1366,10 @@ out:
>  	NODEMASK_SCRATCH_FREE(scratch);
>
>  	return err;
> +
> +unlock_out:
> +	rcu_read_unlock();
> +	goto out;
>  }
>
>
> Index: linux-2.6/mm/migrate.c
> ===================================================================
> --- linux-2.6.orig/mm/migrate.c	2012-02-06 04:25:35.857094372 -0600
> +++ linux-2.6/mm/migrate.c	2012-02-24 03:18:55.569698851 -0600
> @@ -1176,20 +1176,17 @@ set_status:
>   * Migrate an array of page address onto an array of nodes and fill
>   * the corresponding array of status.
>   */
> -static int do_pages_move(struct mm_struct *mm, struct task_struct *task,
> +static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
>  			 unsigned long nr_pages,
>  			 const void __user * __user *pages,
>  			 const int __user *nodes,
>  			 int __user *status, int flags)
>  {
>  	struct page_to_node *pm;
> -	nodemask_t task_nodes;
>  	unsigned long chunk_nr_pages;
>  	unsigned long chunk_start;
>  	int err;
>
> -	task_nodes = cpuset_mems_allowed(task);
> -
>  	err = -ENOMEM;
>  	pm = (struct page_to_node *)__get_free_page(GFP_KERNEL);
>  	if (!pm)
> @@ -1351,6 +1348,7 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid,
>  	struct task_struct *task;
>  	struct mm_struct *mm;
>  	int err;
> +	nodemask_t task_nodes;
>
>  	/* Check flags */
>  	if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL))
> @@ -1367,10 +1365,11 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid,
>  		return -ESRCH;
>  	}
>  	mm = get_task_mm(task);
> -	rcu_read_unlock();
>
> -	if (!mm)
> +	if (!mm) {
> +		rcu_read_unlock();
>  		return -EINVAL;
> +	}
>
>  	/*
>  	 * Check if this process has the right to modify the specified
> @@ -1378,24 +1377,23 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid,
>  	 * capabilities, superuser privileges or the same
>  	 * userid as the target process.
>  	 */
> -	rcu_read_lock();
>  	tcred = __task_cred(task);
>  	if (cred->euid != tcred->suid && cred->euid != tcred->uid &&
>  	    cred->uid  != tcred->suid && cred->uid  != tcred->uid &&
>  	    !capable(CAP_SYS_NICE)) {
> -		rcu_read_unlock();
>  		err = -EPERM;
> -		goto out;
> +		goto unlock_out;
>  	}
> -	rcu_read_unlock();
>
>   	err = security_task_movememory(task);
>   	if (err)
> -		goto out;
> +		goto unlock_out;
>
> +	task_nodes = cpuset_mems_allowed(task);
> +	rcu_read_unlock();
>  	if (nodes) {
> -		err = do_pages_move(mm, task, nr_pages, pages, nodes, status,
> -				    flags);
> +		err = do_pages_move(mm, task_nodes, nr_pages, pages, nodes,
> +				status, flags);
>  	} else {
>  		err = do_pages_stat(mm, nr_pages, pages, status);
>  	}
> @@ -1403,6 +1401,10 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid,
>  out:
>  	mmput(mm);
>  	return err;
> +
> +unlock_out:
> +	rcu_read_unlock();
> +	goto out;
>  }
>
>  /*

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@xxxxxxxxx.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@xxxxxxxxx";> email@xxxxxxxxx </a>


[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux]     [Linux OMAP]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]