Re: [PATCH 1/2] userfaultfd: use RCU to free the task struct when fork fails

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 26.03.2019 01:56, Andrea Arcangeli wrote:
> MEMCG depends on the task structure not to be freed under
> rcu_read_lock() in get_mem_cgroup_from_mm() after it dereferences
> mm->owner.
> 
> An alternate possible fix would be to defer the delivery of the
> userfaultfd contexts to the monitor until after fork() is guaranteed
> to succeed. Such a change would require more changes because it would
> create a strict ordering dependency where the uffd methods would need
> to be called beyond the last potentially failing branch in order to be
> safe. This solution as opposed only adds the dependency to common code
> to set mm->owner to NULL and to free the task struct that was pointed
> by mm->owner with RCU, if fork ends up failing. The userfaultfd
> methods can still be called anywhere during the fork runtime and the
> monitor will keep discarding orphaned "mm" coming from failed forks in
> userland.
> 
> This race condition couldn't trigger if CONFIG_MEMCG was set =n at
> build time.
> 
> Fixes: 893e26e61d04 ("userfaultfd: non-cooperative: Add fork() event")
> Cc: stable@xxxxxxxxxx
> Tested-by: zhong jiang <zhongjiang@xxxxxxxxxx>
> Reported-by: syzbot+cbb52e396df3e565ab02@xxxxxxxxxxxxxxxxxxxxxxxxx
> Signed-off-by: Andrea Arcangeli <aarcange@xxxxxxxxxx>
> ---
>  kernel/fork.c | 34 ++++++++++++++++++++++++++++++++--
>  1 file changed, 32 insertions(+), 2 deletions(-)
> 
> diff --git a/kernel/fork.c b/kernel/fork.c
> index 9dcd18aa210b..a19790e27afd 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -952,6 +952,15 @@ static void mm_init_aio(struct mm_struct *mm)
>  #endif
>  }
>  
> +static __always_inline void mm_clear_owner(struct mm_struct *mm,
> +					   struct task_struct *p)
> +{
> +#ifdef CONFIG_MEMCG
> +	if (mm->owner == p)
> +		WRITE_ONCE(mm->owner, NULL);
> +#endif
> +}
> +
>  static void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
>  {
>  #ifdef CONFIG_MEMCG
> @@ -1331,6 +1340,7 @@ static struct mm_struct *dup_mm(struct task_struct *tsk)
>  free_pt:
>  	/* don't put binfmt in mmput, we haven't got module yet */
>  	mm->binfmt = NULL;
> +	mm_init_owner(mm, NULL);
>  	mmput(mm);
>  
>  fail_nomem:
> @@ -1662,6 +1672,24 @@ static inline void rcu_copy_process(struct task_struct *p)
>  #endif /* #ifdef CONFIG_TASKS_RCU */
>  }
>  
> +#ifdef CONFIG_MEMCG
> +static void __delayed_free_task(struct rcu_head *rhp)
> +{
> +	struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
> +
> +	free_task(tsk);
> +}
> +#endif /* CONFIG_MEMCG */
> +
> +static __always_inline void delayed_free_task(struct task_struct *tsk)
> +{
> +#ifdef CONFIG_MEMCG
> +	call_rcu(&tsk->rcu, __delayed_free_task);
> +#else /* CONFIG_MEMCG */
> +	free_task(tsk);
> +#endif /* CONFIG_MEMCG */
> +}
> +
>  /*
>   * This creates a new process as a copy of the old one,
>   * but does not actually start it yet.
> @@ -2123,8 +2151,10 @@ static __latent_entropy struct task_struct *copy_process(
>  bad_fork_cleanup_namespaces:
>  	exit_task_namespaces(p);
>  bad_fork_cleanup_mm:
> -	if (p->mm)
> +	if (p->mm) {
> +		mm_clear_owner(p->mm, p);
>  		mmput(p->mm);
> +	}
>  bad_fork_cleanup_signal:
>  	if (!(clone_flags & CLONE_THREAD))
>  		free_signal_struct(p->signal);
> @@ -2155,7 +2185,7 @@ static __latent_entropy struct task_struct *copy_process(
>  bad_fork_free:
>  	p->state = TASK_DEAD;
>  	put_task_stack(p);
> -	free_task(p);
> +	delayed_free_task(p);

Can't call_rcu(&p->rcu, delayed_put_task_struct) be used instead this?

Kirill




[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux