Re: [PATCH] mm, oom: simplify task's refcount handling

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed 24-07-19 12:54:36, Tetsuo Handa wrote:
> Currently out_of_memory() is full of get_task_struct()/put_task_struct()
> calls. Since "mm, oom: avoid printk() iteration under RCU" introduced
> a list for holding a snapshot of all OOM victim candidates, let's share
> that list for select_bad_process() and oom_kill_process() in order to
> simplify task's refcount handling.
> 
> As a result of this patch, get_task_struct()/put_task_struct() calls
> in out_of_memory() are reduced to only 2 times respectively.

This is probably a matter of taste but the diffstat suggests to me that the
simplification is not all that great. On the other hand this makes the
oom handling even more tricky and harder for potential further
development - e.g. if we ever need to break the global lock down in the
future this would be another obstacle on the way. While potential
development might be too theoretical the benefit of the patch is not
really clear to me. The task_struct reference counting is not really
unusual operations and there is nothing really scary that we do with it
here. We already have to to extra mile wrt. task_lock so careful
reference count doesn't really jump out.

That being said, I do not think this patch gives any improvement.

> Signed-off-by: Tetsuo Handa <penguin-kernel@xxxxxxxxxxxxxxxxxxx>
> Cc: Shakeel Butt <shakeelb@xxxxxxxxxx>
> Cc: Michal Hocko <mhocko@xxxxxxxx>
> Cc: Roman Gushchin <guro@xxxxxx>
> Cc: David Rientjes <rientjes@xxxxxxxxxx>
> ---
>  include/linux/sched.h |   2 +-
>  mm/oom_kill.c         | 122 ++++++++++++++++++++++++--------------------------
>  2 files changed, 60 insertions(+), 64 deletions(-)
> 
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 48c1a4c..4062999 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1247,7 +1247,7 @@ struct task_struct {
>  #ifdef CONFIG_MMU
>  	struct task_struct		*oom_reaper_list;
>  #endif
> -	struct list_head		oom_victim_list;
> +	struct list_head		oom_candidate;
>  #ifdef CONFIG_VMAP_STACK
>  	struct vm_struct		*stack_vm_area;
>  #endif
> diff --git a/mm/oom_kill.c b/mm/oom_kill.c
> index 110f948..311e0e9 100644
> --- a/mm/oom_kill.c
> +++ b/mm/oom_kill.c
> @@ -63,6 +63,7 @@
>   * and mark_oom_victim
>   */
>  DEFINE_MUTEX(oom_lock);
> +static LIST_HEAD(oom_candidate_list);
>  
>  static inline bool is_memcg_oom(struct oom_control *oc)
>  {
> @@ -167,6 +168,41 @@ static bool oom_unkillable_task(struct task_struct *p)
>  	return false;
>  }
>  
> +static int add_candidate_task(struct task_struct *p, void *unused)
> +{
> +	if (!oom_unkillable_task(p)) {
> +		get_task_struct(p);
> +		list_add_tail(&p->oom_candidate, &oom_candidate_list);
> +	}
> +	return 0;
> +}
> +
> +static void link_oom_candidates(struct oom_control *oc)
> +{
> +	struct task_struct *p;
> +
> +	if (is_memcg_oom(oc))
> +		mem_cgroup_scan_tasks(oc->memcg, add_candidate_task, NULL);
> +	else {
> +		rcu_read_lock();
> +		for_each_process(p)
> +			add_candidate_task(p, NULL);
> +		rcu_read_unlock();
> +	}
> +
> +}
> +
> +static void unlink_oom_candidates(void)
> +{
> +	struct task_struct *p;
> +	struct task_struct *t;
> +
> +	list_for_each_entry_safe(p, t, &oom_candidate_list, oom_candidate) {
> +		list_del(&p->oom_candidate);
> +		put_task_struct(p);
> +	}
> +}
> +
>  /*
>   * Print out unreclaimble slabs info when unreclaimable slabs amount is greater
>   * than all user memory (LRU pages)
> @@ -344,16 +380,11 @@ static int oom_evaluate_task(struct task_struct *task, void *arg)
>  		goto next;
>  
>  select:
> -	if (oc->chosen)
> -		put_task_struct(oc->chosen);
> -	get_task_struct(task);
>  	oc->chosen = task;
>  	oc->chosen_points = points;
>  next:
>  	return 0;
>  abort:
> -	if (oc->chosen)
> -		put_task_struct(oc->chosen);
>  	oc->chosen = (void *)-1UL;
>  	return 1;
>  }
> @@ -364,27 +395,13 @@ static int oom_evaluate_task(struct task_struct *task, void *arg)
>   */
>  static void select_bad_process(struct oom_control *oc)
>  {
> -	if (is_memcg_oom(oc))
> -		mem_cgroup_scan_tasks(oc->memcg, oom_evaluate_task, oc);
> -	else {
> -		struct task_struct *p;
> -
> -		rcu_read_lock();
> -		for_each_process(p)
> -			if (oom_evaluate_task(p, oc))
> -				break;
> -		rcu_read_unlock();
> -	}
> -}
> -
> +	struct task_struct *p;
>  
> -static int add_candidate_task(struct task_struct *p, void *arg)
> -{
> -	if (!oom_unkillable_task(p)) {
> -		get_task_struct(p);
> -		list_add_tail(&p->oom_victim_list, (struct list_head *) arg);
> +	list_for_each_entry(p, &oom_candidate_list, oom_candidate) {
> +		cond_resched();
> +		if (oom_evaluate_task(p, oc))
> +			break;
>  	}
> -	return 0;
>  }
>  
>  /**
> @@ -399,21 +416,12 @@ static int add_candidate_task(struct task_struct *p, void *arg)
>   */
>  static void dump_tasks(struct oom_control *oc)
>  {
> -	static LIST_HEAD(list);
>  	struct task_struct *p;
>  	struct task_struct *t;
>  
> -	if (is_memcg_oom(oc))
> -		mem_cgroup_scan_tasks(oc->memcg, add_candidate_task, &list);
> -	else {
> -		rcu_read_lock();
> -		for_each_process(p)
> -			add_candidate_task(p, &list);
> -		rcu_read_unlock();
> -	}
>  	pr_info("Tasks state (memory values in pages):\n");
>  	pr_info("[  pid  ]   uid  tgid total_vm      rss pgtables_bytes swapents oom_score_adj name\n");
> -	list_for_each_entry(p, &list, oom_victim_list) {
> +	list_for_each_entry(p, &oom_candidate_list, oom_candidate) {
>  		cond_resched();
>  		/* p may not have freeable memory in nodemask */
>  		if (!is_memcg_oom(oc) && !oom_cpuset_eligible(p, oc))
> @@ -430,10 +438,6 @@ static void dump_tasks(struct oom_control *oc)
>  			t->signal->oom_score_adj, t->comm);
>  		task_unlock(t);
>  	}
> -	list_for_each_entry_safe(p, t, &list, oom_victim_list) {
> -		list_del(&p->oom_victim_list);
> -		put_task_struct(p);
> -	}
>  }
>  
>  static void dump_oom_summary(struct oom_control *oc, struct task_struct *victim)
> @@ -859,17 +863,11 @@ static void __oom_kill_process(struct task_struct *victim, const char *message)
>  	bool can_oom_reap = true;
>  
>  	p = find_lock_task_mm(victim);
> -	if (!p) {
> -		put_task_struct(victim);
> +	if (!p)
>  		return;
> -	} else if (victim != p) {
> -		get_task_struct(p);
> -		put_task_struct(victim);
> -		victim = p;
> -	}
>  
> -	/* Get a reference to safely compare mm after task_unlock(victim) */
> -	mm = victim->mm;
> +	/* Get a reference to safely compare mm after task_unlock(p) */
> +	mm = p->mm;
>  	mmgrab(mm);
>  
>  	/* Raise event before sending signal: task reaper must see this */
> @@ -881,16 +879,15 @@ static void __oom_kill_process(struct task_struct *victim, const char *message)
>  	 * in order to prevent the OOM victim from depleting the memory
>  	 * reserves from the user space under its control.
>  	 */
> -	do_send_sig_info(SIGKILL, SEND_SIG_PRIV, victim, PIDTYPE_TGID);
> -	mark_oom_victim(victim);
> +	do_send_sig_info(SIGKILL, SEND_SIG_PRIV, p, PIDTYPE_TGID);
> +	mark_oom_victim(p);
>  	pr_err("%s: Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB, UID:%u\n",
> -		message, task_pid_nr(victim), victim->comm,
> -		K(victim->mm->total_vm),
> -		K(get_mm_counter(victim->mm, MM_ANONPAGES)),
> -		K(get_mm_counter(victim->mm, MM_FILEPAGES)),
> -		K(get_mm_counter(victim->mm, MM_SHMEMPAGES)),
> -		from_kuid(&init_user_ns, task_uid(victim)));
> -	task_unlock(victim);
> +	       message, task_pid_nr(p), p->comm, K(mm->total_vm),
> +	       K(get_mm_counter(mm, MM_ANONPAGES)),
> +	       K(get_mm_counter(mm, MM_FILEPAGES)),
> +	       K(get_mm_counter(mm, MM_SHMEMPAGES)),
> +	       from_kuid(&init_user_ns, task_uid(p)));
> +	task_unlock(p);
>  
>  	/*
>  	 * Kill all user processes sharing victim->mm in other thread groups, if
> @@ -929,7 +926,6 @@ static void __oom_kill_process(struct task_struct *victim, const char *message)
>  		wake_oom_reaper(victim);
>  
>  	mmdrop(mm);
> -	put_task_struct(victim);
>  }
>  #undef K
>  
> @@ -940,10 +936,8 @@ static void __oom_kill_process(struct task_struct *victim, const char *message)
>  static int oom_kill_memcg_member(struct task_struct *task, void *message)
>  {
>  	if (task->signal->oom_score_adj != OOM_SCORE_ADJ_MIN &&
> -	    !is_global_init(task)) {
> -		get_task_struct(task);
> +	    !is_global_init(task))
>  		__oom_kill_process(task, message);
> -	}
>  	return 0;
>  }
>  
> @@ -964,7 +958,6 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
>  		mark_oom_victim(victim);
>  		wake_oom_reaper(victim);
>  		task_unlock(victim);
> -		put_task_struct(victim);
>  		return;
>  	}
>  	task_unlock(victim);
> @@ -1073,6 +1066,8 @@ bool out_of_memory(struct oom_control *oc)
>  	if (oc->gfp_mask && !(oc->gfp_mask & __GFP_FS))
>  		return true;
>  
> +	link_oom_candidates(oc);
> +
>  	/*
>  	 * Check if there were limitations on the allocation (only relevant for
>  	 * NUMA and memcg) that may require different handling.
> @@ -1086,10 +1081,9 @@ bool out_of_memory(struct oom_control *oc)
>  	    current->mm && !oom_unkillable_task(current) &&
>  	    oom_cpuset_eligible(current, oc) &&
>  	    current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) {
> -		get_task_struct(current);
>  		oc->chosen = current;
>  		oom_kill_process(oc, "Out of memory (oom_kill_allocating_task)");
> -		return true;
> +		goto done;
>  	}
>  
>  	select_bad_process(oc);
> @@ -1108,6 +1102,8 @@ bool out_of_memory(struct oom_control *oc)
>  	if (oc->chosen && oc->chosen != (void *)-1UL)
>  		oom_kill_process(oc, !is_memcg_oom(oc) ? "Out of memory" :
>  				 "Memory cgroup out of memory");
> + done:
> +	unlink_oom_candidates();
>  	return !!oc->chosen;
>  }
>  
> -- 
> 1.8.3.1

-- 
Michal Hocko
SUSE Labs




[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux