Re: [PATCH] mm/oom_kill: set oc->constraint in constrained_alloc()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu 13-06-19 21:55:50, Yafang Shao wrote:
> In dump_oom_summary() oc->constraint is used to show
> oom_constraint_text, but it hasn't been set before.
> So the value of it is always the default value 0.
> We should set it in constrained_alloc().

Thanks for catching that.

> Bellow is the output when memcg oom occurs,
> 
> before this patch:
> [  133.078102] oom-kill:constraint=CONSTRAINT_NONE,nodemask=(null),
> cpuset=/,mems_allowed=0,oom_memcg=/foo,task_memcg=/foo,task=bash,pid=7997,uid=0
> 
> after this patch:
> [  952.977946] oom-kill:constraint=CONSTRAINT_MEMCG,nodemask=(null),
> cpuset=/,mems_allowed=0,oom_memcg=/foo,task_memcg=/foo,task=bash,pid=13681,uid=0
> 

unless I am missing something
Fixes: ef8444ea01d7 ("mm, oom: reorganize the oom report in dump_header")

The patch looks correct but I think it is more complicated than it needs
to be. Can we do the following instead?

diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 5a58778c91d4..f719b64741d6 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -987,8 +987,7 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
 /*
  * Determines whether the kernel must panic because of the panic_on_oom sysctl.
  */
-static void check_panic_on_oom(struct oom_control *oc,
-			       enum oom_constraint constraint)
+static void check_panic_on_oom(struct oom_control *oc)
 {
 	if (likely(!sysctl_panic_on_oom))
 		return;
@@ -998,7 +997,7 @@ static void check_panic_on_oom(struct oom_control *oc,
 		 * does not panic for cpuset, mempolicy, or memcg allocation
 		 * failures.
 		 */
-		if (constraint != CONSTRAINT_NONE)
+		if (oc->constraint != CONSTRAINT_NONE)
 			return;
 	}
 	/* Do not panic for oom kills triggered by sysrq */
@@ -1035,7 +1034,6 @@ EXPORT_SYMBOL_GPL(unregister_oom_notifier);
 bool out_of_memory(struct oom_control *oc)
 {
 	unsigned long freed = 0;
-	enum oom_constraint constraint = CONSTRAINT_NONE;
 
 	if (oom_killer_disabled)
 		return false;
@@ -1071,10 +1069,10 @@ bool out_of_memory(struct oom_control *oc)
 	 * Check if there were limitations on the allocation (only relevant for
 	 * NUMA and memcg) that may require different handling.
 	 */
-	constraint = constrained_alloc(oc);
-	if (constraint != CONSTRAINT_MEMORY_POLICY)
+	oc->constraint = constrained_alloc(oc);
+	if (oc->constraint != CONSTRAINT_MEMORY_POLICY)
 		oc->nodemask = NULL;
-	check_panic_on_oom(oc, constraint);
+	check_panic_on_oom(oc);
 
 	if (!is_memcg_oom(oc) && sysctl_oom_kill_allocating_task &&
 	    current->mm && !oom_unkillable_task(current, NULL, oc->nodemask) &&

I guess the current confusion comes from the fact that we have
constraint both in the oom_control and a local variable so I would
rather remove that. What do you think?

> Signed-off-by: Yafang Shao <laoar.shao@xxxxxxxxx>
> ---
>  mm/oom_kill.c | 35 +++++++++++++++++++++++++----------
>  1 file changed, 25 insertions(+), 10 deletions(-)
> 
> diff --git a/mm/oom_kill.c b/mm/oom_kill.c
> index 5a58778..075e5cf 100644
> --- a/mm/oom_kill.c
> +++ b/mm/oom_kill.c
> @@ -261,29 +261,37 @@ static enum oom_constraint constrained_alloc(struct oom_control *oc)
>  	struct zone *zone;
>  	struct zoneref *z;
>  	enum zone_type high_zoneidx = gfp_zone(oc->gfp_mask);
> +	enum oom_constraint constraint;
>  	bool cpuset_limited = false;
>  	int nid;
>  
>  	if (is_memcg_oom(oc)) {
>  		oc->totalpages = mem_cgroup_get_max(oc->memcg) ?: 1;
> -		return CONSTRAINT_MEMCG;
> +		constraint = CONSTRAINT_MEMCG;
> +		goto out;
>  	}
>  
>  	/* Default to all available memory */
>  	oc->totalpages = totalram_pages() + total_swap_pages;
>  
> -	if (!IS_ENABLED(CONFIG_NUMA))
> -		return CONSTRAINT_NONE;
> +	if (!IS_ENABLED(CONFIG_NUMA)) {
> +		constraint = CONSTRAINT_NONE;
> +		goto out;
> +	}
>  
> -	if (!oc->zonelist)
> -		return CONSTRAINT_NONE;
> +	if (!oc->zonelist) {
> +		constraint = CONSTRAINT_NONE;
> +		goto out;
> +	}
>  	/*
>  	 * Reach here only when __GFP_NOFAIL is used. So, we should avoid
>  	 * to kill current.We have to random task kill in this case.
>  	 * Hopefully, CONSTRAINT_THISNODE...but no way to handle it, now.
>  	 */
> -	if (oc->gfp_mask & __GFP_THISNODE)
> -		return CONSTRAINT_NONE;
> +	if (oc->gfp_mask & __GFP_THISNODE) {
> +		constraint = CONSTRAINT_NONE;
> +		goto out;
> +	}
>  
>  	/*
>  	 * This is not a __GFP_THISNODE allocation, so a truncated nodemask in
> @@ -295,7 +303,8 @@ static enum oom_constraint constrained_alloc(struct oom_control *oc)
>  		oc->totalpages = total_swap_pages;
>  		for_each_node_mask(nid, *oc->nodemask)
>  			oc->totalpages += node_spanned_pages(nid);
> -		return CONSTRAINT_MEMORY_POLICY;
> +		constraint = CONSTRAINT_MEMORY_POLICY;
> +		goto out;
>  	}
>  
>  	/* Check this allocation failure is caused by cpuset's wall function */
> @@ -308,9 +317,15 @@ static enum oom_constraint constrained_alloc(struct oom_control *oc)
>  		oc->totalpages = total_swap_pages;
>  		for_each_node_mask(nid, cpuset_current_mems_allowed)
>  			oc->totalpages += node_spanned_pages(nid);
> -		return CONSTRAINT_CPUSET;
> +		constraint = CONSTRAINT_CPUSET;
> +		goto out;
>  	}
> -	return CONSTRAINT_NONE;
> +
> +	constraint = CONSTRAINT_NONE;
> +
> +out:
> +	oc->constraint = constraint;
> +	return constraint;
>  }
>  
>  static int oom_evaluate_task(struct task_struct *task, void *arg)
> -- 
> 1.8.3.1
> 

-- 
Michal Hocko
SUSE Labs




[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux