Re: Patch "mm: consider memblock reservations for deferred memory initialization sizing" has been added to the 4.4-stable tree

Nope, dropped from 4.4 as it broke the build there :(



On Mon, Jun 05, 2017 at 03:16:51PM +0200, gregkh@xxxxxxxxxxxxxxxxxxx wrote:
> 
> This is a note to let you know that I've just added the patch titled
> 
>     mm: consider memblock reservations for deferred memory initialization sizing
> 
> to the 4.4-stable tree which can be found at:
>     http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary
> 
> The filename of the patch is:
>      mm-consider-memblock-reservations-for-deferred-memory-initialization-sizing.patch
> and it can be found in the queue-4.4 subdirectory.
> 
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable@xxxxxxxxxxxxxxx> know about it.
> 
> 
> From 864b9a393dcb5aed09b8fd31b9bbda0fdda99374 Mon Sep 17 00:00:00 2001
> From: Michal Hocko <mhocko@xxxxxxxx>
> Date: Fri, 2 Jun 2017 14:46:49 -0700
> Subject: mm: consider memblock reservations for deferred memory initialization sizing
> 
> From: Michal Hocko <mhocko@xxxxxxxx>
> 
> commit 864b9a393dcb5aed09b8fd31b9bbda0fdda99374 upstream.
> 
> We have seen an early OOM killer invocation on ppc64 systems with
> crashkernel=4096M:
> 
> 	kthreadd invoked oom-killer: gfp_mask=0x16040c0(GFP_KERNEL|__GFP_COMP|__GFP_NOTRACK), nodemask=7, order=0, oom_score_adj=0
> 	kthreadd cpuset=/ mems_allowed=7
> 	CPU: 0 PID: 2 Comm: kthreadd Not tainted 4.4.68-1.gd7fe927-default #1
> 	Call Trace:
> 	  dump_stack+0xb0/0xf0 (unreliable)
> 	  dump_header+0xb0/0x258
> 	  out_of_memory+0x5f0/0x640
> 	  __alloc_pages_nodemask+0xa8c/0xc80
> 	  kmem_getpages+0x84/0x1a0
> 	  fallback_alloc+0x2a4/0x320
> 	  kmem_cache_alloc_node+0xc0/0x2e0
> 	  copy_process.isra.25+0x260/0x1b30
> 	  _do_fork+0x94/0x470
> 	  kernel_thread+0x48/0x60
> 	  kthreadd+0x264/0x330
> 	  ret_from_kernel_thread+0x5c/0xa4
> 
> 	Mem-Info:
> 	active_anon:0 inactive_anon:0 isolated_anon:0
> 	 active_file:0 inactive_file:0 isolated_file:0
> 	 unevictable:0 dirty:0 writeback:0 unstable:0
> 	 slab_reclaimable:5 slab_unreclaimable:73
> 	 mapped:0 shmem:0 pagetables:0 bounce:0
> 	 free:0 free_pcp:0 free_cma:0
> 	Node 7 DMA free:0kB min:0kB low:0kB high:0kB active_anon:0kB inactive_anon:0kB active_file:0kB inactive_file:0kB unevictable:0kB isolated(anon):0kB isolated(file):0kB present:52428800kB managed:110016kB mlocked:0kB dirty:0kB writeback:0kB mapped:0kB shmem:0kB slab_reclaimable:320kB slab_unreclaimable:4672kB kernel_stack:1152kB pagetables:0kB unstable:0kB bounce:0kB free_pcp:0kB local_pcp:0kB free_cma:0kB writeback_tmp:0kB pages_scanned:0 all_unreclaimable? yes
> 	lowmem_reserve[]: 0 0 0 0
> 	Node 7 DMA: 0*64kB 0*128kB 0*256kB 0*512kB 0*1024kB 0*2048kB 0*4096kB 0*8192kB 0*16384kB = 0kB
> 	0 total pagecache pages
> 	0 pages in swap cache
> 	Swap cache stats: add 0, delete 0, find 0/0
> 	Free swap  = 0kB
> 	Total swap = 0kB
> 	819200 pages RAM
> 	0 pages HighMem/MovableOnly
> 	817481 pages reserved
> 	0 pages cma reserved
> 	0 pages hwpoisoned
> 
> The reason is that the managed memory is too low (only 110MB) while the
> rest of the 50GB is still waiting for the deferred initialization to be
> done.  update_defer_init estimates the initial memory to initialize to
> at least 2GB, but it doesn't consider any memory allocated in that
> range.  In this particular case we've had
> 
> 	Reserving 4096MB of memory at 128MB for crashkernel (System RAM: 51200MB)
> 
> so the low 2GB is mostly depleted.
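> 
> To put numbers on it: the crashkernel block occupies 128MB..4224MB, so
> only about 2048MB - 1920MB = 128MB of the 2GB window survives; minus
> the other early reservations, that roughly matches the ~110MB of
> managed memory in the report above.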
> 
> Fix this by considering memblock allocations in the initial static
> initialization estimation.  Move the max_initialise computation to
> reset_deferred_meminit and implement a simple
> memblock_reserved_memory_within helper which iterates all reserved
> blocks and sums the size of all that start below the given address.
> The cumulative size is then added on top of the initial estimation.
> This is still not ideal because reset_deferred_meminit doesn't consider
> holes, and so a reservation might lie above the initial estimation and
> be ignored, but let's keep the logic simple until we really need to
> handle more complicated cases.
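> 
> For the report above this means the 4096MB reservation is added on top
> of the 2GB estimate, so roughly 6GB would be initialized up front,
> comfortably covering the depleted low range.  Note that the
> reset_deferred_meminit call also has to move below the point where
> node_start_pfn and node_spanned_pages are set up, because the new
> sizing reads both fields.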
> 
> Fixes: 3a80a7fa7989 ("mm: meminit: initialise a subset of struct pages if CONFIG_DEFERRED_STRUCT_PAGE_INIT is set")
> Link: http://lkml.kernel.org/r/20170531104010.GI27783@xxxxxxxxxxxxxx
> Signed-off-by: Michal Hocko <mhocko@xxxxxxxx>
> Acked-by: Mel Gorman <mgorman@xxxxxxx>
> Tested-by: Srikar Dronamraju <srikar@xxxxxxxxxxxxxxxxxx>
> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
> Signed-off-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
> Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
> 
> ---
>  include/linux/memblock.h |    8 ++++++++
>  include/linux/mmzone.h   |    1 +
>  mm/memblock.c            |   23 +++++++++++++++++++++++
>  mm/page_alloc.c          |   24 ++++++++++++++++++++++--
>  4 files changed, 54 insertions(+), 2 deletions(-)
> 
> --- a/include/linux/memblock.h
> +++ b/include/linux/memblock.h
> @@ -408,11 +408,19 @@ static inline void early_memtest(phys_ad
>  }
>  #endif
>  
> +extern unsigned long memblock_reserved_memory_within(phys_addr_t start_addr,
> +		phys_addr_t end_addr);
>  #else
>  static inline phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align)
>  {
>  	return 0;
>  }
> +
> +static inline unsigned long memblock_reserved_memory_within(phys_addr_t start_addr,
> +		phys_addr_t end_addr)
> +{
> +	return 0;
> +}
>  
>  #endif /* CONFIG_HAVE_MEMBLOCK */
>  
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -688,6 +688,7 @@ typedef struct pglist_data {
>  	 * is the first PFN that needs to be initialised.
>  	 */
>  	unsigned long first_deferred_pfn;
> +	unsigned long static_init_size;
>  #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
>  } pg_data_t;
>  
> --- a/mm/memblock.c
> +++ b/mm/memblock.c
> @@ -1634,6 +1634,29 @@ static void __init_memblock memblock_dum
>  	}
>  }
>  
> +extern unsigned long __init_memblock
> +memblock_reserved_memory_within(phys_addr_t start_addr, phys_addr_t end_addr)
> +{
> +	struct memblock_region *rgn;
> +	unsigned long size = 0;
> +	int idx;
> +
> +	for_each_memblock_type((&memblock.reserved), rgn) {
> +		phys_addr_t start, end;
> +
> +		if (rgn->base + rgn->size < start_addr)
> +			continue;
> +		if (rgn->base > end_addr)
> +			continue;
> +
> +		start = rgn->base;
> +		end = start + rgn->size;
> +		size += end - start;
> +	}
> +
> +	return size;
> +}
> +
>  void __init_memblock __memblock_dump_all(void)
>  {
>  	pr_info("MEMBLOCK configuration:\n");
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -269,6 +269,26 @@ int page_group_by_mobility_disabled __re
>  #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
>  static inline void reset_deferred_meminit(pg_data_t *pgdat)
>  {
> +	unsigned long max_initialise;
> +	unsigned long reserved_lowmem;
> +
> +	/*
> +	 * Initialise at least 2G of a node, but also take into account
> +	 * that two large system hashes can take up 1GB for 0.25TB/node.
> +	 */
> +	max_initialise = max(2UL << (30 - PAGE_SHIFT),
> +		(pgdat->node_spanned_pages >> 8));
> +
> +	/*
> +	 * Compensate for all the memblock reservations (e.g. crash kernel)
> +	 * from the initial estimation to make sure we will initialize enough
> +	 * memory to boot.
> +	 */
> +	reserved_lowmem = memblock_reserved_memory_within(pgdat->node_start_pfn,
> +			pgdat->node_start_pfn + max_initialise);
> +	max_initialise += reserved_lowmem;
> +
> +	pgdat->static_init_size = min(max_initialise, pgdat->node_spanned_pages);
>  	pgdat->first_deferred_pfn = ULONG_MAX;
>  }
>  
> @@ -305,7 +325,7 @@ static inline bool update_defer_init(pg_
>  
>  	/* Initialise at least 2G of the highest zone */
>  	(*nr_initialised)++;
> -	if (*nr_initialised > (2UL << (30 - PAGE_SHIFT)) &&
> +	if ((*nr_initialised > pgdat->static_init_size) &&
>  	    (pfn & (PAGES_PER_SECTION - 1)) == 0) {
>  		pgdat->first_deferred_pfn = pfn;
>  		return false;
> @@ -5343,7 +5363,6 @@ void __paginginit free_area_init_node(in
>  	/* pg_data_t should be reset to zero when it's allocated */
>  	WARN_ON(pgdat->nr_zones || pgdat->classzone_idx);
>  
> -	reset_deferred_meminit(pgdat);
>  	pgdat->node_id = nid;
>  	pgdat->node_start_pfn = node_start_pfn;
>  #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
> @@ -5362,6 +5381,7 @@ void __paginginit free_area_init_node(in
>  		(unsigned long)pgdat->node_mem_map);
>  #endif
>  
> +	reset_deferred_meminit(pgdat);
>  	free_area_init_core(pgdat);
>  }
>  
> 
> 
> Patches currently in stable-queue which might be from mhocko@xxxxxxxx are
> 
> queue-4.4/mm-consider-memblock-reservations-for-deferred-memory-initialization-sizing.patch


