[CC Andrew] On Fri 11-08-17 17:05:54, Pavel Tatashin wrote: > There is an existing use-after-free bug when deferred struct pages are > enabled: > > The memblock_add() allocates memory for the memory array if more than > 128 entries are needed. See comment in e820__memblock_setup(): > > * The bootstrap memblock region count maximum is 128 entries > * (INIT_MEMBLOCK_REGIONS), but EFI might pass us more E820 entries > * than that - so allow memblock resizing. > > This memblock memory is freed here: > free_low_memory_core_early() > > We access the freed memblock.memory later in boot when deferred pages are > initialized in this path: > > deferred_init_memmap() > for_each_mem_pfn_range() > __next_mem_pfn_range() > type = &memblock.memory; > > One possible explanation for why this use-after-free hasn't been hit > before is that the limit of INIT_MEMBLOCK_REGIONS has never been exceeded > at least on systems where deferred struct pages were enabled. > > Tested by reducing INIT_MEMBLOCK_REGIONS down to 4 from the current 128, > and verifying in qemu that this code is getting executed and that the freed > pages are sane. > > Fixes: 7e18adb4f80b ("mm: meminit: initialise remaining struct pages in parallel with kswapd") > Signed-off-by: Pavel Tatashin <pasha.tatashin@xxxxxxxxxx> > Reviewed-by: Steven Sistare <steven.sistare@xxxxxxxxxx> > Reviewed-by: Daniel Jordan <daniel.m.jordan@xxxxxxxxxx> > Reviewed-by: Bob Picco <bob.picco@xxxxxxxxxx> > Acked-by: Michal Hocko <mhocko@xxxxxxxx> As already mentioned, I believe Cc: stable is reasonable. 
> --- > include/linux/memblock.h | 6 ++++-- > mm/memblock.c | 38 +++++++++++++++++--------------------- > mm/nobootmem.c | 16 ---------------- > mm/page_alloc.c | 4 ++++ > 4 files changed, 25 insertions(+), 39 deletions(-) > > diff --git a/include/linux/memblock.h b/include/linux/memblock.h > index 77d427974f57..bae11c7e7bf3 100644 > --- a/include/linux/memblock.h > +++ b/include/linux/memblock.h > @@ -61,6 +61,7 @@ extern int memblock_debug; > #ifdef CONFIG_ARCH_DISCARD_MEMBLOCK > #define __init_memblock __meminit > #define __initdata_memblock __meminitdata > +void memblock_discard(void); > #else > #define __init_memblock > #define __initdata_memblock > @@ -74,8 +75,6 @@ phys_addr_t memblock_find_in_range_node(phys_addr_t size, phys_addr_t align, > int nid, ulong flags); > phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end, > phys_addr_t size, phys_addr_t align); > -phys_addr_t get_allocated_memblock_reserved_regions_info(phys_addr_t *addr); > -phys_addr_t get_allocated_memblock_memory_regions_info(phys_addr_t *addr); > void memblock_allow_resize(void); > int memblock_add_node(phys_addr_t base, phys_addr_t size, int nid); > int memblock_add(phys_addr_t base, phys_addr_t size); > @@ -110,6 +109,9 @@ void __next_mem_range_rev(u64 *idx, int nid, ulong flags, > void __next_reserved_mem_region(u64 *idx, phys_addr_t *out_start, > phys_addr_t *out_end); > > +void __memblock_free_early(phys_addr_t base, phys_addr_t size); > +void __memblock_free_late(phys_addr_t base, phys_addr_t size); > + > /** > * for_each_mem_range - iterate through memblock areas from type_a and not > * included in type_b. Or just type_a if type_b is NULL. 
> diff --git a/mm/memblock.c b/mm/memblock.c > index 2cb25fe4452c..bf14aea6ab70 100644 > --- a/mm/memblock.c > +++ b/mm/memblock.c > @@ -285,31 +285,27 @@ static void __init_memblock memblock_remove_region(struct memblock_type *type, u > } > > #ifdef CONFIG_ARCH_DISCARD_MEMBLOCK > - > -phys_addr_t __init_memblock get_allocated_memblock_reserved_regions_info( > - phys_addr_t *addr) > -{ > - if (memblock.reserved.regions == memblock_reserved_init_regions) > - return 0; > - > - *addr = __pa(memblock.reserved.regions); > - > - return PAGE_ALIGN(sizeof(struct memblock_region) * > - memblock.reserved.max); > -} > - > -phys_addr_t __init_memblock get_allocated_memblock_memory_regions_info( > - phys_addr_t *addr) > +/** > + * Discard memory and reserved arrays if they were allocated > + */ > +void __init memblock_discard(void) > { > - if (memblock.memory.regions == memblock_memory_init_regions) > - return 0; > + phys_addr_t addr, size; > > - *addr = __pa(memblock.memory.regions); > + if (memblock.reserved.regions != memblock_reserved_init_regions) { > + addr = __pa(memblock.reserved.regions); > + size = PAGE_ALIGN(sizeof(struct memblock_region) * > + memblock.reserved.max); > + __memblock_free_late(addr, size); > + } > > - return PAGE_ALIGN(sizeof(struct memblock_region) * > - memblock.memory.max); > + if (memblock.memory.regions == memblock_memory_init_regions) { > + addr = __pa(memblock.memory.regions); > + size = PAGE_ALIGN(sizeof(struct memblock_region) * > + memblock.memory.max); > + __memblock_free_late(addr, size); > + } > } > - > #endif > > /** > diff --git a/mm/nobootmem.c b/mm/nobootmem.c > index 36454d0f96ee..3637809a18d0 100644 > --- a/mm/nobootmem.c > +++ b/mm/nobootmem.c > @@ -146,22 +146,6 @@ static unsigned long __init free_low_memory_core_early(void) > NULL) > count += __free_memory_core(start, end); > > -#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK > - { > - phys_addr_t size; > - > - /* Free memblock.reserved array if it was allocated */ > - size = 
get_allocated_memblock_reserved_regions_info(&start); > - if (size) > - count += __free_memory_core(start, start + size); > - > - /* Free memblock.memory array if it was allocated */ > - size = get_allocated_memblock_memory_regions_info(&start); > - if (size) > - count += __free_memory_core(start, start + size); > - } > -#endif > - > return count; > } > > diff --git a/mm/page_alloc.c b/mm/page_alloc.c > index fc32aa81f359..63d16c185736 100644 > --- a/mm/page_alloc.c > +++ b/mm/page_alloc.c > @@ -1584,6 +1584,10 @@ void __init page_alloc_init_late(void) > /* Reinit limits that are based on free pages after the kernel is up */ > files_maxfiles_init(); > #endif > +#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK > + /* Discard memblock private memory */ > + memblock_discard(); > +#endif > > for_each_populated_zone(zone) > set_zone_contiguous(zone); > -- > 2.14.0 -- Michal Hocko SUSE Labs -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href="mailto:dont@xxxxxxxxx"> email@xxxxxxxxx </a>