The patch titled
     Subject: mm: initialize pages on demand during boot
has been added to the -mm tree.  Its filename is
     mm-initialize-pages-on-demand-during-boot-v5.patch

This patch should soon appear at
    http://ozlabs.org/~akpm/mmots/broken-out/mm-initialize-pages-on-demand-during-boot-v5.patch
and later at
    http://ozlabs.org/~akpm/mmotm/broken-out/mm-initialize-pages-on-demand-during-boot-v5.patch

Before you just go and hit "reply", please:
   a) Consider who else should be cc'ed
   b) Prefer to cc a suitable mailing list as well
   c) Ideally: find the original patch on the mailing list and do a
      reply-to-all to that, adding suitable additional cc's

*** Remember to use Documentation/process/submit-checklist.rst when
    testing your code ***

The -mm tree is included into linux-next and is updated
there every 3-4 working days

------------------------------------------------------
From: Pavel Tatashin <pasha.tatashin@xxxxxxxxxx>
Subject: mm: initialize pages on demand during boot

Link: http://lkml.kernel.org/r/20180309220807.24961-3-pasha.tatashin@xxxxxxxxxx
Signed-off-by: Pavel Tatashin <pasha.tatashin@xxxxxxxxxx>
Reviewed-by: Daniel Jordan <daniel.m.jordan@xxxxxxxxxx>
Reviewed-by: Steven Sistare <steven.sistare@xxxxxxxxxx>
Reviewed-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Tested-by: Masayoshi Mizuma <m.mizuma@xxxxxxxxxxxxxx>
Acked-by: Mel Gorman <mgorman@xxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
---

 mm/page_alloc.c |   68 +++++++++++++++++++++++-----------------------
 1 file changed, 35 insertions(+), 33 deletions(-)

diff -puN mm/page_alloc.c~mm-initialize-pages-on-demand-during-boot-v5 mm/page_alloc.c
--- a/mm/page_alloc.c~mm-initialize-pages-on-demand-during-boot-v5
+++ a/mm/page_alloc.c
@@ -1572,14 +1572,10 @@ static int __init deferred_init_memmap(v
 }
 
 /*
- * This lock guarantees that only one thread at a time is allowed to grow zones
- * (decrease number of deferred pages).
- * Protects first_deferred_pfn field in all zones during early boot before
- * deferred pages are initialized. Deferred pages are initialized in
- * page_alloc_init_late() soon after smp_init() is complete.
+ * During boot we initialize deferred pages on-demand, as needed, but once
+ * page_alloc_init_late() has finished, the deferred pages are all initialized,
+ * and we can permanently disable that path.
  */
-static __initdata DEFINE_SPINLOCK(deferred_zone_grow_lock);
-static bool deferred_zone_grow __initdata = true;
 static DEFINE_STATIC_KEY_TRUE(deferred_pages);
 
 /*
@@ -1589,8 +1585,9 @@ static DEFINE_STATIC_KEY_TRUE(deferred_p
  * of SECTION_SIZE bytes by initializing struct pages in increments of
  * PAGES_PER_SECTION * sizeof(struct page) bytes.
  *
- * Return true when zone was grown by at least number of pages specified by
- * order. Otherwise return false.
+ * Return true when zone was grown, otherwise return false. We return true even
+ * when we grow less than requested, to let the caller decide if there are
+ * enough pages to satisfy the allocation.
  *
  * Note: We use noinline because this function is needed only during boot, and
  * it is called from a __ref function _deferred_grow_zone. This way we are
@@ -1604,7 +1601,8 @@ deferred_grow_zone(struct zone *zone, un
 	pg_data_t *pgdat = NODE_DATA(nid);
 	unsigned long nr_pages_needed = ALIGN(1 << order, PAGES_PER_SECTION);
 	unsigned long nr_pages = 0;
-	unsigned long first_init_pfn, first_deferred_pfn, spfn, epfn, t, flags;
+	unsigned long first_init_pfn, spfn, epfn, t, flags;
+	unsigned long first_deferred_pfn = pgdat->first_deferred_pfn;
 	phys_addr_t spa, epa;
 	u64 i;
 
@@ -1612,21 +1610,32 @@ deferred_grow_zone(struct zone *zone, un
 	if (zone_end_pfn(zone) != pgdat_end_pfn(pgdat))
 		return false;
 
-	spin_lock_irqsave(&deferred_zone_grow_lock, flags);
+	pgdat_resize_lock_irq(pgdat, &flags);
+
 	/*
-	 * Bail if we raced with another thread that disabled on demand
-	 * initialization.
+	 * If deferred pages have been initialized while we were waiting for
+	 * the lock, return true, as the zone was grown. The caller will retry
+	 * this zone. We won't return to this function again, since the caller
+	 * also has this static branch.
 	 */
-	if (!static_branch_unlikely(&deferred_pages) || !deferred_zone_grow) {
-		spin_unlock_irqrestore(&deferred_zone_grow_lock, flags);
-		return false;
+	if (!static_branch_unlikely(&deferred_pages)) {
+		pgdat_resize_unlock_irq(pgdat, &flags);
+		return true;
+	}
+
+	/*
+	 * If someone grew this zone while we were waiting for the spinlock,
+	 * return true, as there might be enough pages already.
+	 */
+	if (first_deferred_pfn != pgdat->first_deferred_pfn) {
+		pgdat_resize_unlock_irq(pgdat, &flags);
+		return true;
 	}
 
-	first_deferred_pfn = pgdat->first_deferred_pfn;
 	first_init_pfn = max(zone->zone_start_pfn, first_deferred_pfn);
 
 	if (first_init_pfn >= pgdat_end_pfn(pgdat)) {
-		spin_unlock_irqrestore(&deferred_zone_grow_lock, flags);
+		pgdat_resize_unlock_irq(pgdat, &flags);
 		return false;
 	}
 
@@ -1655,9 +1664,9 @@ deferred_grow_zone(struct zone *zone, un
 			break;
 	}
 	pgdat->first_deferred_pfn = first_deferred_pfn;
-	spin_unlock_irqrestore(&deferred_zone_grow_lock, flags);
+	pgdat_resize_unlock_irq(pgdat, &flags);
 
-	return nr_pages >= nr_pages_needed;
+	return nr_pages > 0;
 }
 
 /*
@@ -1681,19 +1690,6 @@ void __init page_alloc_init_late(void)
 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
 	int nid;
 
-	/*
-	 * We are about to initialize the rest of deferred pages, permanently
-	 * disable on-demand struct page initialization.
-	 *
-	 * Note: it is prohibited to modify static branches in non-preemptible
-	 * context. Since, spin_lock() disables preemption, we must use an
-	 * extra boolean deferred_zone_grow.
-	 */
-	spin_lock_irq(&deferred_zone_grow_lock);
-	deferred_zone_grow = false;
-	spin_unlock_irq(&deferred_zone_grow_lock);
-	static_branch_disable(&deferred_pages);
-
 	/* There will be num_node_state(N_MEMORY) threads */
 	atomic_set(&pgdat_init_n_undone, num_node_state(N_MEMORY));
 	for_each_node_state(nid, N_MEMORY) {
@@ -1703,6 +1699,12 @@ void __init page_alloc_init_late(void)
 	/* Block until all are initialised */
 	wait_for_completion(&pgdat_init_all_done_comp);
 
+	/*
+	 * We initialized the rest of the deferred pages.  Permanently disable
+	 * on-demand struct page initialization.
+	 */
+	static_branch_disable(&deferred_pages);
+
 	/* Reinit limits that are based on free pages after the kernel is up */
 	files_maxfiles_init();
 #endif
_

Patches currently in -mm which might be from pasha.tatashin@xxxxxxxxxx are

mm-disable-interrupts-while-initializing-deferred-pages.patch
mm-initialize-pages-on-demand-during-boot.patch
mm-initialize-pages-on-demand-during-boot-fix-3.patch
mm-initialize-pages-on-demand-during-boot-fix-4.patch
mm-initialize-pages-on-demand-during-boot-v5.patch
mm-memory_hotplug-enforce-block-size-aligned-range-check.patch
x86-mm-memory_hotplug-determine-block-size-based-on-the-end-of-boot-memory.patch
x86-mm-memory_hotplug-determine-block-size-based-on-the-end-of-boot-memory-v4.patch
mm-uninitialized-struct-page-poisoning-sanity-checking.patch
mm-uninitialized-struct-page-poisoning-sanity-checking-v4.patch
mm-memory_hotplug-optimize-probe-routine.patch
mm-memory_hotplug-dont-read-nid-from-struct-page-during-hotplug.patch
mm-memory_hotplug-dont-read-nid-from-struct-page-during-hotplug-v5.patch
mm-memory_hotplug-optimize-memory-hotplug.patch
mm-memory_hotplug-optimize-memory-hotplug-v5.patch
xen-mm-allow-deferred-page-initialization-for-xen-pv-domains.patch
sparc64-ng4-memset-32-bits-overflow.patch

--
To unsubscribe from this list: send the line "unsubscribe mm-commits" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html
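
P.S. For readers following the series: the deferred_pages static key patched
above is consumed in the page allocator's slowpath, which the base patch
(mm-initialize-pages-on-demand-during-boot.patch) hooks up near the watermark
checks via the __ref wrapper _deferred_grow_zone() named in the noinline
comment.  A minimal sketch of that caller-side pattern follows; it is an
illustration only, not part of this -v5 delta, and the helper name
(zone_try_grow_deferred) is hypothetical:

#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
/*
 * Illustrative sketch, assuming the names used in this series
 * (deferred_pages, _deferred_grow_zone); not the patch's literal hook.
 */
static inline bool zone_try_grow_deferred(struct zone *zone,
					  unsigned int order)
{
	/*
	 * Until page_alloc_init_late() disables the deferred_pages static
	 * key, a failed watermark check may only mean this zone's struct
	 * pages are not initialized yet; growing the zone on demand can
	 * make a retry of the watermark check succeed.
	 */
	if (static_branch_unlikely(&deferred_pages))
		return _deferred_grow_zone(zone, order);
	return false;
}
#endif

Because the key starts true and is flipped exactly once, after all deferred
struct pages are initialized, the post-boot cost of this check is a single
patched no-op jump.  That is what lets -v5 drop the deferred_zone_grow
boolean and its __initdata spinlock in favor of the node's resize lock.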