The patch titled
     Subject: mm/hugetlb: enable bootmem allocation from CMA areas
has been added to the -mm mm-unstable branch.  Its filename is
     mm-hugetlb-enable-bootmem-allocation-from-cma-areas.patch

This patch will shortly appear at
     https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/mm-hugetlb-enable-bootmem-allocation-from-cma-areas.patch

This patch will later appear in the mm-unstable branch at
     git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Before you just go and hit "reply", please:
   a) Consider who else should be cc'ed
   b) Prefer to cc a suitable mailing list as well
   c) Ideally: find the original patch on the mailing list and do a
      reply-to-all to that, adding suitable additional cc's

*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***

The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days

------------------------------------------------------
From: Frank van der Linden <fvdl@xxxxxxxxxx>
Subject: mm/hugetlb: enable bootmem allocation from CMA areas
Date: Mon, 27 Jan 2025 23:22:07 +0000

If hugetlb_cma_only is enabled, we know that hugetlb pages can only be
allocated from CMA.  Now that there is an interface to do early
reservations from a CMA area (returning memblock memory), it can be used
to allocate hugetlb pages from CMA.

This also allows for doing pre-HVO on these pages (if enabled).

Make sure to initialize the page structures and associated data correctly.
Create a flag to signal that a hugetlb page has been allocated from CMA
to make things a little easier.

Some configurations of powerpc have a special hugetlb bootmem allocator,
so introduce a function, arch_specific_huge_bootmem_alloc(), that returns
true if such an allocator is present.  In that case, CMA bootmem
allocations can't be used, so check that function before trying.
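To illustrate the hook outside of kernel context, here is a minimal
standalone C sketch of the weak-default pattern, assuming a GCC/Clang ELF
toolchain; hugetlb_early_cma_sketch() and main() are invented for the
example, and the real hugetlb_early_cma() additionally requires a gigantic
hstate, a nonzero hugetlb_cma_size and hugetlb_cma_only:

/* Standalone illustration (not kernel code) of the weak default hook. */
#include <stdbool.h>
#include <stdio.h>

struct hstate;	/* opaque stand-in for the kernel's struct hstate */

/*
 * Weak default: no architecture-specific bootmem allocator, so generic
 * code may use early CMA reservations.  An architecture with its own
 * allocator links in a normal (strong) definition from its own object
 * file, which the linker prefers over this one.
 */
bool __attribute__((weak)) arch_specific_huge_bootmem_alloc(struct hstate *h)
{
	(void)h;
	return false;
}

/* Simplified stand-in for hugetlb_early_cma(): only the arch check. */
static bool hugetlb_early_cma_sketch(struct hstate *h)
{
	return !arch_specific_huge_bootmem_alloc(h);
}

int main(void)
{
	printf("early CMA reservation usable: %s\n",
	       hugetlb_early_cma_sketch(NULL) ? "yes" : "no");
	return 0;
}

With the rest of this series applied, the early CMA path is only taken when
the kernel is booted with both hugetlb_cma= and the new hugetlb_cma_only
option, e.g. something like hugetlb_cma=4G hugetlb_cma_only hugepagesz=1G
hugepages=4 (the sizes here are only an example).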
Link: https://lkml.kernel.org/r/20250127232207.3888640-28-fvdl@xxxxxxxxxx
Signed-off-by: Frank van der Linden <fvdl@xxxxxxxxxx>
Cc: Madhavan Srinivasan <maddy@xxxxxxxxxxxxx>
Cc: Michael Ellerman <mpe@xxxxxxxxxxxxxx>
Cc: Alexander Gordeev <agordeev@xxxxxxxxxxxxx>
Cc: Andy Lutomirski <luto@xxxxxxxxxx>
Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
Cc: Heiko Carstens <hca@xxxxxxxxxxxxx>
Cc: Joao Martins <joao.m.martins@xxxxxxxxxx>
Cc: Mike Rapoport <rppt@xxxxxxxxxx>
Cc: Muchun Song <muchun.song@xxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Roman Gushchin (Cruise) <roman.gushchin@xxxxxxxxx>
Cc: Usama Arif <usama.arif@xxxxxxxxxxxxx>
Cc: Vasily Gorbik <gor@xxxxxxxxxxxxx>
Cc: Yu Zhao <yuzhao@xxxxxxxxxx>
Cc: Zhenguo Yao <yaozhenguo1@xxxxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
---

 arch/powerpc/mm/hugetlbpage.c |    5 +
 include/linux/hugetlb.h       |    7 +
 mm/hugetlb.c                  |  135 ++++++++++++++++++++++++--------
 3 files changed, 114 insertions(+), 33 deletions(-)

--- a/arch/powerpc/mm/hugetlbpage.c~mm-hugetlb-enable-bootmem-allocation-from-cma-areas
+++ a/arch/powerpc/mm/hugetlbpage.c
@@ -121,6 +121,11 @@ bool __init hugetlb_node_alloc_supported
 {
 	return false;
 }
+
+bool __init arch_specific_huge_bootmem_alloc(struct hstate *h)
+{
+	return (firmware_has_feature(FW_FEATURE_LPAR) && !radix_enabled());
+}
 #endif
--- a/include/linux/hugetlb.h~mm-hugetlb-enable-bootmem-allocation-from-cma-areas
+++ a/include/linux/hugetlb.h
@@ -591,6 +591,7 @@ enum hugetlb_page_flags {
 	HPG_freed,
 	HPG_vmemmap_optimized,
 	HPG_raw_hwp_unreliable,
+	HPG_cma,
 	__NR_HPAGEFLAGS,
 };
@@ -650,6 +651,7 @@ HPAGEFLAG(Temporary, temporary)
 HPAGEFLAG(Freed, freed)
 HPAGEFLAG(VmemmapOptimized, vmemmap_optimized)
 HPAGEFLAG(RawHwpUnreliable, raw_hwp_unreliable)
+HPAGEFLAG(Cma, cma)

 #ifdef CONFIG_HUGETLB_PAGE
@@ -678,14 +680,18 @@ struct hstate {
 	char name[HSTATE_NAME_LEN];
 };

+struct cma;
+
 struct huge_bootmem_page {
 	struct list_head list;
 	struct hstate *hstate;
 	unsigned long flags;
+	struct cma *cma;
 };

 #define HUGE_BOOTMEM_HVO		0x0001
 #define HUGE_BOOTMEM_ZONES_VALID	0x0002
+#define HUGE_BOOTMEM_CMA		0x0004

 bool hugetlb_bootmem_page_zones_valid(int nid, struct huge_bootmem_page *m);
@@ -711,6 +717,7 @@ bool __init hugetlb_node_alloc_supported

 void __init hugetlb_add_hstate(unsigned order);
 bool __init arch_hugetlb_valid_size(unsigned long size);
+bool __init arch_specific_huge_bootmem_alloc(struct hstate *h);
 struct hstate *size_to_hstate(unsigned long size);

 #ifndef HUGE_MAX_HSTATE
--- a/mm/hugetlb.c~mm-hugetlb-enable-bootmem-allocation-from-cma-areas
+++ a/mm/hugetlb.c
@@ -61,7 +61,7 @@ static struct cma *hugetlb_cma[MAX_NUMNO
 static unsigned long hugetlb_cma_size_in_node[MAX_NUMNODES] __initdata;
 #endif
 static bool hugetlb_cma_only;
-static unsigned long hugetlb_cma_size __initdata;
+static unsigned long hugetlb_cma_size;

 __initdata struct list_head huge_boot_pages[MAX_NUMNODES];
 __initdata unsigned long hstate_boot_nrinvalid[HUGE_MAX_HSTATE];
@@ -132,8 +132,10 @@ static void hugetlb_free_folio(struct fo
 #ifdef CONFIG_CMA
 	int nid = folio_nid(folio);

-	if (cma_free_folio(hugetlb_cma[nid], folio))
+	if (folio_test_hugetlb_cma(folio)) {
+		WARN_ON(!cma_free_folio(hugetlb_cma[nid], folio));
 		return;
+	}
 #endif
 	folio_put(folio);
 }
@@ -1509,6 +1511,9 @@ retry:
 				break;
 			}
 		}
+
+		if (folio)
+			folio_set_hugetlb_cma(folio);
 	}
 #endif
 	if (!folio) {
@@ -3175,6 +3180,63 @@ out_end_reservation:
 	return ERR_PTR(-ENOSPC);
 }

+/*
+ * Some architectures do their own bootmem allocation, so they can't use
+ * early CMA allocation.  So, allow for this function to be redefined.
+ */
+bool __init __attribute((weak))
+arch_specific_huge_bootmem_alloc(struct hstate *h)
+{
+	return false;
+}
+
+static bool __init hugetlb_early_cma(struct hstate *h)
+{
+	if (arch_specific_huge_bootmem_alloc(h))
+		return false;
+
+	return (hstate_is_gigantic(h) && hugetlb_cma_size && hugetlb_cma_only);
+}
+
+static __init void *alloc_bootmem(struct hstate *h, int nid)
+{
+	struct huge_bootmem_page *m;
+	unsigned long flags;
+	struct cma *cma;
+
+#ifdef CONFIG_CMA
+	if (hugetlb_early_cma(h)) {
+		flags = HUGE_BOOTMEM_CMA;
+		cma = hugetlb_cma[nid];
+		m = cma_reserve_early(cma, huge_page_size(h));
+	} else
+#endif
+	{
+		flags = 0;
+		cma = NULL;
+		m = memblock_alloc_try_nid_raw(huge_page_size(h),
+			huge_page_size(h), 0, MEMBLOCK_ALLOC_ACCESSIBLE, nid);
+	}
+
+	if (m) {
+		/*
+		 * Use the beginning of the huge page to store the
+		 * huge_bootmem_page struct (until gather_bootmem
+		 * puts them into the mem_map).
+		 *
+		 * Put them into a private list first because mem_map
+		 * is not up yet.
+		 */
+		INIT_LIST_HEAD(&m->list);
+		list_add(&m->list, &huge_boot_pages[nid]);
+		m->hstate = h;
+		m->flags = flags;
+		m->cma = cma;
+	}
+
+	return m;
+}
+
 int alloc_bootmem_huge_page(struct hstate *h, int nid)
 	__attribute__ ((weak, alias("__alloc_bootmem_huge_page")));
 int __alloc_bootmem_huge_page(struct hstate *h, int nid)
@@ -3184,17 +3246,14 @@ int __alloc_bootmem_huge_page(struct hst
 	/* do node specific alloc */
 	if (nid != NUMA_NO_NODE) {
-		m = memblock_alloc_try_nid_raw(huge_page_size(h), huge_page_size(h),
-				0, MEMBLOCK_ALLOC_ACCESSIBLE, nid);
+		m = alloc_bootmem(h, node);
 		if (!m)
 			return 0;
 		goto found;
 	}

 	/* allocate from next node when distributing huge pages */
 	for_each_node_mask_to_alloc(&h->next_nid_to_alloc, nr_nodes, node, &node_states[N_ONLINE]) {
-		m = memblock_alloc_try_nid_raw(
-				huge_page_size(h), huge_page_size(h),
-				0, MEMBLOCK_ALLOC_ACCESSIBLE, node);
+		m = alloc_bootmem(h, node);
 		if (m)
 			break;
 	}
@@ -3203,7 +3262,6 @@ int __alloc_bootmem_huge_page(struct hst
 		return 0;

 found:
-
 	/*
 	 * Only initialize the head struct page in memmap_init_reserved_pages,
 	 * rest of the struct pages will be initialized by the HugeTLB
@@ -3213,18 +3271,6 @@ found:
 	 */
 	memblock_reserved_mark_noinit(virt_to_phys((void *)m + PAGE_SIZE),
 		huge_page_size(h) - PAGE_SIZE);
-	/*
-	 * Use the beginning of the huge page to store the
-	 * huge_bootmem_page struct (until gather_bootmem
-	 * puts them into the mem_map).
-	 *
-	 * Put them into a private list first because mem_map
-	 * is not up yet.
-	 */
-	INIT_LIST_HEAD(&m->list);
-	list_add(&m->list, &huge_boot_pages[node]);
-	m->hstate = h;
-	m->flags = 0;

 	return 1;
 }
@@ -3265,13 +3311,25 @@ static void __init hugetlb_folio_init_vm
 	prep_compound_head((struct page *)folio, huge_page_order(h));
 }

+static bool __init hugetlb_bootmem_page_prehvo(struct huge_bootmem_page *m)
+{
+	return (m->flags & HUGE_BOOTMEM_HVO);
+}
+
+static bool __init hugetlb_bootmem_page_earlycma(struct huge_bootmem_page *m)
+{
+	return (m->flags & HUGE_BOOTMEM_CMA);
+}
+
 /*
  * memblock-allocated pageblocks might not have the migrate type set
  * if marked with the 'noinit' flag. Set it to the default (MIGRATE_MOVABLE)
- * here.
+ * here, or MIGRATE_CMA if this was a page allocated through an early CMA
+ * reservation.
  *
- * Note that this will not write the page struct, it is ok (and necessary)
- * to do this on vmemmap optimized folios.
+ * In case of vmemmap optimized folios, the tail vmemmap pages are mapped
+ * read-only, but that's ok - for sparse vmemmap this does not write to
+ * the page structure.
  */
 static void __init hugetlb_bootmem_init_migratetype(struct folio *folio,
 					  struct hstate *h)
@@ -3280,9 +3338,13 @@ static void __init hugetlb_bootmem_init_

 	WARN_ON_ONCE(!pageblock_aligned(folio_pfn(folio)));

-	for (i = 0; i < nr_pages; i += pageblock_nr_pages)
-		set_pageblock_migratetype(folio_page(folio, i),
+	for (i = 0; i < nr_pages; i += pageblock_nr_pages) {
+		if (folio_test_hugetlb_cma(folio))
+			init_cma_pageblock(folio_page(folio, i));
+		else
+			set_pageblock_migratetype(folio_page(folio, i),
 					  MIGRATE_MOVABLE);
+	}
 }

 static void __init prep_and_add_bootmem_folios(struct hstate *h,
@@ -3319,7 +3381,7 @@ bool __init hugetlb_bootmem_page_zones_v
 				struct huge_bootmem_page *m)
 {
 	unsigned long start_pfn;
-	bool valid;
+	bool valid = false;

 	if (m->flags & HUGE_BOOTMEM_ZONES_VALID) {
 		/*
@@ -3328,10 +3390,16 @@ bool __init hugetlb_bootmem_page_zones_v
 		return true;
 	}

+	if (hugetlb_bootmem_page_earlycma(m)) {
+		valid = cma_validate_zones(m->cma);
+		goto out;
+	}
+
 	start_pfn = virt_to_phys(m) >> PAGE_SHIFT;

 	valid = !pfn_range_intersects_zones(nid, start_pfn,
 			pages_per_huge_page(m->hstate));
+out:
 	if (!valid)
 		hstate_boot_nrinvalid[hstate_index(m->hstate)]++;
@@ -3360,11 +3428,6 @@ static void __init hugetlb_bootmem_free_
 	}
 }

-static bool __init hugetlb_bootmem_page_prehvo(struct huge_bootmem_page *m)
-{
-	return (m->flags & HUGE_BOOTMEM_HVO);
-}
-
 /*
  * Put bootmem huge pages into the standard lists after mem_map is up.
  * Note: This only applies to gigantic (order > MAX_PAGE_ORDER) pages.
@@ -3414,6 +3477,9 @@ static void __init gather_bootmem_preall
 		 */
 		folio_set_hugetlb_vmemmap_optimized(folio);

+		if (hugetlb_bootmem_page_earlycma(m))
+			folio_set_hugetlb_cma(folio);
+
 		list_add(&folio->lru, &folio_list);

 		/*
@@ -3606,8 +3672,11 @@ static void __init hugetlb_hstate_alloc_
 {
 	unsigned long allocated;

-	/* skip gigantic hugepages allocation if hugetlb_cma enabled */
-	if (hstate_is_gigantic(h) && hugetlb_cma_size) {
+	/*
+	 * Skip gigantic hugepages allocation if early CMA
+	 * reservations are not available.
+	 */
+	if (hstate_is_gigantic(h) && hugetlb_cma_size && !hugetlb_early_cma(h)) {
 		pr_warn_once("HugeTLB: hugetlb_cma is enabled, skip boot time allocation\n");
 		return;
 	}
_

Patches currently in -mm which might be from fvdl@xxxxxxxxxx are

mm-cma-export-total-and-free-number-of-pages-for-cma-areas.patch
mm-cma-support-multiple-contiguous-ranges-if-requested.patch
mm-cma-introduce-cma_intersects-function.patch
mm-hugetlb-use-cma_declare_contiguous_multi.patch
mm-hugetlb-fix-round-robin-bootmem-allocation.patch
mm-hugetlb-remove-redundant-__clearpagereserved.patch
mm-hugetlb-use-online-nodes-for-bootmem-allocation.patch
mm-hugetlb-convert-cmdline-parameters-from-setup-to-early.patch
x86-mm-make-register_page_bootmem_memmap-handle-pte-mappings.patch
mm-bootmem_info-export-register_page_bootmem_memmap.patch
mm-sparse-allow-for-alternate-vmemmap-section-init-at-boot.patch
mm-hugetlb-set-migratetype-for-bootmem-folios.patch
mm-define-__init_reserved_page_zone-function.patch
mm-hugetlb-check-bootmem-pages-for-zone-intersections.patch
mm-sparse-add-vmemmap__hvo-functions.patch
mm-hugetlb-deal-with-multiple-calls-to-hugetlb_bootmem_alloc.patch
mm-hugetlb-move-huge_boot_pages-list-init-to-hugetlb_bootmem_alloc.patch
mm-hugetlb-add-pre-hvo-framework.patch
mm-hugetlb_vmemmap-fix-hugetlb_vmemmap_restore_folios-definition.patch
mm-hugetlb-do-pre-hvo-for-bootmem-allocated-pages.patch
x86-setup-call-hugetlb_bootmem_alloc-early.patch
x86-mm-set-arch_want_sparsemem_vmemmap_preinit.patch
mm-cma-simplify-zone-intersection-check.patch
mm-cma-introduce-a-cma-validate-function.patch
mm-cma-introduce-interface-for-early-reservations.patch
mm-hugetlb-add-hugetlb_cma_only-cmdline-option.patch
mm-hugetlb-enable-bootmem-allocation-from-cma-areas.patch