On 09/13/23 11:54, Usama Arif wrote:
> The new boot flow when it comes to initialization of gigantic pages
> is as follows:
> - At boot time, for a gigantic page during __alloc_bootmem_hugepage,
> the region after the first struct page is marked as noinit.
> - This results in only the first struct page to be
> initialized in reserve_bootmem_region. As the tail struct pages are
> not initialized at this point, there can be a significant saving
> in boot time if HVO succeeds later on.
> - Later on in the boot, the head page is prepped and the first
> HUGETLB_VMEMMAP_RESERVE_SIZE / sizeof(struct page) - 1 tail struct pages
> are initialized.
> - HVO is attempted. If it is not successful, then the rest of the
> tail struct pages are initialized. If it is successful, no more
> tail struct pages need to be initialized saving significant boot time.
>
> The WARN_ON for increased ref count in gather_bootmem_prealloc was changed
> to a VM_BUG_ON. This is OK as there should be no speculative references
> this early in boot process. The VM_BUG_ON's are there just in case such code
> is introduced.
>
> Signed-off-by: Usama Arif <usama.arif@xxxxxxxxxxxxx>
> ---
>  mm/hugetlb.c         | 63 +++++++++++++++++++++++++++++++++++++-------
>  mm/hugetlb_vmemmap.c |  2 +-
>  mm/hugetlb_vmemmap.h |  9 ++++---
>  mm/internal.h        |  3 +++
>  mm/mm_init.c         |  2 +-
>  5 files changed, 64 insertions(+), 15 deletions(-)

Thank you for continued changes.  Code looks good,

Reviewed-by: Mike Kravetz <mike.kravetz@xxxxxxxxxx>
-- 
Mike Kravetz

>
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index c32ca241df4b..ed37c6e4e952 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -3169,6 +3169,15 @@ int __alloc_bootmem_huge_page(struct hstate *h, int nid)
>          }
>
>  found:
> +
> +        /*
> +         * Only initialize the head struct page in memmap_init_reserved_pages,
> +         * rest of the struct pages will be initialized by the HugeTLB subsystem itself.
> +         * The head struct page is used to get folio information by the HugeTLB
> +         * subsystem like zone id and node id.
> +         */
> +        memblock_reserved_mark_noinit(virt_to_phys((void *)m + PAGE_SIZE),
> +                                      huge_page_size(h) - PAGE_SIZE);
>          /* Put them into a private list first because mem_map is not up yet */
>          INIT_LIST_HEAD(&m->list);
>          list_add(&m->list, &huge_boot_pages);
> @@ -3176,6 +3185,42 @@ int __alloc_bootmem_huge_page(struct hstate *h, int nid)
>          return 1;
>  }
>
> +/* Initialize [start_page:end_page_number] tail struct pages of a hugepage */
> +static void __init hugetlb_folio_init_tail_vmemmap(struct folio *folio,
> +                                        unsigned long start_page_number,
> +                                        unsigned long end_page_number)
> +{
> +        enum zone_type zone = zone_idx(folio_zone(folio));
> +        int nid = folio_nid(folio);
> +        unsigned long head_pfn = folio_pfn(folio);
> +        unsigned long pfn, end_pfn = head_pfn + end_page_number;
> +        int ret;
> +
> +        for (pfn = head_pfn + start_page_number; pfn < end_pfn; pfn++) {
> +                struct page *page = pfn_to_page(pfn);
> +
> +                __init_single_page(page, pfn, zone, nid);
> +                prep_compound_tail((struct page *)folio, pfn - head_pfn);
> +                ret = page_ref_freeze(page, 1);
> +                VM_BUG_ON(!ret);
> +        }
> +}
> +
> +static void __init hugetlb_folio_init_vmemmap(struct folio *folio, struct hstate *h,
> +                                        unsigned long nr_pages)
> +{
> +        int ret;
> +
> +        /* Prepare folio head */
> +        __folio_clear_reserved(folio);
> +        __folio_set_head(folio);
> +        ret = page_ref_freeze(&folio->page, 1);
> +        VM_BUG_ON(!ret);
> +        /* Initialize the necessary tail struct pages */
> +        hugetlb_folio_init_tail_vmemmap(folio, 1, nr_pages);
> +        prep_compound_head((struct page *)folio, huge_page_order(h));
> +}
> +
>  /*
>   * Put bootmem huge pages into the standard lists after mem_map is up.
>   * Note: This only applies to gigantic (order > MAX_ORDER) pages.
> @@ -3186,19 +3231,19 @@ static void __init gather_bootmem_prealloc(void)
>
>          list_for_each_entry(m, &huge_boot_pages, list) {
>                  struct page *page = virt_to_page(m);
> -                struct folio *folio = page_folio(page);
> +                struct folio *folio = (void *)page;
>                  struct hstate *h = m->hstate;
>
>                  VM_BUG_ON(!hstate_is_gigantic(h));
>                  WARN_ON(folio_ref_count(folio) != 1);
> -                if (prep_compound_gigantic_folio(folio, huge_page_order(h))) {
> -                        WARN_ON(folio_test_reserved(folio));
> -                        prep_new_hugetlb_folio(h, folio, folio_nid(folio));
> -                        free_huge_folio(folio); /* add to the hugepage allocator */
> -                } else {
> -                        /* VERY unlikely inflated ref count on a tail page */
> -                        free_gigantic_folio(folio, huge_page_order(h));
> -                }
> +
> +                hugetlb_folio_init_vmemmap(folio, h, HUGETLB_VMEMMAP_RESERVE_PAGES);
> +                prep_new_hugetlb_folio(h, folio, folio_nid(folio));
> +                /* If HVO fails, initialize all tail struct pages */
> +                if (!HPageVmemmapOptimized(&folio->page))
> +                        hugetlb_folio_init_tail_vmemmap(folio, HUGETLB_VMEMMAP_RESERVE_PAGES,
> +                                                        pages_per_huge_page(h));
> +                free_huge_folio(folio); /* add to the hugepage allocator */
>
>                  /*
>                   * We need to restore the 'stolen' pages to totalram_pages
> diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
> index 3cdb38d87a95..772a877918d7 100644
> --- a/mm/hugetlb_vmemmap.c
> +++ b/mm/hugetlb_vmemmap.c
> @@ -589,7 +589,7 @@ static int __init hugetlb_vmemmap_init(void)
>          const struct hstate *h;
>
>          /* HUGETLB_VMEMMAP_RESERVE_SIZE should cover all used struct pages */
> -        BUILD_BUG_ON(__NR_USED_SUBPAGE * sizeof(struct page) > HUGETLB_VMEMMAP_RESERVE_SIZE);
> +        BUILD_BUG_ON(__NR_USED_SUBPAGE > HUGETLB_VMEMMAP_RESERVE_PAGES);
>
>          for_each_hstate(h) {
>                  if (hugetlb_vmemmap_optimizable(h)) {
> diff --git a/mm/hugetlb_vmemmap.h b/mm/hugetlb_vmemmap.h
> index 25bd0e002431..4573899855d7 100644
> --- a/mm/hugetlb_vmemmap.h
> +++ b/mm/hugetlb_vmemmap.h
> @@ -10,15 +10,16 @@
>  #define _LINUX_HUGETLB_VMEMMAP_H
>  #include <linux/hugetlb.h>
>
> -#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
> -int hugetlb_vmemmap_restore(const struct hstate *h, struct page *head);
> -void hugetlb_vmemmap_optimize(const struct hstate *h, struct page *head);
> -
>  /*
>   * Reserve one vmemmap page, all vmemmap addresses are mapped to it. See
>   * Documentation/vm/vmemmap_dedup.rst.
>   */
>  #define HUGETLB_VMEMMAP_RESERVE_SIZE    PAGE_SIZE
> +#define HUGETLB_VMEMMAP_RESERVE_PAGES   (HUGETLB_VMEMMAP_RESERVE_SIZE / sizeof(struct page))
> +
> +#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
> +int hugetlb_vmemmap_restore(const struct hstate *h, struct page *head);
> +void hugetlb_vmemmap_optimize(const struct hstate *h, struct page *head);
>
>  static inline unsigned int hugetlb_vmemmap_size(const struct hstate *h)
>  {
> diff --git a/mm/internal.h b/mm/internal.h
> index d1d4bf4e63c0..d74061aa6de7 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -1154,4 +1154,7 @@ struct vma_prepare {
>          struct vm_area_struct *remove;
>          struct vm_area_struct *remove2;
>  };
> +
> +void __meminit __init_single_page(struct page *page, unsigned long pfn,
> +                                  unsigned long zone, int nid);
>  #endif /* __MM_INTERNAL_H */
> diff --git a/mm/mm_init.c b/mm/mm_init.c
> index 50f2f34745af..fed4370b02e1 100644
> --- a/mm/mm_init.c
> +++ b/mm/mm_init.c
> @@ -555,7 +555,7 @@ static void __init find_zone_movable_pfns_for_nodes(void)
>          node_states[N_MEMORY] = saved_node_state;
>  }
>
> -static void __meminit __init_single_page(struct page *page, unsigned long pfn,
> +void __meminit __init_single_page(struct page *page, unsigned long pfn,
>                                           unsigned long zone, int nid)
>  {
>          mm_zero_struct_page(page);
> --
> 2.25.1
>
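
For anyone reasoning about the size of the boot-time saving described in the
commit message, below is a minimal userspace sketch (not kernel code) of the
accounting the patch relies on. It assumes common x86-64 defaults: a 4 KiB
base page, a 64-byte struct page and a 1 GiB gigantic page; the constants and
the program are illustrative only, not taken from the patch.

/*
 * Standalone sketch of the struct page accounting behind deferred
 * gigantic-page initialization. Assumed values (not kernel constants):
 * 4 KiB base pages, 64-byte struct page, 1 GiB gigantic page.
 */
#include <stdio.h>

#define ASSUMED_PAGE_SIZE         4096UL                  /* base page size */
#define ASSUMED_STRUCT_PAGE_SIZE  64UL                    /* sizeof(struct page) */
#define ASSUMED_GIGANTIC_SIZE     (1024UL * 1024 * 1024)  /* 1 GiB hstate */

int main(void)
{
        /* Mirrors HUGETLB_VMEMMAP_RESERVE_PAGES: struct pages per vmemmap page. */
        unsigned long reserve_pages = ASSUMED_PAGE_SIZE / ASSUMED_STRUCT_PAGE_SIZE;
        /* Total struct pages backing one gigantic page. */
        unsigned long total = ASSUMED_GIGANTIC_SIZE / ASSUMED_PAGE_SIZE;

        printf("struct pages per gigantic page:           %lu\n", total);
        printf("initialized before HVO (head + tails):    %lu\n", reserve_pages);
        printf("initialized only if HVO fails (deferred): %lu\n",
               total - reserve_pages);
        return 0;
}

With those assumptions it reports 262144 struct pages per 1 GiB page, of which
only 64 (the head plus 63 tails) are initialized before HVO is attempted; the
remaining 262080 are touched only if HVO fails, which is where the saving in
boot time comes from.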