On Tue, Sep 3, 2024 at 2:36 PM Yu Zhao <yuzhao@xxxxxxxxxx> wrote: > > Only tag the new head pages when splitting one large folio to multiple > ones of a lower order. Tagging tail pages can cause imbalanced "calls" > counters, since only head pages are untagged by pgalloc_tag_sub() and > reference counts on tail pages are leaked, e.g., > # echo 2048kB >/sys/kernel/mm/hugepages/hugepages-1048576kB/demote_size > # echo 700 >/sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages > # time echo 700 >/sys/kernel/mm/hugepages/hugepages-1048576kB/demote > # grep alloc_gigantic_folio /proc/allocinfo > > Before this patch: > 0 549427200 mm/hugetlb.c:1549 func:alloc_gigantic_folio > > real 0m2.057s > user 0m0.000s > sys 0m2.051s > > After this patch: > 0 0 mm/hugetlb.c:1549 func:alloc_gigantic_folio > > real 0m1.711s > user 0m0.000s > sys 0m1.704s > > Not tagging tail pages also improves the splitting time, e.g., by > about 15% when demoting 1GB hugeTLB folios to 2MB ones, as shown > above. > > Fixes: be25d1d4e822 ("mm: create new codetag references during page splitting") > Signed-off-by: Yu Zhao <yuzhao@xxxxxxxxxx> > --- > include/linux/mm.h | 30 ++++++++++++++++++++++++++++++ > include/linux/pgalloc_tag.h | 31 ------------------------------- > mm/huge_memory.c | 2 +- > mm/hugetlb.c | 2 +- > mm/page_alloc.c | 4 ++-- > 5 files changed, 34 insertions(+), 35 deletions(-) > > diff --git a/include/linux/mm.h b/include/linux/mm.h > index b31d4bdd65ad..a07e93adb8ad 100644 > --- a/include/linux/mm.h > +++ b/include/linux/mm.h > @@ -4137,4 +4137,34 @@ void vma_pgtable_walk_end(struct vm_area_struct *vma); > > int reserve_mem_find_by_name(const char *name, phys_addr_t *start, phys_addr_t *size); > > +#ifdef CONFIG_MEM_ALLOC_PROFILING > +static inline void pgalloc_tag_split(struct folio *folio, int old_order, int new_order) > +{ > + int i; > + struct alloc_tag *tag; > + unsigned int nr_pages = 1 << new_order; > + > + if (!mem_alloc_profiling_enabled()) > + return; > + > + tag = pgalloc_tag_get(&folio->page); > + if (!tag) > + return; > + > + for (i = nr_pages; i < (1 << old_order); i += nr_pages) { > + union codetag_ref *ref = get_page_tag_ref(folio_page(folio, i)); > + > + if (ref) { > + /* Set new reference to point to the original tag */ > + alloc_tag_ref_set(ref, tag); > + put_page_tag_ref(ref); > + } > + } > +} > +#else /* !CONFIG_MEM_ALLOC_PROFILING */ > +static inline void pgalloc_tag_split(struct folio *folio, int old_order, int new_order) > +{ > +} > +#endif /* CONFIG_MEM_ALLOC_PROFILING */ > + > #endif /* _LINUX_MM_H */ > diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h > index 207f0c83c8e9..59a3deb792a8 100644 > --- a/include/linux/pgalloc_tag.h > +++ b/include/linux/pgalloc_tag.h > @@ -80,36 +80,6 @@ static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) > } > } > > -static inline void pgalloc_tag_split(struct page *page, unsigned int nr) > -{ > - int i; > - struct page_ext *first_page_ext; > - struct page_ext *page_ext; > - union codetag_ref *ref; > - struct alloc_tag *tag; > - > - if (!mem_alloc_profiling_enabled()) > - return; > - > - first_page_ext = page_ext = page_ext_get(page); > - if (unlikely(!page_ext)) > - return; > - > - ref = codetag_ref_from_page_ext(page_ext); > - if (!ref->ct) > - goto out; > - > - tag = ct_to_alloc_tag(ref->ct); > - page_ext = page_ext_next(page_ext); > - for (i = 1; i < nr; i++) { > - /* Set new reference to point to the original tag */ > - alloc_tag_ref_set(codetag_ref_from_page_ext(page_ext), tag); > - page_ext = page_ext_next(page_ext); > - } > -out: > - page_ext_put(first_page_ext); > -} > - > static inline struct alloc_tag *pgalloc_tag_get(struct page *page) > { > struct alloc_tag *tag = NULL; > @@ -142,7 +112,6 @@ static inline void clear_page_tag_ref(struct page *page) {} > static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, > unsigned int nr) {} > static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {} > -static inline void pgalloc_tag_split(struct page *page, unsigned int nr) {} > static inline struct alloc_tag *pgalloc_tag_get(struct page *page) { return NULL; } > static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) {} > > diff --git a/mm/huge_memory.c b/mm/huge_memory.c > index 0993dfe9ae94..aa8a4c938ba9 100644 > --- a/mm/huge_memory.c > +++ b/mm/huge_memory.c > @@ -3244,7 +3244,7 @@ static void __split_huge_page(struct page *page, struct list_head *list, > /* Caller disabled irqs, so they are still disabled here */ > > split_page_owner(head, order, new_order); > - pgalloc_tag_split(head, 1 << order); > + pgalloc_tag_split(folio, order, new_order); Looks like here and in the next diff you are fixing an additional bug. I was assuming that we are always splitting into order-0 pages, which is not the case anymore. It's worth mentioning that in the changelog. With that added: Acked-by: Suren Baghdasaryan <surenb@xxxxxxxxxx> > > /* See comment in __split_huge_page_tail() */ > if (folio_test_anon(folio)) { > diff --git a/mm/hugetlb.c b/mm/hugetlb.c > index 3faf5aad142d..a8624c07d8bf 100644 > --- a/mm/hugetlb.c > +++ b/mm/hugetlb.c > @@ -3778,7 +3778,7 @@ static long demote_free_hugetlb_folios(struct hstate *src, struct hstate *dst, > list_del(&folio->lru); > > split_page_owner(&folio->page, huge_page_order(src), huge_page_order(dst)); > - pgalloc_tag_split(&folio->page, 1 << huge_page_order(src)); > + pgalloc_tag_split(folio, huge_page_order(src), huge_page_order(dst)); > > for (i = 0; i < pages_per_huge_page(src); i += pages_per_huge_page(dst)) { > struct page *page = folio_page(folio, i); > diff --git a/mm/page_alloc.c b/mm/page_alloc.c > index c242d61fc4fd..13ce8e8899ed 100644 > --- a/mm/page_alloc.c > +++ b/mm/page_alloc.c > @@ -2822,7 +2822,7 @@ void split_page(struct page *page, unsigned int order) > for (i = 1; i < (1 << order); i++) > set_page_refcounted(page + i); > split_page_owner(page, order, 0); > - pgalloc_tag_split(page, 1 << order); > + pgalloc_tag_split(page_folio(page), order, 0); > split_page_memcg(page, order, 0); > } > EXPORT_SYMBOL_GPL(split_page); > @@ -5020,7 +5020,7 @@ static void *make_alloc_exact(unsigned long addr, unsigned int order, > struct page *last = page + nr; > > split_page_owner(page, order, 0); > - pgalloc_tag_split(page, 1 << order); > + pgalloc_tag_split(page_folio(page), order, 0); > split_page_memcg(page, order, 0); > while (page < --last) > set_page_refcounted(last); > -- > 2.46.0.469.g59c65b2a67-goog >