On 3/21/24 15:24, Matthew Wilcox (Oracle) wrote: > The current folio_test_hugetlb() can be fooled by a concurrent folio split > into returning true for a folio which has never belonged to hugetlbfs. > This can't happen if the caller holds a refcount on it, but we have a > few places (memory-failure, compaction, procfs) which do not and should > not take a speculative reference. In compaction and with CONFIG_DEBUG_VM enabled, the current implementation can result in an oops, as reported by Luis. This happens since 9c5ccf2db04b ("mm: remove HUGETLB_PAGE_DTOR") effectively added some VM_BUG_ON() checks in the PageHuge() testing path. > Since hugetlb pages do not use individual page mapcounts (they are always > fully mapped and use the entire_mapcount field to record the number > of mappings), the PageType field is available now that page_mapcount() > ignores the value in this field. Reported-by: Luis Chamberlain <mcgrof@xxxxxxxxxx> Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218227 Fixes: 9c5ccf2db04b ("mm: remove HUGETLB_PAGE_DTOR") Cc: <stable@xxxxxxxxxxxxxxx> > Signed-off-by: Matthew Wilcox (Oracle) <willy@xxxxxxxxxxxxx> > Reviewed-by: David Hildenbrand <david@xxxxxxxxxx> > --- > include/linux/page-flags.h | 70 ++++++++++++++++------------------ > include/trace/events/mmflags.h | 1 + > mm/hugetlb.c | 22 ++--------- > 3 files changed, 37 insertions(+), 56 deletions(-) > > diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h > index 5852f967c640..6fb3cd42ee59 100644 > --- a/include/linux/page-flags.h > +++ b/include/linux/page-flags.h > @@ -190,7 +190,6 @@ enum pageflags { > > /* At least one page in this folio has the hwpoison flag set */ > PG_has_hwpoisoned = PG_error, > - PG_hugetlb = PG_active, > PG_large_rmappable = PG_workingset, /* anon or file-backed */ > }; > > @@ -876,29 +875,6 @@ FOLIO_FLAG_FALSE(large_rmappable) > > #define PG_head_mask ((1UL << PG_head)) > > -#ifdef CONFIG_HUGETLB_PAGE > -int PageHuge(const struct page *page); > -SETPAGEFLAG(HugeTLB, hugetlb, PF_SECOND) > -CLEARPAGEFLAG(HugeTLB, hugetlb, PF_SECOND) > - > -/** > - * folio_test_hugetlb - Determine if the folio belongs to hugetlbfs > - * @folio: The folio to test. > - * > - * Context: Any context. Caller should have a reference on the folio to > - * prevent it from being turned into a tail page. > - * Return: True for hugetlbfs folios, false for anon folios or folios > - * belonging to other filesystems. > - */ > -static inline bool folio_test_hugetlb(const struct folio *folio) > -{ > - return folio_test_large(folio) && > - test_bit(PG_hugetlb, const_folio_flags(folio, 1)); > -} > -#else > -TESTPAGEFLAG_FALSE(Huge, hugetlb) > -#endif > - > #ifdef CONFIG_TRANSPARENT_HUGEPAGE > /* > * PageHuge() only returns true for hugetlbfs pages, but not for > @@ -954,18 +930,6 @@ PAGEFLAG_FALSE(HasHWPoisoned, has_hwpoisoned) > TESTSCFLAG_FALSE(HasHWPoisoned, has_hwpoisoned) > #endif > > -/* > - * Check if a page is currently marked HWPoisoned. Note that this check is > - * best effort only and inherently racy: there is no way to synchronize with > - * failing hardware. > - */ > -static inline bool is_page_hwpoison(struct page *page) > -{ > - if (PageHWPoison(page)) > - return true; > - return PageHuge(page) && PageHWPoison(compound_head(page)); > -} > - > /* > * For pages that are never mapped to userspace (and aren't PageSlab), > * page_type may be used. Because it is initialised to -1, we invert the > @@ -982,6 +946,7 @@ static inline bool is_page_hwpoison(struct page *page) > #define PG_offline 0x00000100 > #define PG_table 0x00000200 > #define PG_guard 0x00000400 > +#define PG_hugetlb 0x00000800 > > #define PageType(page, flag) \ > ((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) > @@ -1076,6 +1041,37 @@ PAGE_TYPE_OPS(Table, table, pgtable) > */ > PAGE_TYPE_OPS(Guard, guard, guard) > > +#ifdef CONFIG_HUGETLB_PAGE > +FOLIO_TYPE_OPS(hugetlb, hugetlb) > +#else > +FOLIO_TEST_FLAG_FALSE(hugetlb) > +#endif > + > +/** > + * PageHuge - Determine if the page belongs to hugetlbfs > + * @page: The page to test. > + * > + * Context: Any context. > + * Return: True for hugetlbfs pages, false for anon pages or pages > + * belonging to other filesystems. > + */ > +static inline bool PageHuge(const struct page *page) > +{ > + return folio_test_hugetlb(page_folio(page)); > +} > + > +/* > + * Check if a page is currently marked HWPoisoned. Note that this check is > + * best effort only and inherently racy: there is no way to synchronize with > + * failing hardware. > + */ > +static inline bool is_page_hwpoison(struct page *page) > +{ > + if (PageHWPoison(page)) > + return true; > + return PageHuge(page) && PageHWPoison(compound_head(page)); > +} > + > extern bool is_free_buddy_page(struct page *page); > > PAGEFLAG(Isolated, isolated, PF_ANY); > @@ -1142,7 +1138,7 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page) > */ > #define PAGE_FLAGS_SECOND \ > (0xffUL /* order */ | 1UL << PG_has_hwpoisoned | \ > - 1UL << PG_hugetlb | 1UL << PG_large_rmappable) > + 1UL << PG_large_rmappable) > > #define PAGE_FLAGS_PRIVATE \ > (1UL << PG_private | 1UL << PG_private_2) > diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h > index d801409b33cf..d55e53ac91bd 100644 > --- a/include/trace/events/mmflags.h > +++ b/include/trace/events/mmflags.h > @@ -135,6 +135,7 @@ IF_HAVE_PG_ARCH_X(arch_3) > #define DEF_PAGETYPE_NAME(_name) { PG_##_name, __stringify(_name) } > > #define __def_pagetype_names \ > + DEF_PAGETYPE_NAME(hugetlb), \ > DEF_PAGETYPE_NAME(offline), \ > DEF_PAGETYPE_NAME(guard), \ > DEF_PAGETYPE_NAME(table), \ > diff --git a/mm/hugetlb.c b/mm/hugetlb.c > index 7e9a766059aa..bdcbb62096cf 100644 > --- a/mm/hugetlb.c > +++ b/mm/hugetlb.c > @@ -1624,7 +1624,7 @@ static inline void __clear_hugetlb_destructor(struct hstate *h, > { > lockdep_assert_held(&hugetlb_lock); > > - folio_clear_hugetlb(folio); > + __folio_clear_hugetlb(folio); > } > > /* > @@ -1711,7 +1711,7 @@ static void add_hugetlb_folio(struct hstate *h, struct folio *folio, > h->surplus_huge_pages_node[nid]++; > } > > - folio_set_hugetlb(folio); > + __folio_set_hugetlb(folio); > folio_change_private(folio, NULL); > /* > * We have to set hugetlb_vmemmap_optimized again as above > @@ -2050,7 +2050,7 @@ static void __prep_account_new_huge_page(struct hstate *h, int nid) > > static void init_new_hugetlb_folio(struct hstate *h, struct folio *folio) > { > - folio_set_hugetlb(folio); > + __folio_set_hugetlb(folio); > INIT_LIST_HEAD(&folio->lru); > hugetlb_set_folio_subpool(folio, NULL); > set_hugetlb_cgroup(folio, NULL); > @@ -2160,22 +2160,6 @@ static bool prep_compound_gigantic_folio_for_demote(struct folio *folio, > return __prep_compound_gigantic_folio(folio, order, true); > } > > -/* > - * PageHuge() only returns true for hugetlbfs pages, but not for normal or > - * transparent huge pages. See the PageTransHuge() documentation for more > - * details. > - */ > -int PageHuge(const struct page *page) > -{ > - const struct folio *folio; > - > - if (!PageCompound(page)) > - return 0; > - folio = page_folio(page); > - return folio_test_hugetlb(folio); > -} > -EXPORT_SYMBOL_GPL(PageHuge); > - > /* > * Find and lock address space (mapping) in write mode. > *