On Mon, Feb 26, 2024 at 11:34:51AM +0800, Baolin Wang wrote:
> IMO, I'm not sure whether it's appropriate to decouple
> dequeue_hugetlb_folio_nodemask() from alloc_hugetlb_folio_nodemask() into
> two separate functions for the users to call, because these details should
> be hidden within the hugetlb core implementation.
>
> Instead, I can move the gfp_mask fiddling into a new helper, and move the
> helper into alloc_migrate_hugetlb_folio(). Temporary hugetlb allocation
> having its own gfp strategy seems reasonable to me.

An alternative would be to do the following, which does not further carry
the "reason" argument into hugetlb code.
(Not even compile tested, just a PoC)

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index c1ee640d87b1..8a89a1007dcb 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -970,6 +970,24 @@ static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask)
 	return modified_mask;
 }
 
+static inline bool htlb_allow_fallback(int reason)
+{
+	bool allowed_fallback = false;
+
+	switch (reason) {
+	case MR_MEMORY_HOTPLUG:
+	case MR_MEMORY_FAILURE:
+	case MR_SYSCALL:
+	case MR_MEMPOLICY_MBIND:
+		allowed_fallback = true;
+		break;
+	default:
+		break;
+	}
+
+	return allowed_fallback;
+}
+
 static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
 					   struct mm_struct *mm, pte_t *pte)
 {
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index ed1581b670d4..7e8d6b5885d6 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2619,7 +2619,7 @@ struct folio *alloc_buddy_hugetlb_folio_with_mpol(struct hstate *h,
 
 /* folio migration callback function */
 struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
-		nodemask_t *nmask, gfp_t gfp_mask)
+		nodemask_t *nmask, gfp_t gfp_mask, bool allow_fallback)
 {
 	spin_lock_irq(&hugetlb_lock);
 	if (available_huge_pages(h)) {
@@ -2634,6 +2634,12 @@ struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
 	}
 	spin_unlock_irq(&hugetlb_lock);
 
+	/*
+	 * We cannot fall back to other nodes, as we could break the per-node pool
+	 */
+	if (!allow_fallback)
+		gfp_mask |= __GFP_THISNODE;
+
 	return alloc_migrate_hugetlb_folio(h, gfp_mask, preferred_nid, nmask);
 }
 
diff --git a/mm/migrate.c b/mm/migrate.c
index cc9f2bcd73b4..8820009acadd 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2016,10 +2016,13 @@ struct folio *alloc_migration_target(struct folio *src, unsigned long private)
 
 	if (folio_test_hugetlb(src)) {
 		struct hstate *h = folio_hstate(src);
+		bool allow_fallback;
 
 		gfp_mask = htlb_modify_alloc_mask(h, gfp_mask);
+		allow_fallback = htlb_allow_fallback(mtc->reason);
 		return alloc_hugetlb_folio_nodemask(h, nid,
-						mtc->nmask, gfp_mask);
+						mtc->nmask, gfp_mask,
+						allow_fallback);
 	}
 
 	if (folio_test_large(src)) {

-- 
Oscar Salvador
SUSE Labs
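
For comparison, this is roughly how I picture the helper variant quoted
above; the helper name (htlb_modify_migrate_mask), keeping it next to
alloc_migrate_hugetlb_folio(), and passing the MR_* reason that far down
are all my guesses, not something spelled out in the quoted mail:

/*
 * Sketch only: decide the gfp strategy for a temporary (migration)
 * hugetlb allocation based on the migration reason, so the gfp_mask
 * fiddling stays inside hugetlb core rather than in the callers.
 */
static gfp_t htlb_modify_migrate_mask(gfp_t gfp_mask, int reason)
{
	switch (reason) {
	case MR_MEMORY_HOTPLUG:
	case MR_MEMORY_FAILURE:
	case MR_SYSCALL:
	case MR_MEMPOLICY_MBIND:
		/* Falling back to other nodes is acceptable for these. */
		break;
	default:
		/*
		 * Stay on the requested node so we do not break the
		 * per-node pool of another node.
		 */
		gfp_mask |= __GFP_THISNODE;
		break;
	}

	return gfp_mask;
}

The trade-off against the PoC above is that the MR_* reason does end up
being carried into hugetlb core, which is what the bool in my version
tries to avoid.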