From: Ben Widawsky <ben.widawsky@xxxxxxxxx> Implement the missing huge page allocation functionality while obeying the preferred node semantics. This uses a fallback mechanism to try multiple preferred nodes first, and then all other nodes. It cannot use the helper function that was introduced because huge page allocation already has its own helpers and it was more LOC, and effort to try to consolidate that. The weirdness is MPOL_PREFERRED_MANY can't be called yet because it is part of the UAPI we haven't yet exposed. Instead of make that define global, it's simply changed with the UAPI patch. [ feng: add NOWARN flag, and skip the direct reclaim to speedup allocation in some case ] Link: https://lore.kernel.org/r/20200630212517.308045-12-ben.widawsky@xxxxxxxxx Signed-off-by: Ben Widawsky <ben.widawsky@xxxxxxxxx> Signed-off-by: Feng Tang <feng.tang@xxxxxxxxx> --- mm/hugetlb.c | 26 +++++++++++++++++++++++--- mm/mempolicy.c | 3 ++- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 8fb42c6..9dfbfa3 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1105,7 +1105,7 @@ static struct page *dequeue_huge_page_vma(struct hstate *h, unsigned long address, int avoid_reserve, long chg) { - struct page *page; + struct page *page = NULL; struct mempolicy *mpol; gfp_t gfp_mask; nodemask_t *nodemask; @@ -1126,7 +1126,17 @@ static struct page *dequeue_huge_page_vma(struct hstate *h, gfp_mask = htlb_alloc_mask(h); nid = huge_node(vma, address, gfp_mask, &mpol, &nodemask); - page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask); + if (mpol->mode != MPOL_BIND && nodemask) { /* AKA MPOL_PREFERRED_MANY */ + gfp_t gfp_mask1 = gfp_mask | __GFP_NOWARN; + + gfp_mask1 &= ~__GFP_DIRECT_RECLAIM; + page = dequeue_huge_page_nodemask(h, + gfp_mask1, nid, nodemask); + if (!page) + page = dequeue_huge_page_nodemask(h, gfp_mask, nid, NULL); + } else { + page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask); + } if (page && !avoid_reserve && vma_has_reserves(vma, chg)) { SetHPageRestoreReserve(page); h->resv_huge_pages--; @@ -1883,7 +1893,17 @@ struct page *alloc_buddy_huge_page_with_mpol(struct hstate *h, nodemask_t *nodemask; nid = huge_node(vma, addr, gfp_mask, &mpol, &nodemask); - page = alloc_surplus_huge_page(h, gfp_mask, nid, nodemask); + if (mpol->mode != MPOL_BIND && nodemask) { /* AKA MPOL_PREFERRED_MANY */ + gfp_t gfp_mask1 = gfp_mask | __GFP_NOWARN; + + gfp_mask1 &= ~__GFP_DIRECT_RECLAIM; + page = alloc_surplus_huge_page(h, + gfp_mask1, nid, nodemask); + if (!page) + alloc_surplus_huge_page(h, gfp_mask, nid, NULL); + } else { + page = alloc_surplus_huge_page(h, gfp_mask, nid, nodemask); + } mpol_cond_put(mpol); return page; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 8fe76a7..40d32cb 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2085,7 +2085,8 @@ int huge_node(struct vm_area_struct *vma, unsigned long addr, gfp_t gfp_flags, huge_page_shift(hstate_vma(vma))); } else { nid = policy_node(gfp_flags, *mpol, numa_node_id()); - if ((*mpol)->mode == MPOL_BIND) + if ((*mpol)->mode == MPOL_BIND || + (*mpol)->mode == MPOL_PREFERRED_MANY) *nodemask = &(*mpol)->nodes; } return nid; -- 2.7.4