From: Ben Widawsky <ben.widawsky@xxxxxxxxx> Implement the missing huge page allocation functionality while obeying the preferred node semantics. This is similar to the implementation for general page allocation, as it uses a fallback mechanism to try multiple preferred nodes first, and then all other nodes. [Thanks to 0day bot for catching the missing #ifdef CONFIG_NUMA issue] Link: https://lore.kernel.org/r/20200630212517.308045-12-ben.widawsky@xxxxxxxxx Suggested-by: Michal Hocko <mhocko@xxxxxxxx> Signed-off-by: Ben Widawsky <ben.widawsky@xxxxxxxxx> Co-developed-by: Feng Tang <feng.tang@xxxxxxxxx> Signed-off-by: Feng Tang <feng.tang@xxxxxxxxx> --- mm/hugetlb.c | 27 +++++++++++++++++++++++++-- mm/mempolicy.c | 3 ++- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index e4120680e31a..c771debd35a6 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1143,7 +1143,7 @@ static struct page *dequeue_huge_page_vma(struct hstate *h, unsigned long address, int avoid_reserve, long chg) { - struct page *page; + struct page *page = NULL; struct mempolicy *mpol; gfp_t gfp_mask; nodemask_t *nodemask; @@ -1164,7 +1164,18 @@ static struct page *dequeue_huge_page_vma(struct hstate *h, gfp_mask = htlb_alloc_mask(h); nid = huge_node(vma, address, gfp_mask, &mpol, &nodemask); +#ifdef CONFIG_NUMA + if (mpol->mode == MPOL_PREFERRED_MANY) { + page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask); + if (page) + goto check_reserve; + /* Fallback to all nodes */ + nodemask = NULL; + } +#endif page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask); + +check_reserve: if (page && !avoid_reserve && vma_has_reserves(vma, chg)) { SetHPageRestoreReserve(page); h->resv_huge_pages--; @@ -2048,9 +2059,21 @@ struct page *alloc_buddy_huge_page_with_mpol(struct hstate *h, nodemask_t *nodemask; nid = huge_node(vma, addr, gfp_mask, &mpol, &nodemask); +#ifdef CONFIG_NUMA + if (mpol->mode == MPOL_PREFERRED_MANY) { + gfp_t gfp = (gfp_mask | __GFP_NOWARN) & 
~__GFP_DIRECT_RECLAIM; + + page = alloc_surplus_huge_page(h, gfp, nid, nodemask); + if (page) + goto exit; + /* Fallback to all nodes */ + nodemask = NULL; + } +#endif page = alloc_surplus_huge_page(h, gfp_mask, nid, nodemask); - mpol_cond_put(mpol); +exit: + mpol_cond_put(mpol); return page; } diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 9dce67fc9bb6..93f8789758a7 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2054,7 +2054,8 @@ int huge_node(struct vm_area_struct *vma, unsigned long addr, gfp_t gfp_flags, huge_page_shift(hstate_vma(vma))); } else { nid = policy_node(gfp_flags, *mpol, numa_node_id()); - if ((*mpol)->mode == MPOL_BIND) + if ((*mpol)->mode == MPOL_BIND || + (*mpol)->mode == MPOL_PREFERRED_MANY) *nodemask = &(*mpol)->nodes; } return nid; -- 2.7.4