From: Ben Widawsky <ben.widawsky@xxxxxxxxx> Subject: mm/hugetlb: add support for mempolicy MPOL_PREFERRED_MANY Implement the missing huge page allocation functionality while obeying the preferred node semantics. This is similar to the implementation for general page allocation, as it uses a fallback mechanism to try multiple preferred nodes first, and then all other nodes. To avoid adding too many "#ifdef CONFIG_NUMA" check, add a helper function in mempolicy.h to check whether a mempolicy is MPOL_PREFERRED_MANY. [akpm@xxxxxxxxxxxxxxxxxxxx: fix compiling issue when merging with other hugetlb patch] [Thanks to 0day bot for catching the !CONFIG_NUMA compiling issue] [mhocko@xxxxxxxx: suggest to remove the #ifdef CONFIG_NUMA check] [ben.widawsky@xxxxxxxxx: add helpers to avoid ifdefs] Link: https://lore.kernel.org/r/20200630212517.308045-12-ben.widawsky@xxxxxxxxx Link: https://lkml.kernel.org/r/1627970362-61305-4-git-send-email-feng.tang@xxxxxxxxx Link: https://lkml.kernel.org/r/20210809024430.GA46432@xxxxxxxxxxxxxxxxxxxxxxx [nathan@xxxxxxxxxx: initialize page to NULL in alloc_buddy_huge_page_with_mpol()] Link: https://lkml.kernel.org/r/20210810200632.3812797-1-nathan@xxxxxxxxxx Link: https://lore.kernel.org/r/20200630212517.308045-12-ben.widawsky@xxxxxxxxx Link: https://lkml.kernel.org/r/1627970362-61305-4-git-send-email-feng.tang@xxxxxxxxx Link: https://lkml.kernel.org/r/20210809024430.GA46432@xxxxxxxxxxxxxxxxxxxxxxx Signed-off-by: Ben Widawsky <ben.widawsky@xxxxxxxxx> Signed-off-by: Feng Tang <feng.tang@xxxxxxxxx> Signed-off-by: Nathan Chancellor <nathan@xxxxxxxxxx> Co-developed-by: Feng Tang <feng.tang@xxxxxxxxx> Suggested-by: Michal Hocko <mhocko@xxxxxxxx> Acked-by: Michal Hocko <mhocko@xxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- include/linux/mempolicy.h | 12 ++++++++++++ mm/hugetlb.c | 30 +++++++++++++++++++++++++----- 2 files changed, 37 insertions(+), 5 deletions(-) --- a/include/linux/mempolicy.h~mm-hugetlb-add-support-for-mempolicy-mpol_preferred_many +++ a/include/linux/mempolicy.h @@ -186,6 +186,12 @@ extern void mpol_put_task_policy(struct extern bool numa_demotion_enabled; +static inline bool mpol_is_preferred_many(struct mempolicy *pol) +{ + return (pol->mode == MPOL_PREFERRED_MANY); +} + + #else struct mempolicy {}; @@ -296,5 +302,11 @@ static inline nodemask_t *policy_nodemas } #define numa_demotion_enabled false + +static inline bool mpol_is_preferred_many(struct mempolicy *pol) +{ + return false; +} + #endif /* CONFIG_NUMA */ #endif --- a/mm/hugetlb.c~mm-hugetlb-add-support-for-mempolicy-mpol_preferred_many +++ a/mm/hugetlb.c @@ -1145,7 +1145,7 @@ static struct page *dequeue_huge_page_vm unsigned long address, int avoid_reserve, long chg) { - struct page *page; + struct page *page = NULL; struct mempolicy *mpol; gfp_t gfp_mask; nodemask_t *nodemask; @@ -1166,7 +1166,17 @@ static struct page *dequeue_huge_page_vm gfp_mask = htlb_alloc_mask(h); nid = huge_node(vma, address, gfp_mask, &mpol, &nodemask); - page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask); + + if (mpol_is_preferred_many(mpol)) { + page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask); + + /* Fallback to all nodes if page==NULL */ + nodemask = NULL; + } + + if (!page) + page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask); + if (page && !avoid_reserve && vma_has_reserves(vma, chg)) { SetHPageRestoreReserve(page); h->resv_huge_pages--; @@ -2142,16 +2152,26 @@ static struct page *alloc_buddy_huge_page_with_mpol(struct hstate *h, struct vm_area_struct *vma, unsigned long addr) { - struct page *page; + struct page *page = NULL; struct mempolicy *mpol; gfp_t gfp_mask = htlb_alloc_mask(h); int nid; nodemask_t *nodemask; nid = huge_node(vma, addr, gfp_mask, &mpol, &nodemask); - page = alloc_surplus_huge_page(h, gfp_mask, nid, nodemask, false); - mpol_cond_put(mpol); + if (mpol_is_preferred_many(mpol)) { + gfp_t gfp = gfp_mask | __GFP_NOWARN; + + gfp &= ~(__GFP_DIRECT_RECLAIM | __GFP_NOFAIL); + page = alloc_surplus_huge_page(h, gfp, nid, nodemask, false); + /* Fallback to all nodes if page==NULL */ + nodemask = NULL; + } + + if (!page) + page = alloc_surplus_huge_page(h, gfp_mask, nid, nodemask, false); + mpol_cond_put(mpol); return page; } _