This patch implements MPOL_PREFERRED_MANY for alloc_pages_vma(). Like alloc_pages_current(), alloc_pages_vma() needs to support policy based decisions if they've been configured via mbind(2). The temporary "hack" of treating MPOL_PREFERRED and MPOL_PREFERRED_MANY can now be removed with this, too. All the actual machinery to make this work was part of ("mm/mempolicy: Create a page allocator for policy") Cc: Andrea Arcangeli <aarcange@xxxxxxxxxx> Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> Cc: David Rientjes <rientjes@xxxxxxxxxx> Cc: Mel Gorman <mgorman@xxxxxxxxxxxxxxxxxxx> Cc: Michal Hocko <mhocko@xxxxxxxxxx> Cc: Vlastimil Babka <vbabka@xxxxxxx> Signed-off-by: Ben Widawsky <ben.widawsky@xxxxxxxxx> --- mm/mempolicy.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 5fb70e6599a6..51ac0d4a2eda 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2281,8 +2281,6 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, { struct mempolicy *pol; struct page *page; - int preferred_nid; - nodemask_t *nmask; pol = get_vma_policy(vma, addr); @@ -2296,6 +2294,7 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, } if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage)) { + nodemask_t *nmask; int hpage_node = node; /* @@ -2309,10 +2308,26 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, * does not allow the current node in its nodemask, we allocate * the standard way. */ - if ((pol->mode == MPOL_PREFERRED || - pol->mode == MPOL_PREFERRED_MANY) && - !(pol->flags & MPOL_F_LOCAL)) + if (pol->mode == MPOL_PREFERRED || !(pol->flags & MPOL_F_LOCAL)) { hpage_node = first_node(pol->nodes); + } else if (pol->mode == MPOL_PREFERRED_MANY) { + struct zoneref *z; + + /* + * In this policy, with direct reclaim, the normal + * policy based allocation will do the right thing - try + * twice using the preferred nodes first, and all nodes + * second. + */ + if (gfp & __GFP_DIRECT_RECLAIM) { + page = alloc_pages_policy(pol, gfp, order, NUMA_NO_NODE); + goto out; + } + + z = first_zones_zonelist(node_zonelist(numa_node_id(), GFP_HIGHUSER), + gfp_zone(GFP_HIGHUSER), &pol->nodes); + hpage_node = zone_to_nid(z->zone); + } nmask = policy_nodemask(gfp, pol); if (!nmask || node_isset(hpage_node, *nmask)) { @@ -2338,9 +2353,7 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, } } - nmask = policy_nodemask(gfp, pol); - preferred_nid = policy_node(gfp, pol, node); - page = __alloc_pages_nodemask(gfp, order, preferred_nid, nmask); + page = alloc_pages_policy(pol, gfp, order, NUMA_NO_NODE); mpol_cond_put(pol); out: return page; -- 2.27.0