Hi Fan Du, I think we should change the print in mminit_verify_zonelist too. This patch changes the order of ZONELIST_FALLBACK, so the default NUMA policy can allocate DRAM first, then PMEM, right? Thanks, Xishi Qiu > On a system with heterogeneous memory, reasonable fall back lists would be: > a. No fall back, stick to the currently running node. > b. Fall back to other nodes of the same type or different type > e.g. DRAM node 0 -> DRAM node 1 -> PMEM node 2 -> PMEM node 3 > c. Fall back to other nodes of the same type only. > e.g. DRAM node 0 -> DRAM node 1 > > a. is already in place; the previous patch implements b., providing a way to > satisfy memory requests on a best-effort basis by default. This patch > builds c., falling back to nodes of the same type only when the user specifies > GFP_SAME_NODE_TYPE. > > Signed-off-by: Fan Du <fan.du@xxxxxxxxx> > --- > include/linux/gfp.h | 7 +++++++ > include/linux/mmzone.h | 1 + > mm/page_alloc.c | 15 +++++++++++++++ > 3 files changed, 23 insertions(+) > > diff --git a/include/linux/gfp.h b/include/linux/gfp.h > index fdab7de..ca5fdfc 100644 > --- a/include/linux/gfp.h > +++ b/include/linux/gfp.h > @@ -44,6 +44,8 @@ > #else > #define ___GFP_NOLOCKDEP 0 > #endif > +#define ___GFP_SAME_NODE_TYPE 0x1000000u > + > /* If the above are modified, __GFP_BITS_SHIFT may need updating */ > > /* > @@ -215,6 +217,7 @@ > > /* Disable lockdep for GFP context tracking */ > #define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP) > +#define __GFP_SAME_NODE_TYPE ((__force gfp_t)___GFP_SAME_NODE_TYPE) > > /* Room for N __GFP_FOO bits */ > #define __GFP_BITS_SHIFT (23 + IS_ENABLED(CONFIG_LOCKDEP)) > @@ -301,6 +304,8 @@ > __GFP_NOMEMALLOC | __GFP_NOWARN) & ~__GFP_RECLAIM) > #define GFP_TRANSHUGE (GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM) > > +#define GFP_SAME_NODE_TYPE (__GFP_SAME_NODE_TYPE) > + > /* Convert GFP flags to their corresponding migrate type */ > #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE) > #define GFP_MOVABLE_SHIFT 3 > @@ -438,6 +443,8 
@@ static inline int gfp_zonelist(gfp_t flags) > #ifdef CONFIG_NUMA > if (unlikely(flags & __GFP_THISNODE)) > return ZONELIST_NOFALLBACK; > + if (unlikely(flags & __GFP_SAME_NODE_TYPE)) > + return ZONELIST_FALLBACK_SAME_TYPE; > #endif > return ZONELIST_FALLBACK; > } > diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h > index 8c37e1c..2f8603e 100644 > --- a/include/linux/mmzone.h > +++ b/include/linux/mmzone.h > @@ -583,6 +583,7 @@ static inline bool zone_intersects(struct zone *zone, > > enum { > ZONELIST_FALLBACK, /* zonelist with fallback */ > + ZONELIST_FALLBACK_SAME_TYPE, /* zonelist with fallback to the same type node */ > #ifdef CONFIG_NUMA > /* > * The NUMA zonelists are doubled because we need zonelists that > diff --git a/mm/page_alloc.c b/mm/page_alloc.c > index a408a91..de797921 100644 > --- a/mm/page_alloc.c > +++ b/mm/page_alloc.c > @@ -5448,6 +5448,21 @@ static void build_zonelists_in_node_order(pg_data_t *pgdat, int *node_order, > } > zonerefs->zone = NULL; > zonerefs->zone_idx = 0; > + > + zonerefs = pgdat->node_zonelists[ZONELIST_FALLBACK_SAME_TYPE]._zonerefs; > + > + for (i = 0; i < nr_nodes; i++) { > + int nr_zones; > + > + pg_data_t *node = NODE_DATA(node_order[i]); > + > + if (!is_node_same_type(node->node_id, pgdat->node_id)) > + continue; > + nr_zones = build_zonerefs_node(node, zonerefs); > + zonerefs += nr_zones; > + } > + zonerefs->zone = NULL; > + zonerefs->zone_idx = 0; > } > > /* > -- > 1.8.3.1 > >