>-----Original Message----- >From: owner-linux-mm@xxxxxxxxx [mailto:owner-linux-mm@xxxxxxxxx] On >Behalf Of Xishi Qiu >Sent: Thursday, April 25, 2019 11:26 AM >To: Wu, Fengguang <fengguang.wu@xxxxxxxxx>; Du, Fan <fan.du@xxxxxxxxx> >Cc: akpm@xxxxxxxxxxxxxxxxxxxx; Michal Hocko <mhocko@xxxxxxxx>; >Williams, Dan J <dan.j.williams@xxxxxxxxx>; Hansen, Dave ><dave.hansen@xxxxxxxxx>; Huang, Ying <ying.huang@xxxxxxxxx>; >linux-mm@xxxxxxxxx; Linux Kernel Mailing List <linux-kernel@xxxxxxxxxxxxxxx> >Subject: Re: [RFC PATCH 5/5] mm, page_alloc: Introduce >ZONELIST_FALLBACK_SAME_TYPE fallback list > >Hi Fan Du, > >I think we should change the print in mminit_verify_zonelist too. > >This patch changes the order of ZONELIST_FALLBACK, so the default numa >policy can >alloc DRAM first, then PMEM, right? Yes, you are right. :) >Thanks, >Xishi Qiu >> >On a system with heterogeneous memory, reasonable fall back lists would be: >> a. No fall back, stick to current running node. >> >b. Fall back to other nodes of the same type or different type >> e.g. DRAM node 0 -> DRAM node 1 -> PMEM node 2 -> >PMEM node 3 >> c. Fall back to other nodes of the same type only. >> e.g. DRAM node 0 -> DRAM node 1 >> >> >a. is already in place, the previous patch implements b., providing a way to >> >satisfy memory requests as best effort by default. And this patch >> >builds c. to fall back to the same node type when the user specifies >> GFP_SAME_NODE_TYPE only. 
>> >> Signed-off-by: Fan Du <fan.du@xxxxxxxxx> >> --- >> include/linux/gfp.h | 7 +++++++ >> include/linux/mmzone.h | 1 + >> mm/page_alloc.c | 15 +++++++++++++++ >> 3 files changed, 23 insertions(+) >> >> diff --git a/include/linux/gfp.h b/include/linux/gfp.h >> index fdab7de..ca5fdfc 100644 >> --- a/include/linux/gfp.h >> +++ b/include/linux/gfp.h >> @@ -44,6 +44,8 @@ >> #else >> #define ___GFP_NOLOCKDEP 0 >> #endif >> +#define ___GFP_SAME_NODE_TYPE 0x1000000u >> + >> /* If the above are modified, __GFP_BITS_SHIFT may need up >dating */ >> >> /* >> @@ -215,6 +217,7 @@ >> >> /* Disable lockdep for GFP context tracking */ >> #define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP) >> >+#define __GFP_SAME_NODE_TYPE ((__force gfp_t)___GFP_SAME_NODE_T >YPE) >> >> /* Room for N __GFP_FOO bits */ >> #define __GFP_BITS_SHIFT (23 + IS_ENABLED(CONFIG_LOCKDEP)) >> @@ -301,6 +304,8 @@ >> __GFP_NOMEMALLOC | __GFP_NOWARN) & ~__GFP_RECLA >IM) >> #define GFP_TRANSHUGE (GFP_TRANSHUGE_LIGHT | __GFP_DIRE >CT_RECLAIM) >> >> +#define GFP_SAME_NODE_TYPE (__GFP_SAME_NODE_TYPE) >> + >> /* Convert GFP flags to their corresponding migrate type */ >> #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVA >BLE) >> #define GFP_MOVABLE_SHIFT 3 >> @@ -438,6 +443,8 @@ static inline int gfp_zonelist(gfp_t flags) >> #ifdef CONFIG_NUMA >> if (unlikely(flags & __GFP_THISNODE)) >> return ZONELIST_NOFALLBACK; >> + if (unlikely(flags & __GFP_SAME_NODE_TYPE)) >> + return ZONELIST_FALLBACK_SAME_TYPE; >> #endif >> return ZONELIST_FALLBACK; >> } >> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h >> index 8c37e1c..2f8603e 100644 >> --- a/include/linux/mmzone.h >> +++ b/include/linux/mmzone.h >> >@@ -583,6 +583,7 @@ static inline bool zone_intersects(struct zone >*zone, >> >> enum { >> ZONELIST_FALLBACK, /* zonelist with fallback */ >> >+ ZONELIST_FALLBACK_SAME_TYPE, /* zonelist with fallback to the sam >e type node */ >> #ifdef CONFIG_NUMA >> /* >> * The NUMA zonelists are doubled because we 
need zonel >ists that >> diff --git a/mm/page_alloc.c b/mm/page_alloc.c >> index a408a91..de797921 100644 >> --- a/mm/page_alloc.c >> +++ b/mm/page_alloc.c >> >@@ -5448,6 +5448,21 @@ static void build_zonelists_in_node_order(pg >_data_t *pgdat, int *node_order, >> } >> zonerefs->zone = NULL; >> zonerefs->zone_idx = 0; >> + >> >+ zonerefs = pgdat->node_zonelists[ZONELIST_FALLBACK_SAME_TYPE]._zon >erefs; >> + >> + for (i = 0; i < nr_nodes; i++) { >> + int nr_zones; >> + >> + pg_data_t *node = NODE_DATA(node_order[i]); >> + >> + if (!is_node_same_type(node->node_id, pgdat->node_id)) >> + continue; >> + nr_zones = build_zonerefs_node(node, zonerefs); >> + zonerefs += nr_zones; >> + } >> + zonerefs->zone = NULL; >> + zonerefs->zone_idx = 0; >> } >> >> /* >> -- >> 1.8.3.1 >> >>