On 7/29/22 12:09 PM, Huang, Ying wrote: > "Aneesh Kumar K.V" <aneesh.kumar@xxxxxxxxxxxxx> writes: > >> With memory tiers support we can have memory only NUMA nodes >> in the top tier from which we want to avoid promotion tracking NUMA >> faults. Update node_is_toptier to work with memory tiers. >> All NUMA nodes are by default top tier nodes. With lower memory >> tiers added we consider all memory tiers above a memory tier having >> CPU NUMA nodes as a top memory tier >> >> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@xxxxxxxxxxxxx> >> --- >> include/linux/memory-tiers.h | 11 ++++++++++ >> include/linux/node.h | 5 ----- >> mm/huge_memory.c | 1 + >> mm/memory-tiers.c | 42 ++++++++++++++++++++++++++++++++++++ >> mm/migrate.c | 1 + >> mm/mprotect.c | 1 + >> 6 files changed, 56 insertions(+), 5 deletions(-) >> >> diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h >> index f8dbeda617a7..bc9fb9d39b2c 100644 >> --- a/include/linux/memory-tiers.h >> +++ b/include/linux/memory-tiers.h >> @@ -35,6 +35,7 @@ struct memory_dev_type *init_node_memory_type(int node, struct memory_dev_type * >> #ifdef CONFIG_MIGRATION >> int next_demotion_node(int node); >> void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets); >> +bool node_is_toptier(int node); >> #else >> static inline int next_demotion_node(int node) >> { >> @@ -45,6 +46,11 @@ static inline void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *target >> { >> *targets = NODE_MASK_NONE; >> } >> + >> +static inline bool node_is_toptier(int node) >> +{ >> + return true; >> +} >> #endif >> >> #else >> @@ -64,5 +70,10 @@ static inline void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *target >> { >> *targets = NODE_MASK_NONE; >> } >> + >> +static inline bool node_is_toptier(int node) >> +{ >> + return true; >> +} >> #endif /* CONFIG_NUMA */ >> #endif /* _LINUX_MEMORY_TIERS_H */ >> diff --git a/include/linux/node.h b/include/linux/node.h >> index 40d641a8bfb0..9ec680dd607f 100644 
>> --- a/include/linux/node.h >> +++ b/include/linux/node.h >> @@ -185,9 +185,4 @@ static inline void register_hugetlbfs_with_node(node_registration_func_t reg, >> >> #define to_node(device) container_of(device, struct node, dev) >> >> -static inline bool node_is_toptier(int node) >> -{ >> - return node_state(node, N_CPU); >> -} >> - >> #endif /* _LINUX_NODE_H_ */ >> diff --git a/mm/huge_memory.c b/mm/huge_memory.c >> index 834f288b3769..8405662646e9 100644 >> --- a/mm/huge_memory.c >> +++ b/mm/huge_memory.c >> @@ -35,6 +35,7 @@ >> #include <linux/numa.h> >> #include <linux/page_owner.h> >> #include <linux/sched/sysctl.h> >> +#include <linux/memory-tiers.h> >> >> #include <asm/tlb.h> >> #include <asm/pgalloc.h> >> diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c >> index 84e2be31a853..36d87dc422ab 100644 >> --- a/mm/memory-tiers.c >> +++ b/mm/memory-tiers.c >> @@ -30,6 +30,7 @@ static DEFINE_MUTEX(memory_tier_lock); >> static LIST_HEAD(memory_tiers); >> struct memory_dev_type *node_memory_types[MAX_NUMNODES]; >> #ifdef CONFIG_MIGRATION >> +static int top_tier_adistance; >> /* >> * node_demotion[] examples: >> * >> @@ -159,6 +160,31 @@ static struct memory_tier *__node_get_memory_tier(int node) >> } >> >> #ifdef CONFIG_MIGRATION >> +bool node_is_toptier(int node) >> +{ >> + bool toptier; >> + pg_data_t *pgdat; >> + struct memory_tier *memtier; >> + >> + pgdat = NODE_DATA(node); >> + if (!pgdat) >> + return false; >> + >> + rcu_read_lock(); >> + memtier = rcu_dereference(pgdat->memtier); >> + if (!memtier) { >> + toptier = true; >> + goto out; >> + } >> + if (memtier->adistance_start >= top_tier_adistance) >> + toptier = true; >> + else >> + toptier = false; >> +out: >> + rcu_read_unlock(); >> + return toptier; >> +} >> + >> void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets) >> { >> struct memory_tier *memtier; >> @@ -315,6 +341,22 @@ static void establish_demotion_targets(void) >> } >> } while (1); >> } >> + /* >> + * Promotion is allowed from a 
memory tier to higher >> + * memory tier only if the memory tier doesn't include >> + * compute. We want to skip promotion from a memory tier, >> + * if any node that is part of the memory tier has CPUs. >> + * Once we detect such a memory tier, we consider that tier >> + * as top tier from which promotion is not allowed. >> + */ >> + list_for_each_entry(memtier, &memory_tiers, list) { >> + tier_nodes = get_memtier_nodemask(memtier); >> + nodes_and(tier_nodes, node_states[N_CPU], tier_nodes); >> + if (!nodes_empty(tier_nodes)) { >> + top_tier_adistance = memtier->adistance_start; > > IMHO, this should be, > > top_tier_adistance = memtier->adistance_start + MEMTIER_CHUNK_SIZE; > Good catch. Will update. BTW, I have already sent the v12 version of the patchset to the list. -aneesh