From: Srinivasulu Thanneeru <sthanneeru.opensrc@xxxxxxxxxx> Node migration enables the grouping or migration of nodes between tiers based on the nodes' latency and bandwidth characteristics. Since nodes of the same memory type can exist in different tiers and can migrate from one tier to another, it is necessary to maintain a per-tier set of nodes instead of a per-tier list of memory types (siblings) from which the nodes are derived. Signed-off-by: Srinivasulu Thanneeru <sthanneeru.opensrc@xxxxxxxxxx> --- drivers/base/node.c | 6 ++++ include/linux/memory-tiers.h | 5 +++ include/linux/node.h | 5 +++ mm/memory-tiers.c | 66 +++++++++++++++++------------------- 4 files changed, 47 insertions(+), 35 deletions(-) diff --git a/drivers/base/node.c b/drivers/base/node.c index 788176b3585a..179d9004e4f3 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -597,6 +597,7 @@ static ssize_t memtier_override_store(struct device *dev, return size; ret = get_memtier_adistance_offset(nid, memtier); node_devices[nid]->adistance_offset = ret; + node_memtier_change(nid); return size; } @@ -607,6 +608,11 @@ void set_node_memtierid(int node, int memtierid) node_devices[node]->memtier = memtierid; } +int get_node_adistance_offset(int node) +{ + return node_devices[node]->adistance_offset; +} + static struct attribute *node_dev_attrs[] = { &dev_attr_meminfo.attr, &dev_attr_numastat.attr, diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h index 0dba8027e785..b323c2e2e417 100644 --- a/include/linux/memory-tiers.h +++ b/include/linux/memory-tiers.h @@ -54,6 +54,7 @@ int mt_set_default_dram_perf(int nid, struct node_hmem_attrs *perf, const char *source); int mt_perf_to_adistance(struct node_hmem_attrs *perf, int *adist); int get_memtier_adistance_offset(int node, int memtier); +void node_memtier_change(int node); #ifdef CONFIG_MIGRATION int next_demotion_node(int node); void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets); @@ -142,5 +143,9 @@ static 
inline int mt_perf_to_adistance(struct node_hmem_attrs *perf, int *adist) { return -EIO; } + +static inline void node_memtier_change(int node) +{ +} #endif /* CONFIG_NUMA */ #endif /* _LINUX_MEMORY_TIERS_H */ diff --git a/include/linux/node.h b/include/linux/node.h index 1c4f4be39db4..da679577a271 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -141,6 +141,7 @@ extern int register_memory_node_under_compute_node(unsigned int mem_nid, unsigned int cpu_nid, unsigned access); extern void set_node_memtierid(int node, int memtierid); +extern int get_node_adistance_offset(int nid); #else static inline void node_dev_init(void) { @@ -171,6 +172,10 @@ static inline void unregister_memory_block_under_nodes(struct memory_block *mem_ static inline void set_node_memtierid(int node, int memtierid) { } +static inline int get_node_adistance_offset(int nid) +{ + return 0; +} #endif #define to_node(device) container_of(device, struct node, dev) diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c index 31ed3c577836..66e1eae97e47 100644 --- a/mm/memory-tiers.c +++ b/mm/memory-tiers.c @@ -23,6 +23,8 @@ struct memory_tier { struct device dev; /* All the nodes that are part of all the lower memory tiers. 
*/ nodemask_t lower_tier_mask; + /* Nodes linked to this tier*/ + nodemask_t nodes; }; struct demotion_nodes { @@ -120,13 +122,7 @@ static inline struct memory_tier *to_memory_tier(struct device *device) static __always_inline nodemask_t get_memtier_nodemask(struct memory_tier *memtier) { - nodemask_t nodes = NODE_MASK_NONE; - struct memory_dev_type *memtype; - - list_for_each_entry(memtype, &memtier->memory_types, tier_sibling) - nodes_or(nodes, nodes, memtype->nodes); - - return nodes; + return memtier->nodes; } static void memory_tier_device_release(struct device *dev) @@ -182,33 +178,22 @@ int get_memtier_adistance_offset(int node, int memtier) return adistance_offset; } -static struct memory_tier *find_create_memory_tier(struct memory_dev_type *memtype) +static struct memory_tier *find_create_memory_tier(struct memory_dev_type *memtype, + int tier_adistance) { int ret; bool found_slot = false; struct memory_tier *memtier, *new_memtier; - int adistance = memtype->adistance; + int adistance; unsigned int memtier_adistance_chunk_size = MEMTIER_CHUNK_SIZE; lockdep_assert_held_once(&memory_tier_lock); - adistance = round_down(adistance, memtier_adistance_chunk_size); - /* - * If the memtype is already part of a memory tier, - * just return that. 
- */ - if (!list_empty(&memtype->tier_sibling)) { - list_for_each_entry(memtier, &memory_tiers, list) { - if (adistance == memtier->adistance_start) - return memtier; - } - WARN_ON(1); - return ERR_PTR(-EINVAL); - } + adistance = round_down(tier_adistance, memtier_adistance_chunk_size); list_for_each_entry(memtier, &memory_tiers, list) { if (adistance == memtier->adistance_start) { - goto link_memtype; + return memtier; } else if (adistance < memtier->adistance_start) { found_slot = true; break; @@ -238,11 +223,8 @@ static struct memory_tier *find_create_memory_tier(struct memory_dev_type *memty put_device(&new_memtier->dev); return ERR_PTR(ret); } - memtier = new_memtier; -link_memtype: - list_add(&memtype->tier_sibling, &memtier->memory_types); - return memtier; + return new_memtier; } static struct memory_tier *__node_get_memory_tier(int node) @@ -500,7 +482,7 @@ static struct memory_tier *set_node_memory_tier(int node) struct memory_tier *memtier; struct memory_dev_type *memtype; pg_data_t *pgdat = NODE_DATA(node); - + int tier_adistance; lockdep_assert_held_once(&memory_tier_lock); @@ -511,11 +493,15 @@ static struct memory_tier *set_node_memory_tier(int node) memtype = node_memory_types[node].memtype; node_set(node, memtype->nodes); - memtier = find_create_memory_tier(memtype); + tier_adistance = get_node_adistance_offset(node); + tier_adistance = memtype->adistance + tier_adistance; + + memtier = find_create_memory_tier(memtype, tier_adistance); if (!IS_ERR(memtier)) { rcu_assign_pointer(pgdat->memtier, memtier); set_node_memtierid(node, memtier->dev.id); + node_set(node, memtier->nodes); } return memtier; } @@ -551,11 +537,9 @@ static bool clear_node_memory_tier(int node) synchronize_rcu(); memtype = node_memory_types[node].memtype; node_clear(node, memtype->nodes); - if (nodes_empty(memtype->nodes)) { - list_del_init(&memtype->tier_sibling); - if (list_empty(&memtier->memory_types)) - destroy_memory_tier(memtier); - } + node_clear(node, memtier->nodes); + 
if (nodes_empty(memtier->nodes)) + destroy_memory_tier(memtier); cleared = true; } return cleared; @@ -578,7 +562,6 @@ struct memory_dev_type *alloc_memory_type(int adistance) return ERR_PTR(-ENOMEM); memtype->adistance = adistance; - INIT_LIST_HEAD(&memtype->tier_sibling); memtype->nodes = NODE_MASK_NONE; kref_init(&memtype->kref); return memtype; @@ -618,6 +601,19 @@ void clear_node_memory_type(int node, struct memory_dev_type *memtype) } EXPORT_SYMBOL_GPL(clear_node_memory_type); +void node_memtier_change(int node) +{ + struct memory_tier *memtier; + + mutex_lock(&memory_tier_lock); + if (clear_node_memory_tier(node)) + establish_demotion_targets(); + memtier = set_node_memory_tier(node); + if (!IS_ERR(memtier)) + establish_demotion_targets(); + mutex_unlock(&memory_tier_lock); +} + static void dump_hmem_attrs(struct node_hmem_attrs *attrs, const char *prefix) { pr_info( -- 2.25.1