From: Srinivasulu Thanneeru <sthanneeru.opensrc@xxxxxxxxxx> Node migration allows nodes to be grouped into, or moved between, tiers based on their latency and bandwidth characteristics. Since nodes of the same memory type can exist in different tiers and can migrate from one tier to another, it is necessary to maintain the nodes per tier instead of maintaining a list of nodes grouped by memory type (siblings) within the tier. To migrate a node from one tier to another, remove the node from the current tier and insert it into the target tier. If the target tier does not exist, create a new one. Signed-off-by: Srinivasulu Thanneeru <sthanneeru.opensrc@xxxxxxxxxx> --- drivers/base/node.c | 6 ++++ include/linux/memory-tiers.h | 5 +++ include/linux/node.h | 5 +++ mm/memory-tiers.c | 65 +++++++++++++++++------------------- 4 files changed, 47 insertions(+), 34 deletions(-) diff --git a/drivers/base/node.c b/drivers/base/node.c index 1e63c692977b..8290ea96b439 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -608,10 +608,16 @@ static ssize_t adistance_offset_store(struct device *dev, return -EINVAL; node_devices[nid]->adistance_offset = value; + node_memtier_change(nid); return size; } static DEVICE_ATTR_RW(adistance_offset); +int get_node_adistance_offset(int nid) +{ + return node_devices[nid]->adistance_offset; +} + static struct attribute *node_dev_attrs[] = { &dev_attr_meminfo.attr, &dev_attr_numastat.attr, diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h index ff4e7136ab40..e86c23873334 100644 --- a/include/linux/memory-tiers.h +++ b/include/linux/memory-tiers.h @@ -49,6 +49,7 @@ int mt_set_default_dram_perf(int nid, struct node_hmem_attrs *perf, const char *source); int mt_perf_to_adistance(struct node_hmem_attrs *perf, int *adist); int get_target_memtier_adistance(int node, int adistance_offset); +void node_memtier_change(int node); #ifdef CONFIG_MIGRATION int next_demotion_node(int node); void 
node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets); @@ -142,5 +143,9 @@ static int get_target_memtier_adistance(int node, int adistance_offset) { return 0; } + +static inline void node_memtier_change(int node) +{ +} #endif /* CONFIG_NUMA */ #endif /* _LINUX_MEMORY_TIERS_H */ diff --git a/include/linux/node.h b/include/linux/node.h index fd0f4f3177f8..5150215b4922 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -139,6 +139,7 @@ extern void unregister_memory_block_under_nodes(struct memory_block *mem_blk); extern int register_memory_node_under_compute_node(unsigned int mem_nid, unsigned int cpu_nid, unsigned access); +extern int get_node_adistance_offset(int nid); #else static inline void node_dev_init(void) { @@ -166,6 +167,10 @@ static inline int unregister_cpu_under_node(unsigned int cpu, unsigned int nid) static inline void unregister_memory_block_under_nodes(struct memory_block *mem_blk) { } +static inline int get_node_adistance_offset(int nid) +{ + return 0; +} #endif #define to_node(device) container_of(device, struct node, dev) diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c index a40d4d4383d7..b6cd86977731 100644 --- a/mm/memory-tiers.c +++ b/mm/memory-tiers.c @@ -23,6 +23,8 @@ struct memory_tier { struct device dev; /* All the nodes that are part of all the lower memory tiers. 
*/ nodemask_t lower_tier_mask; + /* Nodes linked to this tier*/ + nodemask_t nodes; }; struct demotion_nodes { @@ -120,13 +122,7 @@ static inline struct memory_tier *to_memory_tier(struct device *device) static __always_inline nodemask_t get_memtier_nodemask(struct memory_tier *memtier) { - nodemask_t nodes = NODE_MASK_NONE; - struct memory_dev_type *memtype; - - list_for_each_entry(memtype, &memtier->memory_types, tier_sibling) - nodes_or(nodes, nodes, memtype->nodes); - - return nodes; + return memtier->nodes; } static void memory_tier_device_release(struct device *dev) @@ -181,33 +177,22 @@ int get_target_memtier_adistance(int node, int adistance_offset) return node_adistance; } -static struct memory_tier *find_create_memory_tier(struct memory_dev_type *memtype) +static struct memory_tier *find_create_memory_tier(struct memory_dev_type *memtype, + int tier_adistance) { int ret; bool found_slot = false; struct memory_tier *memtier, *new_memtier; - int adistance = memtype->adistance; + int adistance; unsigned int memtier_adistance_chunk_size = MEMTIER_CHUNK_SIZE; lockdep_assert_held_once(&memory_tier_lock); - adistance = round_down(adistance, memtier_adistance_chunk_size); - /* - * If the memtype is already part of a memory tier, - * just return that. 
- */ - if (!list_empty(&memtype->tier_sibling)) { - list_for_each_entry(memtier, &memory_tiers, list) { - if (adistance == memtier->adistance_start) - return memtier; - } - WARN_ON(1); - return ERR_PTR(-EINVAL); - } + adistance = round_down(tier_adistance, memtier_adistance_chunk_size); list_for_each_entry(memtier, &memory_tiers, list) { if (adistance == memtier->adistance_start) { - goto link_memtype; + return memtier; } else if (adistance < memtier->adistance_start) { found_slot = true; break; @@ -238,9 +223,6 @@ static struct memory_tier *find_create_memory_tier(struct memory_dev_type *memty return ERR_PTR(ret); } memtier = new_memtier; - -link_memtype: - list_add(&memtype->tier_sibling, &memtier->memory_types); return memtier; } @@ -499,7 +481,7 @@ static struct memory_tier *set_node_memory_tier(int node) struct memory_tier *memtier; struct memory_dev_type *memtype; pg_data_t *pgdat = NODE_DATA(node); - + int tier_adistance; lockdep_assert_held_once(&memory_tier_lock); @@ -510,9 +492,13 @@ static struct memory_tier *set_node_memory_tier(int node) memtype = node_memory_types[node].memtype; node_set(node, memtype->nodes); - memtier = find_create_memory_tier(memtype); + tier_adistance = get_node_adistance_offset(node); + tier_adistance = memtype->adistance + tier_adistance; + + memtier = find_create_memory_tier(memtype, tier_adistance); if (!IS_ERR(memtier)) rcu_assign_pointer(pgdat->memtier, memtier); + node_set(node, memtier->nodes); return memtier; } @@ -548,11 +534,9 @@ static bool clear_node_memory_tier(int node) synchronize_rcu(); memtype = node_memory_types[node].memtype; node_clear(node, memtype->nodes); - if (nodes_empty(memtype->nodes)) { - list_del_init(&memtype->tier_sibling); - if (list_empty(&memtier->memory_types)) - destroy_memory_tier(memtier); - } + node_clear(node, memtier->nodes); + if (nodes_empty(memtier->nodes)) + destroy_memory_tier(memtier); cleared = true; } return cleared; @@ -575,7 +559,6 @@ struct memory_dev_type *alloc_memory_type(int 
adistance) return ERR_PTR(-ENOMEM); memtype->adistance = adistance; - INIT_LIST_HEAD(&memtype->tier_sibling); memtype->nodes = NODE_MASK_NONE; kref_init(&memtype->kref); return memtype; @@ -615,6 +598,20 @@ void clear_node_memory_type(int node, struct memory_dev_type *memtype) } EXPORT_SYMBOL_GPL(clear_node_memory_type); +void node_memtier_change(int node) +{ + struct memory_tier *memtier; + + mutex_lock(&memory_tier_lock); + if (clear_node_memory_tier(node)) + establish_demotion_targets(); + memtier = set_node_memory_tier(node); + if (!IS_ERR(memtier)) + establish_demotion_targets(); + mutex_unlock(&memory_tier_lock); +} + + static void dump_hmem_attrs(struct node_hmem_attrs *attrs, const char *prefix) { pr_info( -- 2.25.1