Also update different helpers to use NODE_DATA()->memtier. Since the node-specific memtier can change based on the reassignment of a NUMA node to a different memory tier, accessing NODE_DATA()->memtier needs to happen under an RCU read lock or memory_tier_lock. Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@xxxxxxxxxxxxx> --- include/linux/mmzone.h | 3 +++ mm/memory-tiers.c | 40 +++++++++++++++++++++++++++++++++++----- 2 files changed, 38 insertions(+), 5 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index aab70355d64f..353812495a70 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -928,6 +928,9 @@ typedef struct pglist_data { /* Per-node vmstats */ struct per_cpu_nodestat __percpu *per_cpu_nodestats; atomic_long_t vm_stat[NR_VM_NODE_STAT_ITEMS]; +#ifdef CONFIG_NUMA + struct memory_tier __rcu *memtier; +#endif } pg_data_t; #define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages) diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c index 60845aa74afc..f982ca6b3559 100644 --- a/mm/memory-tiers.c +++ b/mm/memory-tiers.c @@ -3,6 +3,7 @@ #include <linux/lockdep.h> #include <linux/memory.h> #include <linux/random.h> +#include <linux/mmzone.h> #include <linux/memory-tiers.h> #include "internal.h" @@ -142,12 +143,18 @@ static struct memory_tier *find_create_memory_tier(struct memory_dev_type *memty static struct memory_tier *__node_get_memory_tier(int node) { - struct memory_dev_type *memtype; + pg_data_t *pgdat; - memtype = node_memory_types[node]; - if (memtype) - return memtype->memtier; - return NULL; + pgdat = NODE_DATA(node); + if (!pgdat) + return NULL; + /* + * Since we hold memory_tier_lock, we can avoid + * RCU read locks when accessing the details. No + * parallel updates are possible here. 
+ */ + return rcu_dereference_check(pgdat->memtier, + lockdep_is_held(&memory_tier_lock)); } #ifdef CONFIG_MIGRATION @@ -290,6 +297,7 @@ static inline void establish_demotion_targets(void) {} static void init_node_memory_tier(int node) { + pg_data_t *pgdat = NODE_DATA(node); struct memory_tier *memtier; mutex_lock(&memory_tier_lock); @@ -311,8 +319,12 @@ static void init_node_memory_tier(int node) } memtype = node_memory_types[node]; memtier = find_create_memory_tier(memtype); + if (IS_ERR(memtier)) + goto err_out; + rcu_assign_pointer(pgdat->memtier, memtier); } establish_demotion_targets(); +err_out: mutex_unlock(&memory_tier_lock); } @@ -324,13 +336,26 @@ static void destroy_memory_tier(struct memory_tier *memtier) static void clear_node_memory_tier(int node) { + pg_data_t *pgdat; struct memory_tier *current_memtier; + pgdat = NODE_DATA(node); + if (!pgdat) + return; + mutex_lock(&memory_tier_lock); + /* + * Make sure that anybody looking at NODE_DATA who finds + * a valid memtier finds memory_dev_types with nodes still + * linked to the memtier. We achieve this by waiting for + * rcu read section to finish using synchronize_rcu. 
+ */ current_memtier = __node_get_memory_tier(node); if (current_memtier) { struct memory_dev_type *memtype; + rcu_assign_pointer(pgdat->memtier, NULL); + synchronize_rcu(); memtype = node_memory_types[node]; node_clear(node, memtype->nodes); if (nodes_empty(memtype->nodes)) { @@ -386,6 +411,7 @@ static int __meminit memtier_hotplug_callback(struct notifier_block *self, static int __init memory_tier_init(void) { + int node; struct memory_tier *memtier; mutex_lock(&memory_tier_lock); @@ -396,6 +422,10 @@ static int __init memory_tier_init(void) if (IS_ERR(memtier)) panic("%s() failed to register memory tier: %ld\n", __func__, PTR_ERR(memtier)); + + for_each_node_state(node, N_MEMORY) + rcu_assign_pointer(NODE_DATA(node)->memtier, memtier); + mutex_unlock(&memory_tier_lock); #ifdef CONFIG_MIGRATION node_demotion = kcalloc(MAX_NUMNODES, sizeof(struct demotion_nodes), -- 2.37.1