This patch adds support for creating memory tiers with a specific rank value from userspace. To avoid races while creating memory tiers, the /sys/devices/system/memtier/create_tier_from_rank file is provided. Writing a rank value to this file creates a new memory tier with the specified rank. Memory tiers created from userspace are destroyed when the memory tier's nodelist becomes empty. Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@xxxxxxxxxxxxx> --- include/linux/memory-tiers.h | 3 +- mm/memory-tiers.c | 74 ++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 1 deletion(-) diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h index e70f0040d845..52896f5970b7 100644 --- a/include/linux/memory-tiers.h +++ b/include/linux/memory-tiers.h @@ -15,7 +15,8 @@ #define MEMORY_RANK_PMEM 100 #define DEFAULT_MEMORY_TIER MEMORY_TIER_DRAM -#define MAX_MEMORY_TIERS 3 +#define MAX_STATIC_MEMORY_TIERS 3 +#define MAX_MEMORY_TIERS (MAX_STATIC_MEMORY_TIERS + 2) extern bool numa_demotion_enabled; int node_create_and_set_memory_tier(int node, int tier); diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c index 7bfdfac4d43e..de3b7403ae6f 100644 --- a/mm/memory-tiers.c +++ b/mm/memory-tiers.c @@ -6,6 +6,7 @@ #include <linux/memory-tiers.h> #include <linux/random.h> #include <linux/memory.h> +#include <linux/idr.h> #include "internal.h" @@ -126,9 +127,12 @@ static const struct attribute_group *memory_tier_dev_groups[] = { NULL }; +static DEFINE_IDA(memtier_dev_id); static void memory_tier_device_release(struct device *dev) { struct memory_tier *tier = to_memory_tier(dev); + if (tier->dev.id >= MAX_STATIC_MEMORY_TIERS) + ida_free(&memtier_dev_id, tier->dev.id); kfree(tier); } @@ -195,6 +199,17 @@ static struct memory_tier *register_memory_tier(unsigned int tier, return memtier; } +static void unregister_memory_tier(struct memory_tier *memtier) +{ + /* + * Don't destroy static memory tiers. 
+ */ + if (memtier->dev.id < MAX_STATIC_MEMORY_TIERS) + return; + list_del(&memtier->list); + device_unregister(&memtier->dev); +} + static struct memory_tier *__node_get_memory_tier(int node) { struct memory_tier *memtier; @@ -267,6 +282,10 @@ int node_create_and_set_memory_tier(int node, int tier) node_set(node, current_tier->nodelist); goto out; } + + if (nodes_empty(current_tier->nodelist)) + unregister_memory_tier(current_tier); + establish_migration_targets(); out: mutex_unlock(&memory_tier_lock); @@ -350,6 +369,9 @@ int node_reset_memory_tier(int node, int tier) goto out; } + if (nodes_empty(current_tier->nodelist)) + unregister_memory_tier(current_tier); + establish_migration_targets(); out: mutex_unlock(&memory_tier_lock); @@ -550,9 +572,61 @@ default_tier_show(struct device *dev, struct device_attribute *attr, char *buf) } static DEVICE_ATTR_RO(default_tier); +static inline int memtier_alloc_id(void) +{ + return ida_alloc_range(&memtier_dev_id, + MAX_STATIC_MEMORY_TIERS, + MAX_MEMORY_TIERS - 1, GFP_KERNEL); +} + +static ssize_t create_tier_from_rank_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int ret, tier, rank; + struct memory_tier *memtier; + + ret = kstrtouint(buf, 10, &rank); + if (ret) + return ret; + + if (rank == MEMORY_RANK_HBM_GPU || + rank == MEMORY_RANK_DRAM || + rank == MEMORY_RANK_PMEM) + return -EINVAL; + + mutex_lock(&memory_tier_lock); + /* + * We don't support multiple tiers with same rank value + */ + list_for_each_entry(memtier, &memory_tiers, list) { + if (memtier->rank == rank) { + ret = -EINVAL; + goto out; + } + } + tier = memtier_alloc_id(); + if (tier < 0) { + ret = -ENOSPC; + goto out; + } + memtier = register_memory_tier(tier, rank); + if (IS_ERR(memtier)) { + ret = PTR_ERR(memtier); + goto out; + } + + ret = count; +out: + mutex_unlock(&memory_tier_lock); + return ret; +} +static DEVICE_ATTR_WO(create_tier_from_rank); + static struct attribute *memory_tier_attrs[] = { 
&dev_attr_max_tier.attr, &dev_attr_default_tier.attr, + &dev_attr_create_tier_from_rank.attr, NULL }; -- 2.36.1