On 9/12/23 01:20, Huang Ying wrote: > The abstract distance may be calculated by various drivers, such as > ACPI HMAT, CXL CDAT, etc. While it may be used by various code which > hot-add memory node, such as dax/kmem etc. To decouple the algorithm > users and the providers, the abstract distance calculation algorithms > management mechanism is implemented in this patch. It provides > interface for the providers to register the implementation, and > interface for the users. > > Multiple algorithm implementations can cooperate via calculating > abstract distance for different memory nodes. The preference of > algorithm implementations can be specified via > priority (notifier_block.priority). > > Signed-off-by: "Huang, Ying" <ying.huang@xxxxxxxxx> > Tested-by: Bharata B Rao <bharata@xxxxxxx> > Cc: Aneesh Kumar K.V <aneesh.kumar@xxxxxxxxxxxxx> > Cc: Wei Xu <weixugc@xxxxxxxxxx> > Cc: Alistair Popple <apopple@xxxxxxxxxx> > Cc: Dan Williams <dan.j.williams@xxxxxxxxx> > Cc: Dave Hansen <dave.hansen@xxxxxxxxx> > Cc: Davidlohr Bueso <dave@xxxxxxxxxxxx> > Cc: Johannes Weiner <hannes@xxxxxxxxxxx> > Cc: Jonathan Cameron <Jonathan.Cameron@xxxxxxxxxx> > Cc: Michal Hocko <mhocko@xxxxxxxxxx> > Cc: Yang Shi <shy828301@xxxxxxxxx> > Cc: Dave Jiang <dave.jiang@xxxxxxxxx> > Cc: Rafael J Wysocki <rafael.j.wysocki@xxxxxxxxx> Reviewed-by: Dave Jiang <dave.jiang@xxxxxxxxx> > --- > include/linux/memory-tiers.h | 19 ++++++++++++ > mm/memory-tiers.c | 59 ++++++++++++++++++++++++++++++++++++ > 2 files changed, 78 insertions(+) > > diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h > index 437441cdf78f..c8382220cced 100644 > --- a/include/linux/memory-tiers.h > +++ b/include/linux/memory-tiers.h > @@ -6,6 +6,7 @@ > #include <linux/nodemask.h> > #include <linux/kref.h> > #include <linux/mmzone.h> > +#include <linux/notifier.h> > /* > * Each tier cover a abstrace distance chunk size of 128 > */ > @@ -36,6 +37,9 @@ struct memory_dev_type *alloc_memory_type(int adistance); > void put_memory_type(struct memory_dev_type *memtype); > void init_node_memory_type(int node, struct memory_dev_type *default_type); > void clear_node_memory_type(int node, struct memory_dev_type *memtype); > +int register_mt_adistance_algorithm(struct notifier_block *nb); > +int unregister_mt_adistance_algorithm(struct notifier_block *nb); > +int mt_calc_adistance(int node, int *adist); > #ifdef CONFIG_MIGRATION > int next_demotion_node(int node); > void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets); > @@ -97,5 +101,20 @@ static inline bool node_is_toptier(int node) > { > return true; > } > + > +static inline int register_mt_adistance_algorithm(struct notifier_block *nb) > +{ > + return 0; > +} > + > +static inline int unregister_mt_adistance_algorithm(struct notifier_block *nb) > +{ > + return 0; > +} > + > +static inline int mt_calc_adistance(int node, int *adist) > +{ > + return NOTIFY_DONE; > +} > #endif /* CONFIG_NUMA */ > #endif /* _LINUX_MEMORY_TIERS_H */ > diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c > index 37a4f59d9585..76c0ad47a5ad 100644 > --- a/mm/memory-tiers.c > +++ b/mm/memory-tiers.c > @@ -5,6 +5,7 @@ > #include <linux/kobject.h> > #include <linux/memory.h> > #include <linux/memory-tiers.h> > +#include <linux/notifier.h> > > #include "internal.h" > > @@ -105,6 +106,8 @@ static int top_tier_adistance; > static struct demotion_nodes *node_demotion __read_mostly; > #endif /* CONFIG_MIGRATION */ > > +static BLOCKING_NOTIFIER_HEAD(mt_adistance_algorithms); > + > static inline struct memory_tier *to_memory_tier(struct device *device) > { > return container_of(device, struct memory_tier, dev); > @@ -592,6 +595,62 @@ void clear_node_memory_type(int node, struct memory_dev_type *memtype) > } > EXPORT_SYMBOL_GPL(clear_node_memory_type); > > +/** > + * register_mt_adistance_algorithm() - Register memory tiering abstract distance algorithm > + * @nb: The notifier block which describe the algorithm > + * > + * Return: 0 on success, errno on error. > + * > + * Every memory tiering abstract distance algorithm provider needs to > + * register the algorithm with register_mt_adistance_algorithm(). To > + * calculate the abstract distance for a specified memory node, the > + * notifier function will be called unless some high priority > + * algorithm has provided result. The prototype of the notifier > + * function is as follows, > + * > + * int (*algorithm_notifier)(struct notifier_block *nb, > + * unsigned long nid, void *data); > + * > + * Where "nid" specifies the memory node, "data" is the pointer to the > + * returned abstract distance (that is, "int *adist"). If the > + * algorithm provides the result, NOTIFY_STOP should be returned. > + * Otherwise, return_value & %NOTIFY_STOP_MASK == 0 to allow the next > + * algorithm in the chain to provide the result. > + */ > +int register_mt_adistance_algorithm(struct notifier_block *nb) > +{ > + return blocking_notifier_chain_register(&mt_adistance_algorithms, nb); > +} > +EXPORT_SYMBOL_GPL(register_mt_adistance_algorithm); > + > +/** > + * unregister_mt_adistance_algorithm() - Unregister memory tiering abstract distance algorithm > + * @nb: the notifier block which describe the algorithm > + * > + * Return: 0 on success, errno on error. > + */ > +int unregister_mt_adistance_algorithm(struct notifier_block *nb) > +{ > + return blocking_notifier_chain_unregister(&mt_adistance_algorithms, nb); > +} > +EXPORT_SYMBOL_GPL(unregister_mt_adistance_algorithm); > + > +/** > + * mt_calc_adistance() - Calculate abstract distance with registered algorithms > + * @node: the node to calculate abstract distance for > + * @adist: the returned abstract distance > + * > + * Return: if return_value & %NOTIFY_STOP_MASK != 0, then some > + * abstract distance algorithm provides the result, and return it via > + * @adist. Otherwise, no algorithm can provide the result and @adist > + * will be kept as it is. > + */ > +int mt_calc_adistance(int node, int *adist) > +{ > + return blocking_notifier_call_chain(&mt_adistance_algorithms, node, adist); > +} > +EXPORT_SYMBOL_GPL(mt_calc_adistance); > + > static int __meminit memtier_hotplug_callback(struct notifier_block *self, > unsigned long action, void *_arg) > {