On 07/11/2023 15:22, Vishal Verma wrote:
> Large amounts of memory managed by the kmem driver may come in via CXL,
> and it is often desirable to have the memmap for this memory on the new
> memory itself.
>
> Enroll kmem-managed memory for memmap_on_memory semantics if the dax
> region originates via CXL. For non-CXL dax regions, retain the existing
> default behavior of hot adding without memmap_on_memory semantics.
>
> Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
> Cc: David Hildenbrand <david@xxxxxxxxxx>
> Cc: Michal Hocko <mhocko@xxxxxxxx>
> Cc: Oscar Salvador <osalvador@xxxxxxx>
> Cc: Dan Williams <dan.j.williams@xxxxxxxxx>
> Cc: Dave Jiang <dave.jiang@xxxxxxxxx>
> Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
> Cc: Huang Ying <ying.huang@xxxxxxxxx>
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@xxxxxxxxxx>
> Reviewed-by: David Hildenbrand <david@xxxxxxxxxx>
> Reviewed-by: "Huang, Ying" <ying.huang@xxxxxxxxx>
> Signed-off-by: Vishal Verma <vishal.l.verma@xxxxxxxxx>

Tested-by: Li Zhijian <lizhijian@xxxxxxxxxxx> # both cxl.kmem and nvdimm.kmem

> ---
>  drivers/dax/bus.h         | 1 +
>  drivers/dax/dax-private.h | 1 +
>  drivers/dax/bus.c         | 3 +++
>  drivers/dax/cxl.c         | 1 +
>  drivers/dax/hmem/hmem.c   | 1 +
>  drivers/dax/kmem.c        | 8 +++++++-
>  drivers/dax/pmem.c        | 1 +
>  7 files changed, 15 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/dax/bus.h b/drivers/dax/bus.h
> index 1ccd23360124..cbbf64443098 100644
> --- a/drivers/dax/bus.h
> +++ b/drivers/dax/bus.h
> @@ -23,6 +23,7 @@ struct dev_dax_data {
>  	struct dev_pagemap *pgmap;
>  	resource_size_t size;
>  	int id;
> +	bool memmap_on_memory;
>  };
>
>  struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data);
> diff --git a/drivers/dax/dax-private.h b/drivers/dax/dax-private.h
> index 27cf2daaaa79..446617b73aea 100644
> --- a/drivers/dax/dax-private.h
> +++ b/drivers/dax/dax-private.h
> @@ -70,6 +70,7 @@ struct dev_dax {
>  	struct ida ida;
>  	struct device dev;
>  	struct dev_pagemap *pgmap;
> +	bool memmap_on_memory;
>  	int nr_range;
>  	struct dev_dax_range {
>  		unsigned long pgoff;
> diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
> index 1659b787b65f..1ff1ab5fa105 100644
> --- a/drivers/dax/bus.c
> +++ b/drivers/dax/bus.c
> @@ -367,6 +367,7 @@ static ssize_t create_store(struct device *dev, struct device_attribute *attr,
>  			.dax_region = dax_region,
>  			.size = 0,
>  			.id = -1,
> +			.memmap_on_memory = false,
>  		};
>  		struct dev_dax *dev_dax = devm_create_dev_dax(&data);
>
> @@ -1400,6 +1401,8 @@ struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data)
>  	dev_dax->align = dax_region->align;
>  	ida_init(&dev_dax->ida);
>
> +	dev_dax->memmap_on_memory = data->memmap_on_memory;
> +
>  	inode = dax_inode(dax_dev);
>  	dev->devt = inode->i_rdev;
>  	dev->bus = &dax_bus_type;
> diff --git a/drivers/dax/cxl.c b/drivers/dax/cxl.c
> index 8bc9d04034d6..c696837ab23c 100644
> --- a/drivers/dax/cxl.c
> +++ b/drivers/dax/cxl.c
> @@ -26,6 +26,7 @@ static int cxl_dax_region_probe(struct device *dev)
>  		.dax_region = dax_region,
>  		.id = -1,
>  		.size = range_len(&cxlr_dax->hpa_range),
> +		.memmap_on_memory = true,
>  	};
>
>  	return PTR_ERR_OR_ZERO(devm_create_dev_dax(&data));
> diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c
> index 5d2ddef0f8f5..b9da69f92697 100644
> --- a/drivers/dax/hmem/hmem.c
> +++ b/drivers/dax/hmem/hmem.c
> @@ -36,6 +36,7 @@ static int dax_hmem_probe(struct platform_device *pdev)
>  		.dax_region = dax_region,
>  		.id = -1,
>  		.size = region_idle ? 0 : range_len(&mri->range),
> +		.memmap_on_memory = false,
>  	};
>
>  	return PTR_ERR_OR_ZERO(devm_create_dev_dax(&data));
> diff --git a/drivers/dax/kmem.c b/drivers/dax/kmem.c
> index 369c698b7706..42ee360cf4e3 100644
> --- a/drivers/dax/kmem.c
> +++ b/drivers/dax/kmem.c
> @@ -12,6 +12,7 @@
>  #include <linux/mm.h>
>  #include <linux/mman.h>
>  #include <linux/memory-tiers.h>
> +#include <linux/memory_hotplug.h>
>  #include "dax-private.h"
>  #include "bus.h"
>
> @@ -93,6 +94,7 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
>  	struct dax_kmem_data *data;
>  	struct memory_dev_type *mtype;
>  	int i, rc, mapped = 0;
> +	mhp_t mhp_flags;
>  	int numa_node;
>  	int adist = MEMTIER_DEFAULT_DAX_ADISTANCE;
>
> @@ -179,12 +181,16 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
>  		 */
>  		res->flags = IORESOURCE_SYSTEM_RAM;
>
> +		mhp_flags = MHP_NID_IS_MGID;
> +		if (dev_dax->memmap_on_memory)
> +			mhp_flags |= MHP_MEMMAP_ON_MEMORY;
> +
>  		/*
>  		 * Ensure that future kexec'd kernels will not treat
>  		 * this as RAM automatically.
>  		 */
>  		rc = add_memory_driver_managed(data->mgid, range.start,
> -				range_len(&range), kmem_name, MHP_NID_IS_MGID);
> +				range_len(&range), kmem_name, mhp_flags);
>
>  		if (rc) {
>  			dev_warn(dev, "mapping%d: %#llx-%#llx memory add failed\n",
> diff --git a/drivers/dax/pmem.c b/drivers/dax/pmem.c
> index ae0cb113a5d3..f3c6c67b8412 100644
> --- a/drivers/dax/pmem.c
> +++ b/drivers/dax/pmem.c
> @@ -63,6 +63,7 @@ static struct dev_dax *__dax_pmem_probe(struct device *dev)
>  		.id = id,
>  		.pgmap = &pgmap,
>  		.size = range_len(&range),
> +		.memmap_on_memory = false,
>  	};
>
>  	return devm_create_dev_dax(&data);
>