On Wed 05-04-17 16:40:11, Jérôme Glisse wrote: > When hotpluging memory we want more information on the type of memory. > This is to extend ZONE_DEVICE to support new type of memory other than > the persistent memory. Existing user of ZONE_DEVICE (persistent memory) > will be left un-modified. My current hotplug rework [1] is touching this path as well. It is not really clear from the chage why you are changing this and what are the further expectations of MEMORY_DEVICE_PERSISTENT. Infact I have replaced for_device with want__memblock [2]. I plan to repost shortly but I would like to understand your modifications more to reduce potential conflicts in the code. Why do you need to distinguish different types of memory anyway. [1] http://lkml.kernel.org/r/20170330115454.32154-1-mhocko@xxxxxxxxxx [2] the current patchset is in git://git.kernel.org/pub/scm/linux/kernel/git/mhocko/mm.git branch attempts/rewrite-mem_hotplug-WIP > > Signed-off-by: Jérôme Glisse <jglisse@xxxxxxxxxx> > Cc: Russell King <linux@xxxxxxxxxxxxxxx> > Cc: Benjamin Herrenschmidt <benh@xxxxxxxxxxxxxxxxxxx> > Cc: Paul Mackerras <paulus@xxxxxxxxx> > Cc: Michael Ellerman <mpe@xxxxxxxxxxxxxx> > Cc: Martin Schwidefsky <schwidefsky@xxxxxxxxxx> > Cc: Heiko Carstens <heiko.carstens@xxxxxxxxxx> > Cc: Yoshinori Sato <ysato@xxxxxxxxxxxxxxxxxxxx> > Cc: Rich Felker <dalias@xxxxxxxx> > Cc: Chris Metcalf <cmetcalf@xxxxxxxxxxxx> > Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx> > Cc: Ingo Molnar <mingo@xxxxxxxxxx> > Cc: "H. Peter Anvin" <hpa@xxxxxxxxx> > --- > arch/ia64/mm/init.c | 36 +++++++++++++++++++++++++++++++++--- > arch/powerpc/mm/mem.c | 37 ++++++++++++++++++++++++++++++++++--- > arch/s390/mm/init.c | 16 ++++++++++++++-- > arch/sh/mm/init.c | 35 +++++++++++++++++++++++++++++++++-- > arch/x86/mm/init_32.c | 41 +++++++++++++++++++++++++++++++++++++---- > arch/x86/mm/init_64.c | 39 +++++++++++++++++++++++++++++++++++---- > include/linux/memory_hotplug.h | 24 ++++++++++++++++++++++-- > include/linux/memremap.h | 2 ++ > kernel/memremap.c | 5 +++-- > mm/memory_hotplug.c | 4 ++-- > 10 files changed, 215 insertions(+), 24 deletions(-) > > diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c > index 06cdaef..c910b3f 100644 > --- a/arch/ia64/mm/init.c > +++ b/arch/ia64/mm/init.c > @@ -645,20 +645,36 @@ mem_init (void) > } > > #ifdef CONFIG_MEMORY_HOTPLUG > -int arch_add_memory(int nid, u64 start, u64 size, bool for_device) > +int arch_add_memory(int nid, u64 start, u64 size, enum memory_type type) > { > pg_data_t *pgdat; > struct zone *zone; > unsigned long start_pfn = start >> PAGE_SHIFT; > unsigned long nr_pages = size >> PAGE_SHIFT; > + bool for_device = false; > int ret; > > + /* > + * Each memory_type needs special handling, so error out on an > + * unsupported type. In particular, MEMORY_DEVICE_UNADDRESSABLE > + * is not supported on this architecture. > + */ > + switch (type) { > + case MEMORY_NORMAL: > + break; > + case MEMORY_DEVICE_PERSISTENT: > + for_device = true; > + break; > + default: > + pr_err("hotplug unsupported memory type %d\n", type); > + return -EINVAL; > + } > + > pgdat = NODE_DATA(nid); > > zone = pgdat->node_zones + > zone_for_memory(nid, start, size, ZONE_NORMAL, for_device); > ret = __add_pages(nid, zone, start_pfn, nr_pages); > - > if (ret) > printk("%s: Problem encountered in __add_pages() as ret=%d\n", > __func__, ret); > @@ -667,13 +683,27 @@ int arch_add_memory(int nid, u64 start, u64 size, bool for_device) > } > > #ifdef CONFIG_MEMORY_HOTREMOVE > -int arch_remove_memory(u64 start, u64 size) > +int arch_remove_memory(u64 start, u64 size, enum memory_type type) > { > unsigned long start_pfn = start >> PAGE_SHIFT; > unsigned long nr_pages = size >> PAGE_SHIFT; > struct zone *zone; > int ret; > > + /* > + * Each memory_type needs special handling, so error out on an > + * unsupported type. In particular, MEMORY_DEVICE_UNADDRESSABLE > + * is not supported on this architecture. > + */ > + switch (type) { > + case MEMORY_NORMAL: > + case MEMORY_DEVICE_PERSISTENT: > + break; > + default: > + pr_err("hotplug unsupported memory type %d\n", type); > + return -EINVAL; > + } > + > zone = page_zone(pfn_to_page(start_pfn)); > ret = __remove_pages(zone, start_pfn, nr_pages); > if (ret) > diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c > index 5f84433..0933261 100644 > --- a/arch/powerpc/mm/mem.c > +++ b/arch/powerpc/mm/mem.c > @@ -126,14 +126,31 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end) > return -ENODEV; > } > > -int arch_add_memory(int nid, u64 start, u64 size, bool for_device) > +int arch_add_memory(int nid, u64 start, u64 size, enum memory_type type) > { > struct pglist_data *pgdata; > - struct zone *zone; > unsigned long start_pfn = start >> PAGE_SHIFT; > unsigned long nr_pages = size >> PAGE_SHIFT; > + bool for_device = false; > + struct zone *zone; > int rc; > > + /* > + * Each memory_type needs special handling, so error out on an > + * unsupported type. In particular, MEMORY_DEVICE_UNADDRESSABLE > + * is not supported on this architecture. > + */ > + switch (type) { > + case MEMORY_NORMAL: > + break; > + case MEMORY_DEVICE_PERSISTENT: > + for_device = true; > + break; > + default: > + pr_err("hotplug unsupported memory type %d\n", type); > + return -EINVAL; > + } > + > pgdata = NODE_DATA(nid); > > start = (unsigned long)__va(start); > @@ -153,13 +170,27 @@ int arch_add_memory(int nid, u64 start, u64 size, bool for_device) > } > > #ifdef CONFIG_MEMORY_HOTREMOVE > -int arch_remove_memory(u64 start, u64 size) > +int arch_remove_memory(u64 start, u64 size, enum memory_type type) > { > unsigned long start_pfn = start >> PAGE_SHIFT; > unsigned long nr_pages = size >> PAGE_SHIFT; > struct zone *zone; > int ret; > > + /* > + * Each memory_type needs special handling, so error out on an > + * unsupported type. In particular, MEMORY_DEVICE_UNADDRESSABLE > + * is not supported on this architecture. > + */ > + switch (type) { > + case MEMORY_NORMAL: > + case MEMORY_DEVICE_PERSISTENT: > + break; > + default: > + pr_err("hotplug unsupported memory type %d\n", type); > + return -EINVAL; > + } > + > zone = page_zone(pfn_to_page(start_pfn)); > ret = __remove_pages(zone, start_pfn, nr_pages); > if (ret) > diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c > index bf5b8a0..20d7714 100644 > --- a/arch/s390/mm/init.c > +++ b/arch/s390/mm/init.c > @@ -153,7 +153,7 @@ void __init free_initrd_mem(unsigned long start, unsigned long end) > #endif > > #ifdef CONFIG_MEMORY_HOTPLUG > -int arch_add_memory(int nid, u64 start, u64 size, bool for_device) > +int arch_add_memory(int nid, u64 start, u64 size, enum memory_type type) > { > unsigned long zone_start_pfn, zone_end_pfn, nr_pages; > unsigned long start_pfn = PFN_DOWN(start); > @@ -162,6 +162,18 @@ int arch_add_memory(int nid, u64 start, u64 size, bool for_device) > struct zone *zone; > int rc, i; > > + /* > + * Each memory_type needs special handling, so error out on an > + * unsupported type. > + */ > + switch (type) { > + case MEMORY_NORMAL: > + break; > + default: > + pr_err("hotplug unsupported memory type %d\n", type); > + return -EINVAL; > + } > + > rc = vmem_add_mapping(start, size); > if (rc) > return rc; > @@ -205,7 +217,7 @@ unsigned long memory_block_size_bytes(void) > } > > #ifdef CONFIG_MEMORY_HOTREMOVE > -int arch_remove_memory(u64 start, u64 size) > +int arch_remove_memory(u64 start, u64 size, enum memory_type type) > { > /* > * There is no hardware or firmware interface which could trigger a > diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c > index 7549186..f37e7a6 100644 > --- a/arch/sh/mm/init.c > +++ b/arch/sh/mm/init.c > @@ -485,13 +485,30 @@ void free_initrd_mem(unsigned long start, unsigned long end) > #endif > > #ifdef CONFIG_MEMORY_HOTPLUG > -int arch_add_memory(int nid, u64 start, u64 size, bool for_device) > +int arch_add_memory(int nid, u64 start, u64 size, enum memory_type type) > { > pg_data_t *pgdat; > unsigned long start_pfn = PFN_DOWN(start); > unsigned long nr_pages = size >> PAGE_SHIFT; > + bool for_device = false; > int ret; > > + /* > + * Each memory_type needs special handling, so error out on an > + * unsupported type. In particular, MEMORY_DEVICE_UNADDRESSABLE > + * is not supported on this architecture. > + */ > + switch (type) { > + case MEMORY_NORMAL: > + break; > + case MEMORY_DEVICE_PERSISTENT: > + for_device = true; > + break; > + default: > + pr_err("hotplug unsupported memory type %d\n", type); > + return -EINVAL; > + } > + > pgdat = NODE_DATA(nid); > > /* We only have ZONE_NORMAL, so this is easy.. */ > @@ -516,13 +533,27 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); > #endif > > #ifdef CONFIG_MEMORY_HOTREMOVE > -int arch_remove_memory(u64 start, u64 size) > +int arch_remove_memory(u64 start, u64 size, enum memory_type type) > { > unsigned long start_pfn = PFN_DOWN(start); > unsigned long nr_pages = size >> PAGE_SHIFT; > struct zone *zone; > int ret; > > + /* > + * Each memory_type needs special handling, so error out on an > + * unsupported type. In particular, MEMORY_DEVICE_UNADDRESSABLE > + * is not supported on this architecture. > + */ > + switch (type) { > + case MEMORY_NORMAL: > + case MEMORY_DEVICE_PERSISTENT: > + break; > + default: > + pr_err("hotplug unsupported memory type %d\n", type); > + return -EINVAL; > + } > + > zone = page_zone(pfn_to_page(start_pfn)); > ret = __remove_pages(zone, start_pfn, nr_pages); > if (unlikely(ret)) > diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c > index c68078f..811d631 100644 > --- a/arch/x86/mm/init_32.c > +++ b/arch/x86/mm/init_32.c > @@ -826,24 +826,57 @@ void __init mem_init(void) > } > > #ifdef CONFIG_MEMORY_HOTPLUG > -int arch_add_memory(int nid, u64 start, u64 size, bool for_device) > +int arch_add_memory(int nid, u64 start, u64 size, enum memory_type type) > { > struct pglist_data *pgdata = NODE_DATA(nid); > - struct zone *zone = pgdata->node_zones + > - zone_for_memory(nid, start, size, ZONE_HIGHMEM, for_device); > unsigned long start_pfn = start >> PAGE_SHIFT; > unsigned long nr_pages = size >> PAGE_SHIFT; > + bool for_device = false; > + struct zone *zone; > + > + /* > + * Each memory_type needs special handling, so error out on an > + * unsupported type. In particular, MEMORY_DEVICE_UNADDRESSABLE > + * is not supported on this architecture. > + */ > + switch (type) { > + case MEMORY_NORMAL: > + break; > + case MEMORY_DEVICE_PERSISTENT: > + for_device = true; > + break; > + default: > + pr_err("hotplug unsupported memory type %d\n", type); > + return -EINVAL; > + } > + > + zone = pgdata->node_zones + > + zone_for_memory(nid, start, size, ZONE_HIGHMEM, for_device); > > return __add_pages(nid, zone, start_pfn, nr_pages); > } > > #ifdef CONFIG_MEMORY_HOTREMOVE > -int arch_remove_memory(u64 start, u64 size) > +int arch_remove_memory(u64 start, u64 size, enum memory_type type) > { > unsigned long start_pfn = start >> PAGE_SHIFT; > unsigned long nr_pages = size >> PAGE_SHIFT; > struct zone *zone; > > + /* > + * Each memory_type needs special handling, so error out on an > + * unsupported type. In particular, MEMORY_DEVICE_UNADDRESSABLE > + * is not supported on this architecture. > + */ > + switch (type) { > + case MEMORY_NORMAL: > + case MEMORY_DEVICE_PERSISTENT: > + break; > + default: > + pr_err("hotplug unsupported memory type %d\n", type); > + return -EINVAL; > + } > + > zone = page_zone(pfn_to_page(start_pfn)); > return __remove_pages(zone, start_pfn, nr_pages); > } > diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c > index 7eef172..6c0b24e 100644 > --- a/arch/x86/mm/init_64.c > +++ b/arch/x86/mm/init_64.c > @@ -641,15 +641,33 @@ static void update_end_of_memory_vars(u64 start, u64 size) > * Memory is added always to NORMAL zone. This means you will never get > * additional DMA/DMA32 memory. > */ > -int arch_add_memory(int nid, u64 start, u64 size, bool for_device) > +int arch_add_memory(int nid, u64 start, u64 size, enum memory_type type) > { > struct pglist_data *pgdat = NODE_DATA(nid); > - struct zone *zone = pgdat->node_zones + > - zone_for_memory(nid, start, size, ZONE_NORMAL, for_device); > unsigned long start_pfn = start >> PAGE_SHIFT; > unsigned long nr_pages = size >> PAGE_SHIFT; > + bool for_device = false; > + struct zone *zone; > int ret; > > + /* > + * Each memory_type needs special handling, so error out on an > + * unsupported type. > + */ > + switch (type) { > + case MEMORY_NORMAL: > + break; > + case MEMORY_DEVICE_PERSISTENT: > + for_device = true; > + break; > + default: > + pr_err("hotplug unsupported memory type %d\n", type); > + return -EINVAL; > + } > + > + zone = pgdat->node_zones + > + zone_for_memory(nid, start, size, ZONE_NORMAL, for_device); > + > init_memory_mapping(start, start + size); > > ret = __add_pages(nid, zone, start_pfn, nr_pages); > @@ -946,7 +964,7 @@ kernel_physical_mapping_remove(unsigned long start, unsigned long end) > remove_pagetable(start, end, true); > } > > -int __ref arch_remove_memory(u64 start, u64 size) > +int __ref arch_remove_memory(u64 start, u64 size, enum memory_type type) > { > unsigned long start_pfn = start >> PAGE_SHIFT; > unsigned long nr_pages = size >> PAGE_SHIFT; > @@ -955,6 +973,19 @@ int __ref arch_remove_memory(u64 start, u64 size) > struct zone *zone; > int ret; > > + /* > + * Each memory_type needs special handling, so error out on an > + * unsupported type. > + */ > + switch (type) { > + case MEMORY_NORMAL: > + case MEMORY_DEVICE_PERSISTENT: > + break; > + default: > + pr_err("hotplug unsupported memory type %d\n", type); > + return -EINVAL; > + } > + > /* With altmap the first mapped page is offset from @start */ > altmap = to_vmem_altmap((unsigned long) page); > if (altmap) > diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h > index 134a2f6..c3999f2 100644 > --- a/include/linux/memory_hotplug.h > +++ b/include/linux/memory_hotplug.h > @@ -13,6 +13,26 @@ struct mem_section; > struct memory_block; > struct resource; > > +/* > + * When hotplugging memory with arch_add_memory(), we want more information on > + * the type of memory we are hotplugging, because depending on the type of > + * architecture, the code might want to take different paths. > + * > + * MEMORY_NORMAL: > + * Your regular system memory. Default common case. > + * > + * MEMORY_DEVICE_PERSISTENT: > + * Persistent device memory (pmem): struct page might be allocated in different > + * memory and architecture might want to perform special actions. It is similar > + * to regular memory, in that the CPU can access it transparently. However, > + * it is likely to have different bandwidth and latency than regular memory. > + * See Documentation/nvdimm/nvdimm.txt for more information. > + */ > +enum memory_type { > + MEMORY_NORMAL = 0, > + MEMORY_DEVICE_PERSISTENT, > +}; > + > #ifdef CONFIG_MEMORY_HOTPLUG > > /* > @@ -104,7 +124,7 @@ extern bool memhp_auto_online; > > #ifdef CONFIG_MEMORY_HOTREMOVE > extern bool is_pageblock_removable_nolock(struct page *page); > -extern int arch_remove_memory(u64 start, u64 size); > +extern int arch_remove_memory(u64 start, u64 size, enum memory_type type); > extern int __remove_pages(struct zone *zone, unsigned long start_pfn, > unsigned long nr_pages); > #endif /* CONFIG_MEMORY_HOTREMOVE */ > @@ -276,7 +296,7 @@ extern int add_memory(int nid, u64 start, u64 size); > extern int add_memory_resource(int nid, struct resource *resource, bool online); > extern int zone_for_memory(int nid, u64 start, u64 size, int zone_default, > bool for_device); > -extern int arch_add_memory(int nid, u64 start, u64 size, bool for_device); > +extern int arch_add_memory(int nid, u64 start, u64 size, enum memory_type type); > extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages); > extern bool is_memblock_offlined(struct memory_block *mem); > extern void remove_memory(int nid, u64 start, u64 size); > diff --git a/include/linux/memremap.h b/include/linux/memremap.h > index 9341619..1f720f7 100644 > --- a/include/linux/memremap.h > +++ b/include/linux/memremap.h > @@ -41,12 +41,14 @@ static inline struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start) > * @res: physical address range covered by @ref > * @ref: reference count that pins the devm_memremap_pages() mapping > * @dev: host device of the mapping for debug > + * @type: memory type see MEMORY_* in memory_hotplug.h > */ > struct dev_pagemap { > struct vmem_altmap *altmap; > const struct resource *res; > struct percpu_ref *ref; > struct device *dev; > + enum memory_type type; > }; > > #ifdef CONFIG_ZONE_DEVICE > diff --git a/kernel/memremap.c b/kernel/memremap.c > index 07e85e5..6b4505d 100644 > --- a/kernel/memremap.c > +++ b/kernel/memremap.c > @@ -248,7 +248,7 @@ static void devm_memremap_pages_release(struct device *dev, void *data) > align_size = ALIGN(resource_size(res), SECTION_SIZE); > > mem_hotplug_begin(); > - arch_remove_memory(align_start, align_size); > + arch_remove_memory(align_start, align_size, pgmap->type); > mem_hotplug_done(); > > untrack_pfn(NULL, PHYS_PFN(align_start), align_size); > @@ -326,6 +326,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, > } > pgmap->ref = ref; > pgmap->res = &page_map->res; > + pgmap->type = MEMORY_DEVICE_PERSISTENT; > > mutex_lock(&pgmap_lock); > error = 0; > @@ -363,7 +364,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, > goto err_pfn_remap; > > mem_hotplug_begin(); > - error = arch_add_memory(nid, align_start, align_size, true); > + error = arch_add_memory(nid, align_start, align_size, pgmap->type); > mem_hotplug_done(); > if (error) > goto err_add_memory; > diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c > index a07a07c..d1a4326 100644 > --- a/mm/memory_hotplug.c > +++ b/mm/memory_hotplug.c > @@ -1384,7 +1384,7 @@ int __ref add_memory_resource(int nid, struct resource *res, bool online) > } > > /* call arch's memory hotadd */ > - ret = arch_add_memory(nid, start, size, false); > + ret = arch_add_memory(nid, start, size, MEMORY_NORMAL); > > if (ret < 0) > goto error; > @@ -2188,7 +2188,7 @@ void __ref remove_memory(int nid, u64 start, u64 size) > memblock_free(start, size); > memblock_remove(start, size); > > - arch_remove_memory(start, size); > + arch_remove_memory(start, size, MEMORY_NORMAL); > > try_offline_node(nid); > > -- > 2.9.3 > > -- > To unsubscribe, send a message with 'unsubscribe linux-mm' in > the body to majordomo@xxxxxxxxx. For more info on Linux MM, > see: http://www.linux-mm.org/ . > Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a> -- Michal Hocko SUSE Labs -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a>