Re: [HMM v13 01/18] mm/memory/hotplug: convert device parameter bool to set of flags

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 19/11/16 05:18, Jérôme Glisse wrote:
> Only useful for arch where we support ZONE_DEVICE and where we want to
> also support un-addressable device memory. We need struct page for such
> un-addressable memory. But we should avoid populating the kernel linear
> mapping for the physical address range because there is no real memory
> or anything behind those physical addresses.
> 
> Hence we need more flags than just knowing if it is device memory or not.
> 


Isn't it better to add a wrapper around arch_add/remove_memory, do those
checks inside it, and then call arch_add/remove_memory, to reduce the churn?
If you need to selectively enable MEMORY_UNADDRESSABLE, that can be done with
_ARCH_HAS_FEATURE.

> Signed-off-by: Jérôme Glisse <jglisse@xxxxxxxxxx>
> Cc: Russell King <linux@xxxxxxxxxxxxxxx>
> Cc: Benjamin Herrenschmidt <benh@xxxxxxxxxxxxxxxxxxx>
> Cc: Paul Mackerras <paulus@xxxxxxxxx>
> Cc: Michael Ellerman <mpe@xxxxxxxxxxxxxx>
> Cc: Martin Schwidefsky <schwidefsky@xxxxxxxxxx>
> Cc: Heiko Carstens <heiko.carstens@xxxxxxxxxx>
> Cc: Yoshinori Sato <ysato@xxxxxxxxxxxxxxxxxxxx>
> Cc: Rich Felker <dalias@xxxxxxxx>
> Cc: Chris Metcalf <cmetcalf@xxxxxxxxxxxx>
> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
> Cc: Ingo Molnar <mingo@xxxxxxxxxx>
> Cc: "H. Peter Anvin" <hpa@xxxxxxxxx>
> ---
>  arch/ia64/mm/init.c            | 19 ++++++++++++++++---
>  arch/powerpc/mm/mem.c          | 18 +++++++++++++++---
>  arch/s390/mm/init.c            | 10 ++++++++--
>  arch/sh/mm/init.c              | 18 +++++++++++++++---
>  arch/tile/mm/init.c            | 10 ++++++++--
>  arch/x86/mm/init_32.c          | 19 ++++++++++++++++---
>  arch/x86/mm/init_64.c          | 19 ++++++++++++++++---
>  include/linux/memory_hotplug.h | 17 +++++++++++++++--
>  kernel/memremap.c              |  4 ++--
>  mm/memory_hotplug.c            |  4 ++--
>  10 files changed, 113 insertions(+), 25 deletions(-)
> 
> diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
> index 1841ef6..95a2fa5 100644
> --- a/arch/ia64/mm/init.c
> +++ b/arch/ia64/mm/init.c
> @@ -645,7 +645,7 @@ mem_init (void)
>  }
>  
>  #ifdef CONFIG_MEMORY_HOTPLUG
> -int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
> +int arch_add_memory(int nid, u64 start, u64 size, int flags)
>  {
>  	pg_data_t *pgdat;
>  	struct zone *zone;
> @@ -653,10 +653,17 @@ int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
>  	unsigned long nr_pages = size >> PAGE_SHIFT;
>  	int ret;
>  
> +	/* Need to add support for device and unaddressable memory if needed */
> +	if (flags & MEMORY_UNADDRESSABLE) {
> +		BUG();
> +		return -EINVAL;
> +	}
> +
>  	pgdat = NODE_DATA(nid);
>  
>  	zone = pgdat->node_zones +
> -		zone_for_memory(nid, start, size, ZONE_NORMAL, for_device);
> +		zone_for_memory(nid, start, size, ZONE_NORMAL,
> +				flags & MEMORY_DEVICE);
>  	ret = __add_pages(nid, zone, start_pfn, nr_pages);
>  
>  	if (ret)
> @@ -667,13 +674,19 @@ int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
>  }
>  
>  #ifdef CONFIG_MEMORY_HOTREMOVE
> -int arch_remove_memory(u64 start, u64 size)
> +int arch_remove_memory(u64 start, u64 size, int flags)
>  {
>  	unsigned long start_pfn = start >> PAGE_SHIFT;
>  	unsigned long nr_pages = size >> PAGE_SHIFT;
>  	struct zone *zone;
>  	int ret;
>  
> +	/* Need to add support for device and unaddressable memory if needed */
> +	if (flags & MEMORY_UNADDRESSABLE) {
> +		BUG();
> +		return -EINVAL;
> +	}
> +
>  	zone = page_zone(pfn_to_page(start_pfn));
>  	ret = __remove_pages(zone, start_pfn, nr_pages);
>  	if (ret)
> diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
> index 5f84433..e3c0532 100644
> --- a/arch/powerpc/mm/mem.c
> +++ b/arch/powerpc/mm/mem.c
> @@ -126,7 +126,7 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end)
>  	return -ENODEV;
>  }
>  
> -int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
> +int arch_add_memory(int nid, u64 start, u64 size, int flags)
>  {
>  	struct pglist_data *pgdata;
>  	struct zone *zone;
> @@ -134,6 +134,12 @@ int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
>  	unsigned long nr_pages = size >> PAGE_SHIFT;
>  	int rc;
>  
> +	/* Need to add support for device and unaddressable memory if needed */
> +	if (flags & MEMORY_UNADDRESSABLE) {
> +		BUG();
> +		return -EINVAL;
> +	}
> +
>  	pgdata = NODE_DATA(nid);
>  
>  	start = (unsigned long)__va(start);
> @@ -147,18 +153,24 @@ int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
>  
>  	/* this should work for most non-highmem platforms */
>  	zone = pgdata->node_zones +
> -		zone_for_memory(nid, start, size, 0, for_device);
> +		zone_for_memory(nid, start, size, 0, flags & MEMORY_DEVICE);
>  
>  	return __add_pages(nid, zone, start_pfn, nr_pages);
>  }
>  
>  #ifdef CONFIG_MEMORY_HOTREMOVE
> -int arch_remove_memory(u64 start, u64 size)
> +int arch_remove_memory(u64 start, u64 size, int flags)
>  {
>  	unsigned long start_pfn = start >> PAGE_SHIFT;
>  	unsigned long nr_pages = size >> PAGE_SHIFT;
>  	struct zone *zone;
>  	int ret;
> +	
> +	/* Need to add support for device and unaddressable memory if needed */
> +	if (flags & MEMORY_UNADDRESSABLE) {
> +		BUG();
> +		return -EINVAL;
> +	}
>  
>  	zone = page_zone(pfn_to_page(start_pfn));
>  	ret = __remove_pages(zone, start_pfn, nr_pages);
> diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
> index f56a39b..4147b87 100644
> --- a/arch/s390/mm/init.c
> +++ b/arch/s390/mm/init.c
> @@ -149,7 +149,7 @@ void __init free_initrd_mem(unsigned long start, unsigned long end)
>  #endif
>  
>  #ifdef CONFIG_MEMORY_HOTPLUG
> -int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
> +int arch_add_memory(int nid, u64 start, u64 size, int flags)
>  {
>  	unsigned long normal_end_pfn = PFN_DOWN(memblock_end_of_DRAM());
>  	unsigned long dma_end_pfn = PFN_DOWN(MAX_DMA_ADDRESS);
> @@ -158,6 +158,12 @@ int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
>  	unsigned long nr_pages;
>  	int rc, zone_enum;
>  
> +	/* Need to add support for device and unaddressable memory if needed */
> +	if (flags & MEMORY_UNADDRESSABLE) {
> +		BUG();
> +		return -EINVAL;
> +	}
> +
>  	rc = vmem_add_mapping(start, size);
>  	if (rc)
>  		return rc;
> @@ -197,7 +203,7 @@ unsigned long memory_block_size_bytes(void)
>  }
>  
>  #ifdef CONFIG_MEMORY_HOTREMOVE
> -int arch_remove_memory(u64 start, u64 size)
> +int arch_remove_memory(u64 start, u64 size, int flags)
>  {
>  	/*
>  	 * There is no hardware or firmware interface which could trigger a
> diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
> index 7549186..f72a402 100644
> --- a/arch/sh/mm/init.c
> +++ b/arch/sh/mm/init.c
> @@ -485,19 +485,25 @@ void free_initrd_mem(unsigned long start, unsigned long end)
>  #endif
>  
>  #ifdef CONFIG_MEMORY_HOTPLUG
> -int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
> +int arch_add_memory(int nid, u64 start, u64 size, int flags)
>  {
>  	pg_data_t *pgdat;
>  	unsigned long start_pfn = PFN_DOWN(start);
>  	unsigned long nr_pages = size >> PAGE_SHIFT;
>  	int ret;
>  
> +	/* Need to add support for device and unaddressable memory if needed */
> +	if (flags & MEMORY_UNADDRESSABLE) {
> +		BUG();
> +		return -EINVAL;
> +	}
> +
>  	pgdat = NODE_DATA(nid);
>  
>  	/* We only have ZONE_NORMAL, so this is easy.. */
>  	ret = __add_pages(nid, pgdat->node_zones +
>  			zone_for_memory(nid, start, size, ZONE_NORMAL,
> -			for_device),
> +					flags & MEMORY_DEVICE),
>  			start_pfn, nr_pages);
>  	if (unlikely(ret))
>  		printk("%s: Failed, __add_pages() == %d\n", __func__, ret);
> @@ -516,13 +522,19 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
>  #endif
>  
>  #ifdef CONFIG_MEMORY_HOTREMOVE
> -int arch_remove_memory(u64 start, u64 size)
> +int arch_remove_memory(u64 start, u64 size, int flags)
>  {
>  	unsigned long start_pfn = PFN_DOWN(start);
>  	unsigned long nr_pages = size >> PAGE_SHIFT;
>  	struct zone *zone;
>  	int ret;
>  
> +	/* Need to add support for device and unaddressable memory if needed */
> +	if (flags & MEMORY_UNADDRESSABLE) {
> +		BUG();
> +		return -EINVAL;
> +	}
> +
>  	zone = page_zone(pfn_to_page(start_pfn));
>  	ret = __remove_pages(zone, start_pfn, nr_pages);
>  	if (unlikely(ret))
> diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c
> index adce254..5fd972c 100644
> --- a/arch/tile/mm/init.c
> +++ b/arch/tile/mm/init.c
> @@ -863,13 +863,19 @@ void __init mem_init(void)
>   * memory to the highmem for now.
>   */
>  #ifndef CONFIG_NEED_MULTIPLE_NODES
> -int arch_add_memory(u64 start, u64 size, bool for_device)
> +int arch_add_memory(u64 start, u64 size, int flags)
>  {
>  	struct pglist_data *pgdata = &contig_page_data;
>  	struct zone *zone = pgdata->node_zones + MAX_NR_ZONES-1;
>  	unsigned long start_pfn = start >> PAGE_SHIFT;
>  	unsigned long nr_pages = size >> PAGE_SHIFT;
>  
> +	/* Need to add support for device and unaddressable memory if needed */
> +	if (flags & MEMORY_UNADDRESSABLE) {
> +		BUG();
> +		return -EINVAL;
> +	}
> +
>  	return __add_pages(zone, start_pfn, nr_pages);
>  }
>  
> @@ -879,7 +885,7 @@ int remove_memory(u64 start, u64 size)
>  }
>  
>  #ifdef CONFIG_MEMORY_HOTREMOVE
> -int arch_remove_memory(u64 start, u64 size)
> +int arch_remove_memory(u64 start, u64 size, int flags)
>  {
>  	/* TODO */
>  	return -EBUSY;
> diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
> index cf80590..16a9095 100644
> --- a/arch/x86/mm/init_32.c
> +++ b/arch/x86/mm/init_32.c
> @@ -816,24 +816,37 @@ void __init mem_init(void)
>  }
>  
>  #ifdef CONFIG_MEMORY_HOTPLUG
> -int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
> +int arch_add_memory(int nid, u64 start, u64 size, int flags)
>  {
>  	struct pglist_data *pgdata = NODE_DATA(nid);
>  	struct zone *zone = pgdata->node_zones +
> -		zone_for_memory(nid, start, size, ZONE_HIGHMEM, for_device);
> +		zone_for_memory(nid, start, size, ZONE_HIGHMEM,
> +				flags & MEMORY_DEVICE);
>  	unsigned long start_pfn = start >> PAGE_SHIFT;
>  	unsigned long nr_pages = size >> PAGE_SHIFT;
>  
> +	/* Need to add support for device and unaddressable memory if needed */
> +	if (flags & MEMORY_UNADDRESSABLE) {
> +		BUG();
> +		return -EINVAL;
> +	}
> +
>  	return __add_pages(nid, zone, start_pfn, nr_pages);
>  }
>  
>  #ifdef CONFIG_MEMORY_HOTREMOVE
> -int arch_remove_memory(u64 start, u64 size)
> +int arch_remove_memory(u64 start, u64 size, int flags)
>  {
>  	unsigned long start_pfn = start >> PAGE_SHIFT;
>  	unsigned long nr_pages = size >> PAGE_SHIFT;
>  	struct zone *zone;
>  
> +	/* Need to add support for device and unaddressable memory if needed */
> +	if (flags & MEMORY_UNADDRESSABLE) {
> +		BUG();
> +		return -EINVAL;
> +	}
> +
>  	zone = page_zone(pfn_to_page(start_pfn));
>  	return __remove_pages(zone, start_pfn, nr_pages);
>  }
> diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
> index 14b9dd7..8c4abb0 100644
> --- a/arch/x86/mm/init_64.c
> +++ b/arch/x86/mm/init_64.c
> @@ -651,15 +651,22 @@ static void  update_end_of_memory_vars(u64 start, u64 size)
>   * Memory is added always to NORMAL zone. This means you will never get
>   * additional DMA/DMA32 memory.
>   */
> -int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
> +int arch_add_memory(int nid, u64 start, u64 size, int flags)
>  {
>  	struct pglist_data *pgdat = NODE_DATA(nid);
>  	struct zone *zone = pgdat->node_zones +
> -		zone_for_memory(nid, start, size, ZONE_NORMAL, for_device);
> +		zone_for_memory(nid, start, size, ZONE_NORMAL,
> +				flags & MEMORY_DEVICE);
>  	unsigned long start_pfn = start >> PAGE_SHIFT;
>  	unsigned long nr_pages = size >> PAGE_SHIFT;
>  	int ret;
>  
> +	/* Need to add support for device and unaddressable memory if needed */
> +	if (flags & MEMORY_UNADDRESSABLE) {
> +		BUG();
> +		return -EINVAL;
> +	}
> +
>  	init_memory_mapping(start, start + size);
>  
>  	ret = __add_pages(nid, zone, start_pfn, nr_pages);
> @@ -956,7 +963,7 @@ kernel_physical_mapping_remove(unsigned long start, unsigned long end)
>  	remove_pagetable(start, end, true);
>  }
>  
> -int __ref arch_remove_memory(u64 start, u64 size)
> +int __ref arch_remove_memory(u64 start, u64 size, int flags)
>  {
>  	unsigned long start_pfn = start >> PAGE_SHIFT;
>  	unsigned long nr_pages = size >> PAGE_SHIFT;
> @@ -965,6 +972,12 @@ int __ref arch_remove_memory(u64 start, u64 size)
>  	struct zone *zone;
>  	int ret;
>  
> +	/* Need to add support for device and unaddressable memory if needed */
> +	if (flags & MEMORY_UNADDRESSABLE) {
> +		BUG();
> +		return -EINVAL;
> +	}
> +
>  	/* With altmap the first mapped page is offset from @start */
>  	altmap = to_vmem_altmap((unsigned long) page);
>  	if (altmap)
> diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
> index 01033fa..ba9b12e 100644
> --- a/include/linux/memory_hotplug.h
> +++ b/include/linux/memory_hotplug.h
> @@ -103,7 +103,7 @@ extern bool memhp_auto_online;
>  
>  #ifdef CONFIG_MEMORY_HOTREMOVE
>  extern bool is_pageblock_removable_nolock(struct page *page);
> -extern int arch_remove_memory(u64 start, u64 size);
> +extern int arch_remove_memory(u64 start, u64 size, int flags);
>  extern int __remove_pages(struct zone *zone, unsigned long start_pfn,
>  	unsigned long nr_pages);
>  #endif /* CONFIG_MEMORY_HOTREMOVE */
> @@ -275,7 +275,20 @@ extern int add_memory(int nid, u64 start, u64 size);
>  extern int add_memory_resource(int nid, struct resource *resource, bool online);
>  extern int zone_for_memory(int nid, u64 start, u64 size, int zone_default,
>  		bool for_device);
> -extern int arch_add_memory(int nid, u64 start, u64 size, bool for_device);
> +
> +/*
> + * For device memory we want more informations than just knowing it is device
				     information
> + * memory. We want to know if we can migrate it (ie it is not storage memory
> + * use by DAX). Is it addressable by the CPU ? Some device memory like GPU
> + * memory can not be access by CPU but we still want struct page so that we
			accessed
> + * can use it like regular memory.

Can you please add some details on why this is needed -- for example, that migration needs them?

> + */
> +#define MEMORY_FLAGS_NONE 0
> +#define MEMORY_DEVICE (1 << 0)
> +#define MEMORY_MOVABLE (1 << 1)
> +#define MEMORY_UNADDRESSABLE (1 << 2)
> +
> +extern int arch_add_memory(int nid, u64 start, u64 size, int flags);
>  extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
>  extern bool is_memblock_offlined(struct memory_block *mem);
>  extern void remove_memory(int nid, u64 start, u64 size);
> diff --git a/kernel/memremap.c b/kernel/memremap.c
> index b501e39..07665eb 100644
> --- a/kernel/memremap.c
> +++ b/kernel/memremap.c
> @@ -246,7 +246,7 @@ static void devm_memremap_pages_release(struct device *dev, void *data)
>  	/* pages are dead and unused, undo the arch mapping */
>  	align_start = res->start & ~(SECTION_SIZE - 1);
>  	align_size = ALIGN(resource_size(res), SECTION_SIZE);
> -	arch_remove_memory(align_start, align_size);
> +	arch_remove_memory(align_start, align_size, MEMORY_DEVICE);
>  	untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
>  	pgmap_radix_release(res);
>  	dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc,
> @@ -358,7 +358,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
>  	if (error)
>  		goto err_pfn_remap;
>  
> -	error = arch_add_memory(nid, align_start, align_size, true);
> +	error = arch_add_memory(nid, align_start, align_size, MEMORY_DEVICE);
>  	if (error)
>  		goto err_add_memory;
>  
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index 9629273..b2942d7 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -1386,7 +1386,7 @@ int __ref add_memory_resource(int nid, struct resource *res, bool online)
>  	}
>  
>  	/* call arch's memory hotadd */
> -	ret = arch_add_memory(nid, start, size, false);
> +	ret = arch_add_memory(nid, start, size, MEMORY_FLAGS_NONE);
>  
>  	if (ret < 0)
>  		goto error;
> @@ -2205,7 +2205,7 @@ void __ref remove_memory(int nid, u64 start, u64 size)
>  	memblock_free(start, size);
>  	memblock_remove(start, size);
>  
> -	arch_remove_memory(start, size);
> +	arch_remove_memory(start, size, MEMORY_FLAGS_NONE);
>  
>  	try_offline_node(nid);
>  
> 

Balbir Singh.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@xxxxxxxxx.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@xxxxxxxxx";> email@xxxxxxxxx </a>



[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Announce]     [Bugtraq]     [Linux]     [Linux OMAP]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]