Re: [PATCH v2 4/4] xen: add helpers to allocate unpopulated memory

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



CCing Dan

On 24.07.20 14:42, Roger Pau Monne wrote:
> To be used in order to create foreign mappings. This is based on the
> ZONE_DEVICE facility which is used by persistent memory devices in
> order to create struct pages and kernel virtual mappings for the IOMEM
> areas of such devices. Note that on kernels without support for
> ZONE_DEVICE Xen will fall back to using ballooned pages in order to
> create foreign mappings.
> 
> The newly added helpers use the same parameters as the existing
> {alloc/free}_xenballooned_pages functions, which allows for in-place
> replacement of the callers. Once a memory region has been added to be
> used as scratch mapping space it will no longer be released, and pages
> returned are kept in a linked list. This allows keeping a buffer of
> pages and prevents resorting to frequent additions and removals of
> regions.
> 
> If enabled (because ZONE_DEVICE is supported) the usage of the new
> functionality untangles Xen balloon and RAM hotplug from the usage of
> unpopulated physical memory ranges to map foreign pages, which is the
> correct thing to do in order to avoid mappings of foreign pages depending
> on memory hotplug.
> 
> Signed-off-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
> ---
> I've not added a new memory_type type and just used
> MEMORY_DEVICE_DEVDAX which seems to be what we want for such memory
> regions. I'm unsure whether abusing this type is fine, or if I should
> instead add a specific type, maybe MEMORY_DEVICE_GENERIC? I don't
> think we should be using a specific Xen type at all.
> ---
> Cc: Oleksandr Andrushchenko <oleksandr_andrushchenko@xxxxxxxx>
> Cc: David Airlie <airlied@xxxxxxxx>
> Cc: Daniel Vetter <daniel@xxxxxxxx>
> Cc: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
> Cc: Juergen Gross <jgross@xxxxxxxx>
> Cc: Stefano Stabellini <sstabellini@xxxxxxxxxx>
> Cc: Dan Carpenter <dan.carpenter@xxxxxxxxxx>
> Cc: Roger Pau Monne <roger.pau@xxxxxxxxxx>
> Cc: Wei Liu <wl@xxxxxxx>
> Cc: Yan Yankovskyi <yyankovskyi@xxxxxxxxx>
> Cc: dri-devel@xxxxxxxxxxxxxxxxxxxxx
> Cc: xen-devel@xxxxxxxxxxxxxxxxxxxx
> Cc: linux-mm@xxxxxxxxx
> Cc: David Hildenbrand <david@xxxxxxxxxx>
> Cc: Michal Hocko <mhocko@xxxxxxxxxx>
> ---
>  drivers/gpu/drm/xen/xen_drm_front_gem.c |   8 +-
>  drivers/xen/Makefile                    |   1 +
>  drivers/xen/balloon.c                   |   4 +-
>  drivers/xen/grant-table.c               |   4 +-
>  drivers/xen/privcmd.c                   |   4 +-
>  drivers/xen/unpopulated-alloc.c         | 222 ++++++++++++++++++++++++
>  drivers/xen/xenbus/xenbus_client.c      |   6 +-
>  drivers/xen/xlate_mmu.c                 |   4 +-
>  include/xen/xen.h                       |   8 +
>  9 files changed, 246 insertions(+), 15 deletions(-)
>  create mode 100644 drivers/xen/unpopulated-alloc.c
> 
> diff --git a/drivers/gpu/drm/xen/xen_drm_front_gem.c b/drivers/gpu/drm/xen/xen_drm_front_gem.c
> index f0b85e094111..9dd06eae767a 100644
> --- a/drivers/gpu/drm/xen/xen_drm_front_gem.c
> +++ b/drivers/gpu/drm/xen/xen_drm_front_gem.c
> @@ -99,8 +99,8 @@ static struct xen_gem_object *gem_create(struct drm_device *dev, size_t size)
>  		 * allocate ballooned pages which will be used to map
>  		 * grant references provided by the backend
>  		 */
> -		ret = alloc_xenballooned_pages(xen_obj->num_pages,
> -					       xen_obj->pages);
> +		ret = xen_alloc_unpopulated_pages(xen_obj->num_pages,
> +					          xen_obj->pages);
>  		if (ret < 0) {
>  			DRM_ERROR("Cannot allocate %zu ballooned pages: %d\n",
>  				  xen_obj->num_pages, ret);
> @@ -152,8 +152,8 @@ void xen_drm_front_gem_free_object_unlocked(struct drm_gem_object *gem_obj)
>  	} else {
>  		if (xen_obj->pages) {
>  			if (xen_obj->be_alloc) {
> -				free_xenballooned_pages(xen_obj->num_pages,
> -							xen_obj->pages);
> +				xen_free_unpopulated_pages(xen_obj->num_pages,
> +							   xen_obj->pages);
>  				gem_free_pages_array(xen_obj);
>  			} else {
>  				drm_gem_put_pages(&xen_obj->base,
> diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
> index 0d322f3d90cd..788a5d9c8ef0 100644
> --- a/drivers/xen/Makefile
> +++ b/drivers/xen/Makefile
> @@ -42,3 +42,4 @@ xen-gntdev-$(CONFIG_XEN_GNTDEV_DMABUF)	+= gntdev-dmabuf.o
>  xen-gntalloc-y				:= gntalloc.o
>  xen-privcmd-y				:= privcmd.o privcmd-buf.o
>  obj-$(CONFIG_XEN_FRONT_PGDIR_SHBUF)	+= xen-front-pgdir-shbuf.o
> +obj-$(CONFIG_ZONE_DEVICE)		+= unpopulated-alloc.o
> diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
> index b1d8b028bf80..815ef10eb2ff 100644
> --- a/drivers/xen/balloon.c
> +++ b/drivers/xen/balloon.c
> @@ -654,7 +654,7 @@ void free_xenballooned_pages(int nr_pages, struct page **pages)
>  }
>  EXPORT_SYMBOL(free_xenballooned_pages);
>  
> -#ifdef CONFIG_XEN_PV
> +#if defined(CONFIG_XEN_PV) && !defined(CONFIG_ZONE_DEVICE)
>  static void __init balloon_add_region(unsigned long start_pfn,
>  				      unsigned long pages)
>  {
> @@ -708,7 +708,7 @@ static int __init balloon_init(void)
>  	register_sysctl_table(xen_root);
>  #endif
>  
> -#ifdef CONFIG_XEN_PV
> +#if defined(CONFIG_XEN_PV) && !defined(CONFIG_ZONE_DEVICE)
>  	{
>  		int i;
>  
> diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
> index 8d06bf1cc347..523dcdf39cc9 100644
> --- a/drivers/xen/grant-table.c
> +++ b/drivers/xen/grant-table.c
> @@ -801,7 +801,7 @@ int gnttab_alloc_pages(int nr_pages, struct page **pages)
>  {
>  	int ret;
>  
> -	ret = alloc_xenballooned_pages(nr_pages, pages);
> +	ret = xen_alloc_unpopulated_pages(nr_pages, pages);
>  	if (ret < 0)
>  		return ret;
>  
> @@ -836,7 +836,7 @@ EXPORT_SYMBOL_GPL(gnttab_pages_clear_private);
>  void gnttab_free_pages(int nr_pages, struct page **pages)
>  {
>  	gnttab_pages_clear_private(nr_pages, pages);
> -	free_xenballooned_pages(nr_pages, pages);
> +	xen_free_unpopulated_pages(nr_pages, pages);
>  }
>  EXPORT_SYMBOL_GPL(gnttab_free_pages);
>  
> diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
> index a250d118144a..56000ab70974 100644
> --- a/drivers/xen/privcmd.c
> +++ b/drivers/xen/privcmd.c
> @@ -425,7 +425,7 @@ static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs)
>  	if (pages == NULL)
>  		return -ENOMEM;
>  
> -	rc = alloc_xenballooned_pages(numpgs, pages);
> +	rc = xen_alloc_unpopulated_pages(numpgs, pages);
>  	if (rc != 0) {
>  		pr_warn("%s Could not alloc %d pfns rc:%d\n", __func__,
>  			numpgs, rc);
> @@ -900,7 +900,7 @@ static void privcmd_close(struct vm_area_struct *vma)
>  
>  	rc = xen_unmap_domain_gfn_range(vma, numgfns, pages);
>  	if (rc == 0)
> -		free_xenballooned_pages(numpgs, pages);
> +		xen_free_unpopulated_pages(numpgs, pages);
>  	else
>  		pr_crit("unable to unmap MFN range: leaking %d pages. rc=%d\n",
>  			numpgs, rc);
> diff --git a/drivers/xen/unpopulated-alloc.c b/drivers/xen/unpopulated-alloc.c
> new file mode 100644
> index 000000000000..aaa91cefbbf9
> --- /dev/null
> +++ b/drivers/xen/unpopulated-alloc.c
> @@ -0,0 +1,222 @@
> +/*
> + * Helpers to allocate unpopulated memory for foreign mappings
> + *
> + * Copyright (c) 2020, Citrix Systems R&D
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License version 2
> + * as published by the Free Software Foundation; or, when distributed
> + * separately from the Linux kernel or incorporated into other
> + * software packages, subject to the following license:
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this source file (the "Software"), to deal in the Software without
> + * restriction, including without limitation the rights to use, copy, modify,
> + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
> + * and to permit persons to whom the Software is furnished to do so, subject to
> + * the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include <linux/errno.h>
> +#include <linux/gfp.h>
> +#include <linux/kernel.h>
> +#include <linux/mm.h>
> +#include <linux/memremap.h>
> +#include <linux/slab.h>
> +
> +#include <asm/page.h>
> +
> +#include <xen/page.h>
> +#include <xen/xen.h>
> +
> +static DEFINE_MUTEX(lock);
> +static LIST_HEAD(list);
> +static unsigned int count;
> +
> +static int fill(unsigned int nr_pages)
> +{
> +	struct dev_pagemap *pgmap;
> +	void *vaddr;
> +	unsigned int i, alloc_pages = round_up(nr_pages, PAGES_PER_SECTION);
> +	int nid, ret;
> +
> +	pgmap = kzalloc(sizeof(*pgmap), GFP_KERNEL);
> +	if (!pgmap)
> +		return -ENOMEM;
> +
> +	pgmap->type = MEMORY_DEVICE_DEVDAX;
> +	pgmap->res.name = "XEN SCRATCH";
> +	pgmap->res.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
> +
> +	ret = allocate_resource(&iomem_resource, &pgmap->res,
> +				alloc_pages * PAGE_SIZE, 0, -1,
> +				PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL);
> +	if (ret < 0) {
> +		pr_err("Cannot allocate new IOMEM resource\n");
> +		kfree(pgmap);
> +		return ret;
> +	}
> +
> +	nid = memory_add_physaddr_to_nid(pgmap->res.start);
> +
> +#ifdef CONFIG_XEN_HAVE_PVMMU
> +	/*
> +	 * We don't support PV MMU when Linux and Xen is using
> +	 * different page granularity.
> +	 */
> +	BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE);
> +
> +        /*
> +         * memremap will build page tables for the new memory so
> +         * the p2m must contain invalid entries so the correct
> +         * non-present PTEs will be written.
> +         *
> +         * If a failure occurs, the original (identity) p2m entries
> +         * are not restored since this region is now known not to
> +         * conflict with any devices.
> +         */
> +	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
> +		xen_pfn_t pfn = PFN_DOWN(pgmap->res.start);
> +
> +		for (i = 0; i < alloc_pages; i++) {
> +			if (!set_phys_to_machine(pfn + i, INVALID_P2M_ENTRY)) {
> +				pr_warn("set_phys_to_machine() failed, no memory added\n");
> +				release_resource(&pgmap->res);
> +				kfree(pgmap);
> +				return -ENOMEM;
> +			}
> +                }
> +	}
> +#endif
> +
> +	vaddr = memremap_pages(pgmap, nid);
> +	if (IS_ERR(vaddr)) {
> +		pr_err("Cannot remap memory range\n");
> +		release_resource(&pgmap->res);
> +		kfree(pgmap);
> +		return PTR_ERR(vaddr);
> +	}
> +
> +	for (i = 0; i < alloc_pages; i++) {
> +		struct page *pg = virt_to_page(vaddr + PAGE_SIZE * i);
> +
> +		BUG_ON(!virt_addr_valid(vaddr + PAGE_SIZE * i));
> +		list_add(&pg->lru, &list);
> +		count++;
> +	}
> +
> +	return 0;
> +}
> +
> +/**
> + * xen_alloc_unpopulated_pages - alloc unpopulated pages
> + * @nr_pages: Number of pages
> + * @pages: pages returned
> + * @return 0 on success, error otherwise
> + */
> +int xen_alloc_unpopulated_pages(unsigned int nr_pages, struct page **pages)
> +{
> +	unsigned int i;
> +	int ret = 0;
> +
> +	mutex_lock(&lock);
> +	if (count < nr_pages) {
> +		ret = fill(nr_pages);
> +		if (ret)
> +			goto out;
> +	}
> +
> +	for (i = 0; i < nr_pages; i++) {
> +		struct page *pg = list_first_entry_or_null(&list, struct page,
> +							   lru);
> +
> +		BUG_ON(!pg);
> +		list_del(&pg->lru);
> +		count--;
> +		pages[i] = pg;
> +
> +#ifdef CONFIG_XEN_HAVE_PVMMU
> +		/*
> +		 * We don't support PV MMU when Linux and Xen is using
> +		 * different page granularity.
> +		 */
> +		BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE);
> +
> +		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
> +			ret = xen_alloc_p2m_entry(page_to_pfn(pg));
> +			if (ret < 0) {
> +				unsigned int j;
> +
> +				for (j = 0; j <= i; j++) {
> +					list_add(&pages[j]->lru, &list);
> +					count++;
> +				}
> +				goto out;
> +			}
> +		}
> +#endif
> +	}
> +
> +out:
> +	mutex_unlock(&lock);
> +	return ret;
> +}
> +EXPORT_SYMBOL(xen_alloc_unpopulated_pages);
> +
> +/**
> + * xen_free_unpopulated_pages - return unpopulated pages
> + * @nr_pages: Number of pages
> + * @pages: pages to return
> + */
> +void xen_free_unpopulated_pages(unsigned int nr_pages, struct page **pages)
> +{
> +	unsigned int i;
> +
> +	mutex_lock(&lock);
> +	for (i = 0; i < nr_pages; i++) {
> +		list_add(&pages[i]->lru, &list);
> +		count++;
> +	}
> +	mutex_unlock(&lock);
> +}
> +EXPORT_SYMBOL(xen_free_unpopulated_pages);
> +
> +#ifdef CONFIG_XEN_PV
> +static int __init init(void)
> +{
> +	unsigned int i;
> +
> +	if (!xen_domain())
> +		return -ENODEV;
> +
> +	/*
> +	 * Initialize with pages from the extra memory regions (see
> +	 * arch/x86/xen/setup.c).
> +	 */
> +	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
> +		unsigned int j;
> +
> +		for (j = 0; j < xen_extra_mem[i].n_pfns; j++) {
> +			struct page *pg =
> +				pfn_to_page(xen_extra_mem[i].start_pfn + j);
> +
> +			list_add(&pg->lru, &list);
> +			count++;
> +		}
> +	}
> +
> +	return 0;
> +}
> +subsys_initcall(init);
> +#endif
> diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
> index 786fbb7d8be0..70b6c4780fbd 100644
> --- a/drivers/xen/xenbus/xenbus_client.c
> +++ b/drivers/xen/xenbus/xenbus_client.c
> @@ -615,7 +615,7 @@ static int xenbus_map_ring_hvm(struct xenbus_device *dev,
>  	bool leaked = false;
>  	unsigned int nr_pages = XENBUS_PAGES(nr_grefs);
>  
> -	err = alloc_xenballooned_pages(nr_pages, node->hvm.pages);
> +	err = xen_alloc_unpopulated_pages(nr_pages, node->hvm.pages);
>  	if (err)
>  		goto out_err;
>  
> @@ -656,7 +656,7 @@ static int xenbus_map_ring_hvm(struct xenbus_device *dev,
>  			 addr, nr_pages);
>   out_free_ballooned_pages:
>  	if (!leaked)
> -		free_xenballooned_pages(nr_pages, node->hvm.pages);
> +		xen_free_unpopulated_pages(nr_pages, node->hvm.pages);
>   out_err:
>  	return err;
>  }
> @@ -852,7 +852,7 @@ static int xenbus_unmap_ring_hvm(struct xenbus_device *dev, void *vaddr)
>  			       info.addrs);
>  	if (!rv) {
>  		vunmap(vaddr);
> -		free_xenballooned_pages(nr_pages, node->hvm.pages);
> +		xen_free_unpopulated_pages(nr_pages, node->hvm.pages);
>  	}
>  	else
>  		WARN(1, "Leaking %p, size %u page(s)\n", vaddr, nr_pages);
> diff --git a/drivers/xen/xlate_mmu.c b/drivers/xen/xlate_mmu.c
> index 7b1077f0abcb..34742c6e189e 100644
> --- a/drivers/xen/xlate_mmu.c
> +++ b/drivers/xen/xlate_mmu.c
> @@ -232,7 +232,7 @@ int __init xen_xlate_map_ballooned_pages(xen_pfn_t **gfns, void **virt,
>  		kfree(pages);
>  		return -ENOMEM;
>  	}
> -	rc = alloc_xenballooned_pages(nr_pages, pages);
> +	rc = xen_alloc_unpopulated_pages(nr_pages, pages);
>  	if (rc) {
>  		pr_warn("%s Couldn't balloon alloc %ld pages rc:%d\n", __func__,
>  			nr_pages, rc);
> @@ -249,7 +249,7 @@ int __init xen_xlate_map_ballooned_pages(xen_pfn_t **gfns, void **virt,
>  	if (!vaddr) {
>  		pr_warn("%s Couldn't map %ld pages rc:%d\n", __func__,
>  			nr_pages, rc);
> -		free_xenballooned_pages(nr_pages, pages);
> +		xen_free_unpopulated_pages(nr_pages, pages);
>  		kfree(pages);
>  		kfree(pfns);
>  		return -ENOMEM;
> diff --git a/include/xen/xen.h b/include/xen/xen.h
> index 19a72f591e2b..aa33bc0d933c 100644
> --- a/include/xen/xen.h
> +++ b/include/xen/xen.h
> @@ -52,4 +52,12 @@ bool xen_biovec_phys_mergeable(const struct bio_vec *vec1,
>  extern u64 xen_saved_max_mem_size;
>  #endif
>  
> +#ifdef CONFIG_ZONE_DEVICE
> +int xen_alloc_unpopulated_pages(unsigned int nr_pages, struct page **pages);
> +void xen_free_unpopulated_pages(unsigned int nr_pages, struct page **pages);
> +#else
> +#define xen_alloc_unpopulated_pages alloc_xenballooned_pages
> +#define xen_free_unpopulated_pages free_xenballooned_pages
> +#endif
> +
>  #endif	/* _XEN_XEN_H */
> 



-- 
Thanks,

David / dhildenb






[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Announce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux