The patch titled Subject: device-dax: add resize support has been added to the -mm tree. Its filename is device-dax-add-resize-support.patch This patch should soon appear at https://ozlabs.org/~akpm/mmots/broken-out/device-dax-add-resize-support.patch and later at https://ozlabs.org/~akpm/mmotm/broken-out/device-dax-add-resize-support.patch Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/process/submit-checklist.rst when testing your code *** The -mm tree is included into linux-next and is updated there every 3-4 working days ------------------------------------------------------ From: Dan Williams <dan.j.williams@xxxxxxxxx> Subject: device-dax: add resize support Make the device-dax 'size' attribute writable to allow capacity to be split between multiple instances in a region. The intended consumers of this capability are users that want to split a scarce memory resource between device-dax and System-RAM access, or users that want to have multiple security domains for a large region. By default the hmem instance provider allocates an entire region to the first instance. The process of creating a new instance (assuming a region-id of 0) is find the region and trigger the 'create' attribute which yields an empty instance to configure. For example: cd /sys/bus/dax/devices echo dax0.0 > dax0.0/driver/unbind echo $new_size > dax0.0/size echo 1 > $(readlink -f dax0.0)../dax_region/create seed=$(cat $(readlink -f dax0.0)../dax_region/seed) echo $new_size > $seed/size echo dax0.0 > ../drivers/{device_dax,kmem}/bind echo dax0.1 > ../drivers/{device_dax,kmem}/bind Instances can be destroyed by: echo $device > $(readlink -f $device)../dax_region/delete Link: https://lkml.kernel.org/r/159643102625.4062302.7431838945566033852.stgit@xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx Link: https://lkml.kernel.org/r/160106115239.30709.9850106928133493138.stgit@xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx> Cc: Vishal Verma <vishal.l.verma@xxxxxxxxx> Cc: Brice Goglin <Brice.Goglin@xxxxxxxx> Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx> Cc: Dave Jiang <dave.jiang@xxxxxxxxx> Cc: David Hildenbrand <david@xxxxxxxxxx> Cc: Ira Weiny <ira.weiny@xxxxxxxxx> Cc: Jia He <justin.he@xxxxxxx> Cc: Joao Martins <joao.m.martins@xxxxxxxxxx> Cc: Jonathan Cameron <Jonathan.Cameron@xxxxxxxxxx> Cc: Andy Lutomirski <luto@xxxxxxxxxx> Cc: Ard Biesheuvel <ard.biesheuvel@xxxxxxxxxx> Cc: Ard Biesheuvel <ardb@xxxxxxxxxx> Cc: Benjamin Herrenschmidt <benh@xxxxxxxxxxxxxxxxxxx> Cc: Ben Skeggs <bskeggs@xxxxxxxxxx> Cc: Bjorn Helgaas <bhelgaas@xxxxxxxxxx> Cc: Borislav Petkov <bp@xxxxxxxxx> Cc: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx> Cc: Catalin Marinas <catalin.marinas@xxxxxxx> Cc: Daniel Vetter <daniel@xxxxxxxx> Cc: David Airlie <airlied@xxxxxxxx> Cc: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> Cc: "H. Peter Anvin" <hpa@xxxxxxxxx> Cc: Hulk Robot <hulkci@xxxxxxxxxx> Cc: Ingo Molnar <mingo@xxxxxxxxxx> Cc: Jason Gunthorpe <jgg@xxxxxxxxxxxx> Cc: Jason Yan <yanaijie@xxxxxxxxxx> Cc: Jeff Moyer <jmoyer@xxxxxxxxxx> Cc: "Jérôme Glisse" <jglisse@xxxxxxxxxx> Cc: Juergen Gross <jgross@xxxxxxxx> Cc: kernel test robot <lkp@xxxxxxxxx> Cc: Michael Ellerman <mpe@xxxxxxxxxxxxxx> Cc: Mike Rapoport <rppt@xxxxxxxxxxxxx> Cc: Paul Mackerras <paulus@xxxxxxxxxx> Cc: Pavel Tatashin <pasha.tatashin@xxxxxxxxxx> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx> Cc: "Rafael J. Wysocki" <rafael.j.wysocki@xxxxxxxxx> Cc: Randy Dunlap <rdunlap@xxxxxxxxxxxxx> Cc: Stefano Stabellini <sstabellini@xxxxxxxxxx> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Cc: Tom Lendacky <thomas.lendacky@xxxxxxx> Cc: Vivek Goyal <vgoyal@xxxxxxxxxx> Cc: Wei Yang <richard.weiyang@xxxxxxxxxxxxxxxxx> Cc: Will Deacon <will@xxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- drivers/dax/bus.c | 161 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 152 insertions(+), 9 deletions(-) --- a/drivers/dax/bus.c~device-dax-add-resize-support +++ a/drivers/dax/bus.c @@ -6,6 +6,7 @@ #include <linux/list.h> #include <linux/slab.h> #include <linux/dax.h> +#include <linux/io.h> #include "dax-private.h" #include "bus.h" @@ -562,7 +563,8 @@ struct dax_region *alloc_dax_region(stru } EXPORT_SYMBOL_GPL(alloc_dax_region); -static int alloc_dev_dax_range(struct dev_dax *dev_dax, resource_size_t size) +static int alloc_dev_dax_range(struct dev_dax *dev_dax, u64 start, + resource_size_t size) { struct dax_region *dax_region = dev_dax->region; struct resource *res = &dax_region->res; @@ -580,12 +582,7 @@ static int alloc_dev_dax_range(struct de return 0; } - /* TODO: handle multiple allocations per region */ - if (res->child) - return -ENOMEM; - - alloc = __request_region(res, res->start, size, dev_name(dev), 0); - + alloc = __request_region(res, start, size, dev_name(dev), 0); if (!alloc) return -ENOMEM; @@ -597,6 +594,29 @@ static int alloc_dev_dax_range(struct de return 0; } +static int adjust_dev_dax_range(struct dev_dax *dev_dax, struct resource *res, resource_size_t size) +{ + struct dax_region *dax_region = dev_dax->region; + struct range *range = &dev_dax->range; + int rc = 0; + + device_lock_assert(dax_region->dev); + + if (size) + rc = adjust_resource(res, range->start, size); + else + __release_region(&dax_region->res, range->start, range_len(range)); + if (rc) + return rc; + + dev_dax->range = (struct range) { + .start = range->start, + .end = range->start + size - 1, + }; + + return 0; +} + static ssize_t size_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -605,7 +625,127 @@ static ssize_t size_show(struct device * return sprintf(buf, "%llu\n", size); } -static DEVICE_ATTR_RO(size); + +static bool alloc_is_aligned(struct dax_region *dax_region, + resource_size_t size) +{ + /* + * The minimum mapping granularity for a device instance is a + * single subsection, unless the arch says otherwise. + */ + return IS_ALIGNED(size, max_t(unsigned long, dax_region->align, + memremap_compat_align())); +} + +static int dev_dax_shrink(struct dev_dax *dev_dax, resource_size_t size) +{ + struct dax_region *dax_region = dev_dax->region; + struct range *range = &dev_dax->range; + struct resource *res, *adjust = NULL; + struct device *dev = &dev_dax->dev; + + for_each_dax_region_resource(dax_region, res) + if (strcmp(res->name, dev_name(dev)) == 0 + && res->start == range->start) { + adjust = res; + break; + } + + if (dev_WARN_ONCE(dev, !adjust, "failed to find matching resource\n")) + return -ENXIO; + return adjust_dev_dax_range(dev_dax, adjust, size); +} + +static ssize_t dev_dax_resize(struct dax_region *dax_region, + struct dev_dax *dev_dax, resource_size_t size) +{ + resource_size_t avail = dax_region_avail_size(dax_region), to_alloc; + resource_size_t dev_size = range_len(&dev_dax->range); + struct resource *region_res = &dax_region->res; + struct device *dev = &dev_dax->dev; + const char *name = dev_name(dev); + struct resource *res, *first; + + if (dev->driver) + return -EBUSY; + if (size == dev_size) + return 0; + if (size > dev_size && size - dev_size > avail) + return -ENOSPC; + if (size < dev_size) + return dev_dax_shrink(dev_dax, size); + + to_alloc = size - dev_size; + if (dev_WARN_ONCE(dev, !alloc_is_aligned(dax_region, to_alloc), + "resize of %pa misaligned\n", &to_alloc)) + return -ENXIO; + + /* + * Expand the device into the unused portion of the region. This + * may involve adjusting the end of an existing resource, or + * allocating a new resource. + */ + first = region_res->child; + if (!first) + return alloc_dev_dax_range(dev_dax, dax_region->res.start, to_alloc); + for (res = first; to_alloc && res; res = res->sibling) { + struct resource *next = res->sibling; + resource_size_t free; + + /* space at the beginning of the region */ + free = 0; + if (res == first && res->start > dax_region->res.start) + free = res->start - dax_region->res.start; + if (free >= to_alloc && dev_size == 0) + return alloc_dev_dax_range(dev_dax, dax_region->res.start, to_alloc); + + free = 0; + /* space between allocations */ + if (next && next->start > res->end + 1) + free = next->start - res->end + 1; + + /* space at the end of the region */ + if (free < to_alloc && !next && res->end < region_res->end) + free = region_res->end - res->end; + + if (free >= to_alloc && strcmp(name, res->name) == 0) + return adjust_dev_dax_range(dev_dax, res, resource_size(res) + to_alloc); + else if (free >= to_alloc && dev_size == 0) + return alloc_dev_dax_range(dev_dax, res->end + 1, to_alloc); + } + return -ENOSPC; +} + +static ssize_t size_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t len) +{ + ssize_t rc; + unsigned long long val; + struct dev_dax *dev_dax = to_dev_dax(dev); + struct dax_region *dax_region = dev_dax->region; + + rc = kstrtoull(buf, 0, &val); + if (rc) + return rc; + + if (!alloc_is_aligned(dax_region, val)) { + dev_dbg(dev, "%s: size: %lld misaligned\n", __func__, val); + return -EINVAL; + } + + device_lock(dax_region->dev); + if (!dax_region->dev->driver) { + device_unlock(dax_region->dev); + return -ENXIO; + } + device_lock(dev); + rc = dev_dax_resize(dax_region, dev_dax, val); + device_unlock(dev); + device_unlock(dax_region->dev); + + return rc == 0 ? len : rc; +} +static DEVICE_ATTR_RW(size); static int dev_dax_target_node(struct dev_dax *dev_dax) { @@ -654,11 +794,14 @@ static umode_t dev_dax_visible(struct ko { struct device *dev = container_of(kobj, struct device, kobj); struct dev_dax *dev_dax = to_dev_dax(dev); + struct dax_region *dax_region = dev_dax->region; if (a == &dev_attr_target_node.attr && dev_dax_target_node(dev_dax) < 0) return 0; if (a == &dev_attr_numa_node.attr && !IS_ENABLED(CONFIG_NUMA)) return 0; + if (a == &dev_attr_size.attr && is_static(dax_region)) + return 0444; return a->mode; } @@ -739,7 +882,7 @@ struct dev_dax *devm_create_dev_dax(stru device_initialize(dev); dev_set_name(dev, "dax%d.%d", dax_region->id, dev_dax->id); - rc = alloc_dev_dax_range(dev_dax, data->size); + rc = alloc_dev_dax_range(dev_dax, dax_region->res.start, data->size); if (rc) goto err_range; _ Patches currently in -mm which might be from dan.j.williams@xxxxxxxxx are x86-numa-cleanup-configuration-dependent-command-line-options.patch x86-numa-add-nohmat-option.patch efi-fake_mem-arrange-for-a-resource-entry-per-efi_fake_mem-instance.patch acpi-hmat-refactor-hmat_register_target_device-to-hmem_register_device.patch resource-report-parent-to-walk_iomem_res_desc-callback.patch mm-memory_hotplug-introduce-default-phys_to_target_node-implementation.patch acpi-hmat-attach-a-device-for-each-soft-reserved-range.patch device-dax-drop-the-dax_regionpfn_flags-attribute.patch device-dax-move-instance-creation-parameters-to-struct-dev_dax_data.patch device-dax-make-pgmap-optional-for-instance-creation.patch device-dax-kmem-introduce-dax_kmem_range.patch device-dax-kmem-move-resource-name-tracking-to-drvdata.patch device-dax-kmem-replace-release_resource-with-release_mem_region.patch device-dax-add-an-allocation-interface-for-device-dax-instances.patch device-dax-introduce-struct-dev_dax-typed-driver-operations.patch device-dax-introduce-seed-devices.patch drivers-base-make-device_find_child_by_name-compatible-with-sysfs-inputs.patch device-dax-add-resize-support.patch mm-memremap_pages-convert-to-struct-range.patch mm-memremap_pages-support-multiple-ranges-per-invocation.patch device-dax-add-dis-contiguous-resource-support.patch device-dax-introduce-mapping-devices.patch device-dax-add-an-align-attribute.patch