On 8/16/24 7:44 AM, ira.weiny@xxxxxxxxx wrote:
> From: Navneet Singh <navneet.singh@xxxxxxxxx>
>
> DAX regions which map dynamic capacity partitions require that memory be
> allowed to come and go. Recall sparse regions were created for this
> purpose. Now that extents can be realized within DAX regions the DAX
> region driver can start tracking sub-resource information.
>
> The tight relationship between DAX region operations and extent
> operations requires memory changes to be controlled synchronously with
> the user of the region. Synchronize through the dax_region_rwsem and by
> having the region driver drive both the region device as well as the
> extent sub-devices.
>
> Recall requests to remove extents can happen at any time and that a host
> is not obligated to release the memory until it is not being used. If
> an extent is not used allow a release response.
>
> The DAX layer has no need for the details of the CXL memory extent
> devices. Expose extents to the DAX layer as device children of the DAX
> region device. A single callback from the driver aids the DAX layer to
> determine if the child device is an extent. The DAX layer also
> registers a devres function to automatically clean up when the device is
> removed from the region.
>
> There is a race between extents being surfaced and the dax_cxl driver
> being loaded. The driver must therefore scan for any existing extents
> while still under the device lock.
>
> Respond to extent notifications. Manage the DAX region resource tree
> based on the extents' lifetime. Return the status of remove
> notifications to lower layers such that they can manage the hardware
> appropriately.
>
> Signed-off-by: Navneet Singh <navneet.singh@xxxxxxxxx>
> Co-developed-by: Ira Weiny <ira.weiny@xxxxxxxxx>
> Signed-off-by: Ira Weiny <ira.weiny@xxxxxxxxx>
>
> ---
> Changes:
> [iweiny: patch reorder]
> [iweiny: move hunks from other patches to clarify code changes and
>          add/release flows WRT dax regions]
> [iweiny: use %par]
> [iweiny: clean up variable names]
> [iweiny: Simplify sparse_ops]
> [Fan: avoid open coding range_len()]
> [djbw: s/reg_ext/region_extent]
> ---
>  drivers/cxl/core/extent.c |  76 +++++++++++++--
>  drivers/cxl/cxl.h         |   6 ++
>  drivers/dax/bus.c         | 243 +++++++++++++++++++++++++++++++++++++++++-----
>  drivers/dax/bus.h         |   3 +-
>  drivers/dax/cxl.c         |  63 +++++++++++-
>  drivers/dax/dax-private.h |  34 +++++++
>  drivers/dax/hmem/hmem.c   |   2 +-
>  drivers/dax/pmem.c        |   2 +-
>  8 files changed, 391 insertions(+), 38 deletions(-)
>
> diff --git a/drivers/cxl/core/extent.c b/drivers/cxl/core/extent.c
> index d7d526a51e2b..103b0bec3a4a 100644
> --- a/drivers/cxl/core/extent.c
> +++ b/drivers/cxl/core/extent.c
> @@ -271,20 +271,67 @@ static void calc_hpa_range(struct cxl_endpoint_decoder *cxled,
>  	hpa_range->end = hpa_range->start + range_len(dpa_range) - 1;
>  }
>
> +static int cxlr_notify_extent(struct cxl_region *cxlr, enum dc_event event,
> +			      struct region_extent *region_extent)
> +{
> +	struct cxl_dax_region *cxlr_dax;
> +	struct device *dev;
> +	int rc = 0;
> +
> +	cxlr_dax = cxlr->cxlr_dax;
> +	dev = &cxlr_dax->dev;
> +	dev_dbg(dev, "Trying notify: type %d HPA %par\n",
> +		event, &region_extent->hpa_range);
> +
> +	/*
> +	 * NOTE the lack of a driver indicates a notification has failed. No
> +	 * user space coordination was possible.
> +	 */
> +	device_lock(dev);
> +	if (dev->driver) {
> +		struct cxl_driver *driver = to_cxl_drv(dev->driver);
> +		struct cxl_notify_data notify_data = (struct cxl_notify_data) {
> +			.event = event,
> +			.region_extent = region_extent,
> +		};
> +
> +		if (driver->notify) {
> +			dev_dbg(dev, "Notify: type %d HPA %par\n",
> +				event, &region_extent->hpa_range);
> +			rc = driver->notify(dev, &notify_data);
> +		}
> +	}
> +	device_unlock(dev);

Maybe a cleaner version:

	guard(device)(dev);
	if (!dev->driver || !dev->driver->notify)
		return 0;

	dev_dbg(...);
	return driver->notify(dev, &notify_data);

(fleshed out a bit more at the bottom of this mail)

> +	return rc;
> +}
> +
> +struct rm_data {
> +	struct cxl_region *cxlr;
> +	struct range *range;
> +};
> +
>  static int cxlr_rm_extent(struct device *dev, void *data)
>  {
>  	struct region_extent *region_extent = to_region_extent(dev);
> -	struct range *region_hpa_range = data;
> +	struct rm_data *rm_data = data;
> +	int rc;
>
>  	if (!region_extent)
>  		return 0;
>
>  	/*
> -	 * Any extent which 'touches' the released range is removed.
> +	 * Any extent which 'touches' the released range is attempted to be
> +	 * removed.
>  	 */
> -	if (range_overlaps(region_hpa_range, &region_extent->hpa_range)) {
> +	if (range_overlaps(rm_data->range, &region_extent->hpa_range)) {
> +		struct cxl_region *cxlr = rm_data->cxlr;
> +
>  		dev_dbg(dev, "Remove region extent HPA %par\n",
>  			&region_extent->hpa_range);
> +		rc = cxlr_notify_extent(cxlr, DCD_RELEASE_CAPACITY, region_extent);
> +		if (rc == -EBUSY)
> +			return 0;
> +		/* Extent not in use or error, remove it */
>  		region_rm_extent(region_extent);
>  	}
>  	return 0;
> @@ -312,8 +359,13 @@ int cxl_rm_extent(struct cxl_memdev_state *mds, struct cxl_extent *extent)
>
>  	calc_hpa_range(cxled, cxlr->cxlr_dax, &dpa_range, &hpa_range);
>
> +	struct rm_data rm_data = {
> +		.cxlr = cxlr,
> +		.range = &hpa_range,
> +	};
> +
>  	/* Remove region extents which overlap */
> -	return device_for_each_child(&cxlr->cxlr_dax->dev, &hpa_range,
> +	return device_for_each_child(&cxlr->cxlr_dax->dev, &rm_data,
>  				     cxlr_rm_extent);
>  }
>
> @@ -338,8 +390,20 @@ static int cxlr_add_extent(struct cxl_dax_region *cxlr_dax,
>  		return rc;
>  	}
>
> -	/* device model handles freeing region_extent */
> -	return online_region_extent(region_extent);
> +	rc = online_region_extent(region_extent);
> +	/* device model handled freeing region_extent */
> +	if (rc)
> +		return rc;
> +
> +	rc = cxlr_notify_extent(cxlr_dax->cxlr, DCD_ADD_CAPACITY, region_extent);
> +	/*
> +	 * The region device was briefly live but DAX layer ensures it was not
> +	 * used
> +	 */
> +	if (rc)
> +		region_rm_extent(region_extent);
> +
> +	return rc;
>  }
>
>  /* Callers are expected to ensure cxled has been attached to a region */
> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> index c858e3957fd5..9abbfc68c6ad 100644
> --- a/drivers/cxl/cxl.h
> +++ b/drivers/cxl/cxl.h
> @@ -916,10 +916,16 @@ bool is_cxl_region(struct device *dev);
>
>  extern struct bus_type cxl_bus_type;
>
> +struct cxl_notify_data {
> +	enum dc_event event;
> +	struct region_extent *region_extent;
> +};
> +
>  struct cxl_driver {
>  	const char *name;
>  	int (*probe)(struct device *dev);
>  	void (*remove)(struct device *dev);
> +	int (*notify)(struct device *dev, struct cxl_notify_data *notify_data);
>  	struct device_driver drv;
>  	int id;
>  };
> diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
> index 975860371d9f..f14b0cfa7edd 100644
> --- a/drivers/dax/bus.c
> +++ b/drivers/dax/bus.c
> @@ -183,6 +183,83 @@ static bool is_sparse(struct dax_region *dax_region)
>  	return (dax_region->res.flags & IORESOURCE_DAX_SPARSE_CAP) != 0;
>  }
>
> +static void __dax_release_resource(struct dax_resource *dax_resource)
> +{
> +	struct dax_region *dax_region = dax_resource->region;
> +
> +	lockdep_assert_held_write(&dax_region_rwsem);
> +	dev_dbg(dax_region->dev, "Extent release resource %pr\n",
> +		dax_resource->res);
> +	if (dax_resource->res)
> +		__release_region(&dax_region->res, dax_resource->res->start,
> +				 resource_size(dax_resource->res));
> +	dax_resource->res = NULL;
> +}
> +
> +static void dax_release_resource(void *res)
> +{
> +	struct dax_resource *dax_resource = res;
> +
> +	guard(rwsem_write)(&dax_region_rwsem);
> +	__dax_release_resource(dax_resource);
> +	kfree(dax_resource);
> +}
> +
> +int dax_region_add_resource(struct dax_region *dax_region,
> +			    struct device *device,
> +			    resource_size_t start, resource_size_t length)

kdoc header?

> +{
> +	struct resource *new_resource;
> +	int rc;
> +
> +	struct dax_resource *dax_resource __free(kfree) =
> +		kzalloc(sizeof(*dax_resource), GFP_KERNEL);
> +	if (!dax_resource)
> +		return -ENOMEM;
> +
> +	guard(rwsem_write)(&dax_region_rwsem);
> +
> +	dev_dbg(dax_region->dev, "DAX region resource %pr\n", &dax_region->res);
> +	new_resource = __request_region(&dax_region->res, start, length, "extent", 0);
> +	if (!new_resource) {
> +		dev_err(dax_region->dev, "Failed to add region s:%pa l:%pa\n",
> +			&start, &length);
> +		return -ENOSPC;
> +	}
> +
> +	dev_dbg(dax_region->dev, "add resource %pr\n", new_resource);
> +	dax_resource->region = dax_region;
> +	dax_resource->res = new_resource;
> +	dev_set_drvdata(device, dax_resource);
> +	rc = devm_add_action_or_reset(device, dax_release_resource,
> +				      no_free_ptr(dax_resource));
> +	/* On error; ensure driver data is cleared under semaphore */
> +	if (rc)
> +		dev_set_drvdata(device, NULL);
> +	return rc;
> +}
> +EXPORT_SYMBOL_GPL(dax_region_add_resource);
> +
> +int dax_region_rm_resource(struct dax_region *dax_region,
> +			   struct device *dev)

kdoc header

> +{
> +	struct dax_resource *dax_resource;
> +
> +	guard(rwsem_write)(&dax_region_rwsem);
> +
> +	dax_resource = dev_get_drvdata(dev);
> +	if (!dax_resource)
> +		return 0;
> +
> +	if (dax_resource->use_cnt)
> +		return -EBUSY;
> +
> +	/* avoid races with users trying to use the extent */
> +	__dax_release_resource(dax_resource);
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(dax_region_rm_resource);
> +
>  bool static_dev_dax(struct dev_dax *dev_dax)
>  {
>  	return is_static(dev_dax->region);
> @@ -296,19 +373,44 @@ static ssize_t region_align_show(struct device *dev,
>  static struct device_attribute dev_attr_region_align =
>  		__ATTR(align, 0400, region_align_show, NULL);
>
> +#define for_each_child_resource(extent, res) \
> +	for (res = (extent)->child; res; res = res->sibling)
> +
> +resource_size_t
> +dax_avail_size(struct resource *dax_resource)

kdoc header

DJ

> +{
> +	resource_size_t rc;
> +	struct resource *used_res;
> +
> +	rc = resource_size(dax_resource);
> +	for_each_child_resource(dax_resource, used_res)
> +		rc -= resource_size(used_res);
> +	return rc;
> +}
> +EXPORT_SYMBOL_GPL(dax_avail_size);
> +
>  #define for_each_dax_region_resource(dax_region, res) \
>  	for (res = (dax_region)->res.child; res; res = res->sibling)
>
>  static unsigned long long dax_region_avail_size(struct dax_region *dax_region)
>  {
> -	resource_size_t size = resource_size(&dax_region->res);
> +	resource_size_t size;
>  	struct resource *res;
>
>  	lockdep_assert_held(&dax_region_rwsem);
>
> -	if (is_sparse(dax_region))
> -		return 0;
> +	if (is_sparse(dax_region)) {
> +		/*
> +		 * Children of a sparse region represent available space not
> +		 * used space.
> +		 */
> +		size = 0;
> +		for_each_dax_region_resource(dax_region, res)
> +			size += dax_avail_size(res);
> +		return size;
> +	}
>
> +	size = resource_size(&dax_region->res);
>  	for_each_dax_region_resource(dax_region, res)
>  		size -= resource_size(res);
>  	return size;
> @@ -449,15 +551,26 @@ EXPORT_SYMBOL_GPL(kill_dev_dax);
>  static void trim_dev_dax_range(struct dev_dax *dev_dax)
>  {
>  	int i = dev_dax->nr_range - 1;
> -	struct range *range = &dev_dax->ranges[i].range;
> +	struct dev_dax_range *dev_range = &dev_dax->ranges[i];
> +	struct range *range = &dev_range->range;
>  	struct dax_region *dax_region = dev_dax->region;
> +	struct resource *res = &dax_region->res;
>
>  	lockdep_assert_held_write(&dax_region_rwsem);
>  	dev_dbg(&dev_dax->dev, "delete range[%d]: %#llx:%#llx\n", i,
>  		(unsigned long long)range->start,
>  		(unsigned long long)range->end);
>
> -	__release_region(&dax_region->res, range->start, range_len(range));
> +	if (dev_range->dax_resource) {
> +		res = dev_range->dax_resource->res;
> +		dev_dbg(&dev_dax->dev, "Trim sparse extent %pr\n", res);
> +	}
> +
> +	__release_region(res, range->start, range_len(range));
> +
> +	if (dev_range->dax_resource)
> +		dev_range->dax_resource->use_cnt--;
> +
>  	if (--dev_dax->nr_range == 0) {
>  		kfree(dev_dax->ranges);
>  		dev_dax->ranges = NULL;
> @@ -640,7 +753,7 @@ static void dax_region_unregister(void *region)
>
>  struct dax_region *alloc_dax_region(struct device *parent, int region_id,
>  		struct range *range, int target_node, unsigned int align,
> -		unsigned long flags)
> +		unsigned long flags, struct dax_sparse_ops *sparse_ops)
>  {
>  	struct dax_region *dax_region;
>
> @@ -658,12 +771,16 @@ struct dax_region *alloc_dax_region(struct device *parent, int region_id,
>  			|| !IS_ALIGNED(range_len(range), align))
>  		return NULL;
>
> +	if (!sparse_ops && (flags & IORESOURCE_DAX_SPARSE_CAP))
> +		return NULL;
> +
>  	dax_region = kzalloc(sizeof(*dax_region), GFP_KERNEL);
>  	if (!dax_region)
>  		return NULL;
>
>  	dev_set_drvdata(parent, dax_region);
>  	kref_init(&dax_region->kref);
> +	dax_region->sparse_ops = sparse_ops;
>  	dax_region->id = region_id;
>  	dax_region->align = align;
>  	dax_region->dev = parent;
> @@ -845,7 +962,8 @@ static int devm_register_dax_mapping(struct dev_dax *dev_dax, int range_id)
>  }
>
>  static int alloc_dev_dax_range(struct resource *parent, struct dev_dax *dev_dax,
> -		u64 start, resource_size_t size)
> +		u64 start, resource_size_t size,
> +		struct dax_resource *dax_resource)
>  {
>  	struct device *dev = &dev_dax->dev;
>  	struct dev_dax_range *ranges;
> @@ -884,6 +1002,7 @@ static int alloc_dev_dax_range(struct resource *parent, struct dev_dax *dev_dax,
>  			.start = alloc->start,
>  			.end = alloc->end,
>  		},
> +		.dax_resource = dax_resource,
>  	};
>
>  	dev_dbg(dev, "alloc range[%d]: %pa:%pa\n", dev_dax->nr_range - 1,
> @@ -966,7 +1085,8 @@ static int dev_dax_shrink(struct dev_dax *dev_dax, resource_size_t size)
>  	int i;
>
>  	for (i = dev_dax->nr_range - 1; i >= 0; i--) {
> -		struct range *range = &dev_dax->ranges[i].range;
> +		struct dev_dax_range *dev_range = &dev_dax->ranges[i];
> +		struct range *range = &dev_range->range;
>  		struct dax_mapping *mapping = dev_dax->ranges[i].mapping;
>  		struct resource *adjust = NULL, *res;
>  		resource_size_t shrink;
> @@ -982,12 +1102,21 @@ static int dev_dax_shrink(struct dev_dax *dev_dax, resource_size_t size)
>  			continue;
>  		}
>
> -		for_each_dax_region_resource(dax_region, res)
> -			if (strcmp(res->name, dev_name(dev)) == 0
> -					&& res->start == range->start) {
> -				adjust = res;
> -				break;
> -			}
> +		if (dev_range->dax_resource) {
> +			for_each_child_resource(dev_range->dax_resource->res, res)
> +				if (strcmp(res->name, dev_name(dev)) == 0
> +						&& res->start == range->start) {
> +					adjust = res;
> +					break;
> +				}
> +		} else {
> +			for_each_dax_region_resource(dax_region, res)
> +				if (strcmp(res->name, dev_name(dev)) == 0
> +						&& res->start == range->start) {
> +					adjust = res;
> +					break;
> +				}
> +		}
>
>  		if (dev_WARN_ONCE(dev, !adjust || i != dev_dax->nr_range - 1,
>  				  "failed to find matching resource\n"))
> @@ -1025,19 +1154,21 @@ static bool adjust_ok(struct dev_dax *dev_dax, struct resource *res)
>  }
>
>  /**
> - * dev_dax_resize_static - Expand the device into the unused portion of the
> - * region. This may involve adjusting the end of an existing resource, or
> - * allocating a new resource.
> + * __dev_dax_resize - Expand the device into the unused portion of the region.
> + * This may involve adjusting the end of an existing resource, or allocating a
> + * new resource.
>   *
>   * @parent: parent resource to allocate this range in
>   * @dev_dax: DAX device to be expanded
>   * @to_alloc: amount of space to alloc; must be <= space available in @parent
> + * @dax_resource: if sparse; the parent resource
>   *
>   * Return the amount of space allocated or -ERRNO on failure
>   */
> -static ssize_t dev_dax_resize_static(struct resource *parent,
> -				     struct dev_dax *dev_dax,
> -				     resource_size_t to_alloc)
> +static ssize_t __dev_dax_resize(struct resource *parent,
> +				struct dev_dax *dev_dax,
> +				resource_size_t to_alloc,
> +				struct dax_resource *dax_resource)
>  {
>  	struct resource *res, *first;
>  	int rc;
> @@ -1045,7 +1176,8 @@ static ssize_t dev_dax_resize_static(struct resource *parent,
>  	first = parent->child;
>  	if (!first) {
>  		rc = alloc_dev_dax_range(parent, dev_dax,
> -					 parent->start, to_alloc);
> +					 parent->start, to_alloc,
> +					 dax_resource);
>  		if (rc)
>  			return rc;
>  		return to_alloc;
> @@ -1059,7 +1191,8 @@ static ssize_t dev_dax_resize_static(struct resource *parent,
>  		if (res == first && res->start > parent->start) {
>  			alloc = min(res->start - parent->start, to_alloc);
>  			rc = alloc_dev_dax_range(parent, dev_dax,
> -						 parent->start, alloc);
> +						 parent->start, alloc,
> +						 dax_resource);
>  			if (rc)
>  				return rc;
>  			return alloc;
> @@ -1083,7 +1216,8 @@ static ssize_t dev_dax_resize_static(struct resource *parent,
>  				return rc;
>  			return alloc;
>  		}
> -		rc = alloc_dev_dax_range(parent, dev_dax, res->end + 1, alloc);
> +		rc = alloc_dev_dax_range(parent, dev_dax, res->end + 1, alloc,
> +					 dax_resource);
>  		if (rc)
>  			return rc;
>  		return alloc;
> @@ -1094,6 +1228,54 @@ static ssize_t dev_dax_resize_static(struct resource *parent,
>  	return 0;
>  }
>
> +static ssize_t dev_dax_resize_static(struct dax_region *dax_region,
> +				     struct dev_dax *dev_dax,
> +				     resource_size_t to_alloc)
> +{
> +	return __dev_dax_resize(&dax_region->res, dev_dax, to_alloc, NULL);
> +}
> +
> +static int find_free_extent(struct device *dev, void *data)
> +{
> +	struct dax_region *dax_region = data;
> +	struct dax_resource *dax_resource;
> +
> +	if (!dax_region->sparse_ops->is_extent(dev))
> +		return 0;
> +
> +	dax_resource = dev_get_drvdata(dev);
> +	if (!dax_resource || !dax_avail_size(dax_resource->res))
> +		return 0;
> +	return 1;
> +}
> +
> +static ssize_t dev_dax_resize_sparse(struct dax_region *dax_region,
> +				     struct dev_dax *dev_dax,
> +				     resource_size_t to_alloc)
> +{
> +	struct dax_resource *dax_resource;
> +	resource_size_t available_size;
> +	struct device *extent_dev;
> +	ssize_t alloc;
> +
> +	extent_dev = device_find_child(dax_region->dev, dax_region,
> +				       find_free_extent);
> +	if (!extent_dev)
> +		return 0;
> +
> +	dax_resource = dev_get_drvdata(extent_dev);
> +	if (!dax_resource)
> +		return 0;
> +
> +	available_size = dax_avail_size(dax_resource->res);
> +	to_alloc = min(available_size, to_alloc);
> +	alloc = __dev_dax_resize(dax_resource->res, dev_dax, to_alloc, dax_resource);
> +	if (alloc > 0)
> +		dax_resource->use_cnt++;
> +	put_device(extent_dev);
> +	return alloc;
> +}
> +
>  static ssize_t dev_dax_resize(struct dax_region *dax_region,
>  		struct dev_dax *dev_dax, resource_size_t size)
>  {
> @@ -1117,7 +1299,10 @@ static ssize_t dev_dax_resize(struct dax_region *dax_region,
>  		return -ENXIO;
>
>  retry:
> -	alloc = dev_dax_resize_static(&dax_region->res, dev_dax, to_alloc);
> +	if (is_sparse(dax_region))
> +		alloc = dev_dax_resize_sparse(dax_region, dev_dax, to_alloc);
> +	else
> +		alloc = dev_dax_resize_static(dax_region, dev_dax, to_alloc);
>  	if (alloc <= 0)
>  		return alloc;
>  	to_alloc -= alloc;
> @@ -1226,7 +1411,7 @@ static ssize_t mapping_store(struct device *dev, struct device_attribute *attr,
>  	to_alloc = range_len(&r);
>  	if (alloc_is_aligned(dev_dax, to_alloc))
>  		rc = alloc_dev_dax_range(&dax_region->res, dev_dax, r.start,
> -					 to_alloc);
> +					 to_alloc, NULL);
>  	up_write(&dax_dev_rwsem);
>  	up_write(&dax_region_rwsem);
>
> @@ -1494,8 +1679,14 @@ static struct dev_dax *__devm_create_dev_dax(struct dev_dax_data *data)
>  	device_initialize(dev);
>  	dev_set_name(dev, "dax%d.%d", dax_region->id, dev_dax->id);
>
> +	if (is_sparse(dax_region) && data->size) {
> +		dev_err(parent, "Sparse DAX region devices are created initially with 0 size");
> +		rc = -EINVAL;
> +		goto err_id;
> +	}
> +
>  	rc = alloc_dev_dax_range(&dax_region->res, dev_dax, dax_region->res.start,
> -				 data->size);
> +				 data->size, NULL);
>  	if (rc)
>  		goto err_range;
>
> diff --git a/drivers/dax/bus.h b/drivers/dax/bus.h
> index 783bfeef42cc..ae5029ea6047 100644
> --- a/drivers/dax/bus.h
> +++ b/drivers/dax/bus.h
> @@ -9,6 +9,7 @@ struct dev_dax;
>  struct resource;
>  struct dax_device;
>  struct dax_region;
> +struct dax_sparse_ops;
>
>  /* dax bus specific ioresource flags */
>  #define IORESOURCE_DAX_STATIC		BIT(0)
> @@ -17,7 +18,7 @@ struct dax_region;
>
>  struct dax_region *alloc_dax_region(struct device *parent, int region_id,
>  		struct range *range, int target_node, unsigned int align,
> -		unsigned long flags);
> +		unsigned long flags, struct dax_sparse_ops *sparse_ops);
>
>  struct dev_dax_data {
>  	struct dax_region *dax_region;
> diff --git a/drivers/dax/cxl.c b/drivers/dax/cxl.c
> index 367e86b1c22a..bf3b82b0120d 100644
> --- a/drivers/dax/cxl.c
> +++ b/drivers/dax/cxl.c
> @@ -5,6 +5,60 @@
>
>  #include "../cxl/cxl.h"
>  #include "bus.h"
> +#include "dax-private.h"
> +
> +static int __cxl_dax_add_resource(struct dax_region *dax_region,
> +				  struct region_extent *region_extent)
> +{
> +	resource_size_t start, length;
> +	struct device *dev;
> +
> +	dev = &region_extent->dev;
> +	start = dax_region->res.start + region_extent->hpa_range.start;
> +	length = range_len(&region_extent->hpa_range);
> +	return dax_region_add_resource(dax_region, dev, start, length);
> +}
> +
> +static int cxl_dax_add_resource(struct device *dev, void *data)
> +{
> +	struct dax_region *dax_region = data;
> +	struct region_extent *region_extent;
> +
> +	region_extent = to_region_extent(dev);
> +	if (!region_extent)
> +		return 0;
> +
> +	dev_dbg(dax_region->dev, "Adding resource HPA %par\n",
> +		&region_extent->hpa_range);
> +
> +	return __cxl_dax_add_resource(dax_region, region_extent);
> +}
> +
> +static int cxl_dax_region_notify(struct device *dev,
> +				 struct cxl_notify_data *notify_data)
> +{
> +	struct cxl_dax_region *cxlr_dax = to_cxl_dax_region(dev);
> +	struct dax_region *dax_region = dev_get_drvdata(dev);
> +	struct region_extent *region_extent = notify_data->region_extent;
> +
> +	switch (notify_data->event) {
> +	case DCD_ADD_CAPACITY:
> +		return __cxl_dax_add_resource(dax_region, region_extent);
> +	case DCD_RELEASE_CAPACITY:
> +		return dax_region_rm_resource(dax_region, &region_extent->dev);
> +	case DCD_FORCED_CAPACITY_RELEASE:
> +	default:
> +		dev_err(&cxlr_dax->dev, "Unknown DC event %d\n",
> +			notify_data->event);
> +		break;
> +	}
> +
> +	return -ENXIO;
> +}
> +
> +struct dax_sparse_ops sparse_ops = {
> +	.is_extent = is_region_extent,
> +};
>
>  static int cxl_dax_region_probe(struct device *dev)
>  {
> @@ -24,14 +78,16 @@ static int cxl_dax_region_probe(struct device *dev)
>  		flags |= IORESOURCE_DAX_SPARSE_CAP;
>
>  	dax_region = alloc_dax_region(dev, cxlr->id, &cxlr_dax->hpa_range, nid,
> -				      PMD_SIZE, flags);
> +				      PMD_SIZE, flags, &sparse_ops);
>  	if (!dax_region)
>  		return -ENOMEM;
>
> -	if (cxlr->mode == CXL_REGION_DC)
> +	if (cxlr->mode == CXL_REGION_DC) {
> +		device_for_each_child(&cxlr_dax->dev, dax_region,
> +				      cxl_dax_add_resource);
>  		/* Add empty seed dax device */
>  		dev_size = 0;
> -	else
> +	} else
>  		dev_size = range_len(&cxlr_dax->hpa_range);
>
>  	data = (struct dev_dax_data) {
> @@ -47,6 +103,7 @@ static int cxl_dax_region_probe(struct device *dev)
>  static struct cxl_driver cxl_dax_region_driver = {
>  	.name = "cxl_dax_region",
>  	.probe = cxl_dax_region_probe,
> +	.notify = cxl_dax_region_notify,
>  	.id = CXL_DEVICE_DAX_REGION,
>  	.drv = {
>  		.suppress_bind_attrs = true,
> diff --git a/drivers/dax/dax-private.h b/drivers/dax/dax-private.h
> index ccde98c3d4e2..9e9f98c85620 100644
> --- a/drivers/dax/dax-private.h
> +++ b/drivers/dax/dax-private.h
> @@ -16,6 +16,36 @@ struct inode *dax_inode(struct dax_device *dax_dev);
>  int dax_bus_init(void);
>  void dax_bus_exit(void);
>
> +/**
> + * struct dax_resource - For sparse regions; an active resource
> + * @region: dax_region this resource is in
> + * @res: resource
> + * @use_cnt: count the number of uses of this resource
> + *
> + * Changes to the dax_region and the dax_resources within it are protected by
> + * dax_region_rwsem
> + */
> +struct dax_resource {
> +	struct dax_region *region;
> +	struct resource *res;
> +	unsigned int use_cnt;
> +};
> +int dax_region_add_resource(struct dax_region *dax_region, struct device *dev,
> +			    resource_size_t start, resource_size_t length);
> +int dax_region_rm_resource(struct dax_region *dax_region,
> +			   struct device *dev);
> +resource_size_t dax_avail_size(struct resource *dax_resource);
> +
> +typedef int (*match_cb)(struct device *dev, resource_size_t *size_avail);
> +
> +/**
> + * struct dax_sparse_ops - Operations for sparse regions
> + * @is_extent: return if the device is an extent
> + */
> +struct dax_sparse_ops {
> +	bool (*is_extent)(struct device *dev);
> +};
> +
>  /**
>   * struct dax_region - mapping infrastructure for dax devices
>   * @id: kernel-wide unique region for a memory range
> @@ -27,6 +57,7 @@ void dax_bus_exit(void);
>   * @res: resource tree to track instance allocations
>   * @seed: allow userspace to find the first unbound seed device
>   * @youngest: allow userspace to find the most recently created device
> + * @sparse_ops: operations required for sparse regions
>   */
>  struct dax_region {
>  	int id;
> @@ -38,6 +69,7 @@ struct dax_region {
>  	struct resource res;
>  	struct device *seed;
>  	struct device *youngest;
> +	struct dax_sparse_ops *sparse_ops;
>  };
>
>  struct dax_mapping {
> @@ -62,6 +94,7 @@ struct dax_mapping {
>   * @pgoff: page offset
>   * @range: resource-span
>   * @mapping: device to assist in interrogating the range layout
> + * @dax_resource: if not NULL; dax sparse resource containing this range
>   */
>  struct dev_dax {
>  	struct dax_region *region;
> @@ -79,6 +112,7 @@ struct dev_dax {
>  		unsigned long pgoff;
>  		struct range range;
>  		struct dax_mapping *mapping;
> +		struct dax_resource *dax_resource;
>  	} *ranges;
>  };
>
> diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c
> index 5e7c53f18491..0eea65052874 100644
> --- a/drivers/dax/hmem/hmem.c
> +++ b/drivers/dax/hmem/hmem.c
> @@ -28,7 +28,7 @@ static int dax_hmem_probe(struct platform_device *pdev)
>
>  	mri = dev->platform_data;
>  	dax_region = alloc_dax_region(dev, pdev->id, &mri->range,
> -				      mri->target_node, PMD_SIZE, flags);
> +				      mri->target_node, PMD_SIZE, flags, NULL);
>  	if (!dax_region)
>  		return -ENOMEM;
>
> diff --git a/drivers/dax/pmem.c b/drivers/dax/pmem.c
> index c8ebf4e281f2..f927e855f240 100644
> --- a/drivers/dax/pmem.c
> +++ b/drivers/dax/pmem.c
> @@ -54,7 +54,7 @@ static struct dev_dax *__dax_pmem_probe(struct device *dev)
>  	range.start += offset;
>  	dax_region = alloc_dax_region(dev, region_id, &range,
>  			nd_region->target_node, le32_to_cpu(pfn_sb->align),
> -			IORESOURCE_DAX_STATIC);
> +			IORESOURCE_DAX_STATIC, NULL);
>  	if (!dax_region)
>  		return ERR_PTR(-ENOMEM);
>
>
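
Expanding on the guard() comment above, this is roughly the shape I had in
mind for cxlr_notify_extent() (untested sketch; note the notify op hangs off
struct cxl_driver, so it has to go through to_cxl_drv() rather than
dev->driver directly):

	/* keep the notify_data initialization as it is in the patch */
	struct cxl_notify_data notify_data = (struct cxl_notify_data) {
		.event = event,
		.region_extent = region_extent,
	};
	struct cxl_driver *driver;

	guard(device)(dev);
	if (!dev->driver)
		return 0;

	driver = to_cxl_drv(dev->driver);
	if (!driver->notify)
		return 0;

	dev_dbg(dev, "Notify: type %d HPA %par\n",
		event, &region_extent->hpa_range);
	return driver->notify(dev, &notify_data);

That drops the 'rc' local and keeps the no-driver / no-notify cases as the
silent success they are today.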
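
On the kdoc comments above, something along these lines for
dax_region_add_resource() is all I was thinking of (wording is only a
suggestion, and the same treatment applies to dax_region_rm_resource() and
dax_avail_size()):

	/**
	 * dax_region_add_resource - add an extent resource to a sparse DAX region
	 * @dax_region: region to add the resource to
	 * @device: extent device which owns the resource; its driver data is
	 *          set to the new dax_resource
	 * @start: start address of the extent
	 * @length: length of the extent
	 *
	 * A devres action is registered on @device to release the resource
	 * automatically when the device goes away.
	 *
	 * Returns 0 on success, -ENOMEM or -ENOSPC on failure.
	 */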