Re: [PATCH v4 24/28] dax/region: Create resources on sparse DAX regions

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, 07 Oct 2024 18:16:30 -0500
ira.weiny@xxxxxxxxx wrote:

> From: Navneet Singh <navneet.singh@xxxxxxxxx>
> 
> DAX regions which map dynamic capacity partitions require that memory be
> allowed to come and go.  Recall sparse regions were created for this
> purpose.  Now that extents can be realized within DAX regions the DAX
> region driver can start tracking sub-resource information.
> 
> The tight relationship between DAX region operations and extent
> operations requires memory changes to be controlled synchronously with
> the user of the region.  Synchronize through the dax_region_rwsem and by
> having the region driver drive both the region device as well as the
> extent sub-devices.
> 
> Recall requests to remove extents can happen at any time and that a host
> is not obligated to release the memory until it is not being used.  If
> an extent is not used allow a release response.
> 
> The DAX layer has no need for the details of the CXL memory extent
> devices.  Expose extents to the DAX layer as device children of the DAX
> region device.  A single callback from the driver aids the DAX layer to
> determine if the child device is an extent.  The DAX layer also
> registers a devres function to automatically clean up when the device is
> removed from the region.
> 
> There is a race between extents being surfaced and the dax_cxl driver
> being loaded.  The driver must therefore scan for any existing extents
> while still under the device lock.
> 
> Respond to extent notifications.  Manage the DAX region resource tree
> based on the extents' lifetime.  Return the status of remove
> notifications to lower layers such that they can manage the hardware
> appropriately.
> 
> Signed-off-by: Navneet Singh <navneet.singh@xxxxxxxxx>
> Co-developed-by: Ira Weiny <ira.weiny@xxxxxxxxx>
> Signed-off-by: Ira Weiny <ira.weiny@xxxxxxxxx>
> 
More somewhat superficial review from me.
Needs DAX expert reviewers.

Jonathan

> ---
>  drivers/cxl/core/extent.c |  74 ++++++++++++--
>  drivers/cxl/cxl.h         |   6 ++
>  drivers/dax/bus.c         | 243 +++++++++++++++++++++++++++++++++++++++++-----
>  drivers/dax/bus.h         |   3 +-
>  drivers/dax/cxl.c         |  62 +++++++++++-
>  drivers/dax/dax-private.h |  42 ++++++++
>  drivers/dax/hmem/hmem.c   |   2 +-
>  drivers/dax/pmem.c        |   2 +-
>  8 files changed, 396 insertions(+), 38 deletions(-)
> 
> diff --git a/drivers/cxl/core/extent.c b/drivers/cxl/core/extent.c
> index a1eb6e8e4f1a..75fb73ce2185 100644
> --- a/drivers/cxl/core/extent.c
> +++ b/drivers/cxl/core/extent.c
> @@ -270,20 +270,65 @@ static void calc_hpa_range(struct cxl_endpoint_decoder *cxled,
>  	hpa_range->end = hpa_range->start + range_len(dpa_range) - 1;
>  }
>  
> +static int cxlr_notify_extent(struct cxl_region *cxlr, enum dc_event event,
> +			      struct region_extent *region_extent)
> +{
> +	struct device *dev = &cxlr->cxlr_dax->dev;
> +	struct cxl_notify_data notify_data;
> +	struct cxl_driver *driver;
> +
> +	dev_dbg(dev, "Trying notify: type %d HPA %pra\n",
> +		event, &region_extent->hpa_range);
> +
> +	guard(device)(dev);
> +
> +	/*
> +	 * The lack of a driver indicates a notification has failed.  No user
> +	 * space coordiantion was possible.
spell check.
coordination

> +	 */
> +	if (!dev->driver)
> +		return 0;
> +	driver = to_cxl_drv(dev->driver);
> +	if (!driver->notify)
> +		return 0;
> +
> +	notify_data = (struct cxl_notify_data) {
> +		.event = event,
> +		.region_extent = region_extent,
> +	};
> +
> +	dev_dbg(dev, "Notify: type %d HPA %pra\n",
> +		event, &region_extent->hpa_range);
> +	return driver->notify(dev, &notify_data);
> +}

> diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
> index f0e3f8c787df..4e19d18369de 100644
> --- a/drivers/dax/bus.c
> +++ b/drivers/dax/bus.c
> @@ -183,6 +183,86 @@ static bool is_sparse(struct dax_region *dax_region)
>  	return (dax_region->res.flags & IORESOURCE_DAX_SPARSE_CAP) != 0;
>  }

> +
> +int dax_region_add_resource(struct dax_region *dax_region,
> +			    struct device *device,
> +			    resource_size_t start, resource_size_t length)
> +{
> +	struct resource *new_resource;
> +	int rc;
> +
> +	struct dax_resource *dax_resource __free(kfree) =
> +				kzalloc(sizeof(*dax_resource), GFP_KERNEL);
> +	if (!dax_resource)
> +		return -ENOMEM;
> +
> +	guard(rwsem_write)(&dax_region_rwsem);
> +
> +	dev_dbg(dax_region->dev, "DAX region resource %pr\n", &dax_region->res);
> +	new_resource = __request_region(&dax_region->res, start, length, "extent", 0);
> +	if (!new_resource) {
> +		dev_err(dax_region->dev, "Failed to add region s:%pa l:%pa\n",
> +			&start, &length);
> +		return -ENOSPC;
> +	}
> +
> +	dev_dbg(dax_region->dev, "add resource %pr\n", new_resource);
> +	dax_resource->region = dax_region;
> +	dax_resource->res = new_resource;
> +	dev_set_drvdata(device, dax_resource);
> +	rc = devm_add_action_or_reset(device, dax_release_resource,
> +				      no_free_ptr(dax_resource));
> +	/*  On error; ensure driver data is cleared under semaphore */

It's not used in the dax_release_resource callback (that I can
immediately spot) so could you just not set it until after
this has succeeded?

> +	if (rc)
> +		dev_set_drvdata(device, NULL);
i.e. move
	dev_set_drvdata(device, dax_resource);
to here.

> +	return rc;
> +}
> +EXPORT_SYMBOL_GPL(dax_region_add_resource);
Adding quite a few exports. Is it time to namespace DAX exports?
Perhaps a follow-up series.



>  bool static_dev_dax(struct dev_dax *dev_dax)
>  {
>  	return is_static(dev_dax->region);
> @@ -296,19 +376,44 @@ static ssize_t region_align_show(struct device *dev,
>  static struct device_attribute dev_attr_region_align =
>  		__ATTR(align, 0400, region_align_show, NULL);
>  
> +#define for_each_child_resource(extent, res) \
> +	for (res = (extent)->child; res; res = res->sibling)
> +
Extent naming in here is a little off for a general sounding macro.
Maybe for_each_child_resource(parent, res) or something like that?

Seems generally useful. Maybe move to resource.h?

> @@ -1494,8 +1679,14 @@ static struct dev_dax *__devm_create_dev_dax(struct dev_dax_data *data)
>  	device_initialize(dev);
>  	dev_set_name(dev, "dax%d.%d", dax_region->id, dev_dax->id);
>  
> +	if (is_sparse(dax_region) && data->size) {
> +		dev_err(parent, "Sparse DAX region devices must be created initially with 0 size");
> +		rc = -EINVAL;
> +		goto err_id;

Right label?  This code doesn't have side effects and the next error path is goto err_range.
Looks like you fail to reverse the alloc_dev_dax_id() in this error path.

> +	}
> +
>  	rc = alloc_dev_dax_range(&dax_region->res, dev_dax, dax_region->res.start,
> -				 data->size);
> +				 data->size, NULL);
>  	if (rc)
>  		goto err_range;
>  
> diff --git a/drivers/dax/bus.h b/drivers/dax/bus.h
> index 783bfeef42cc..ae5029ea6047 100644
> --- a/drivers/dax/bus.h
> +++ b/drivers/dax/bus.h
> @@ -9,6 +9,7 @@ struct dev_dax;
>  struct resource;
>  struct dax_device;
>  struct dax_region;
> +struct dax_sparse_ops;
>  
>  /* dax bus specific ioresource flags */
>  #define IORESOURCE_DAX_STATIC BIT(0)
> @@ -17,7 +18,7 @@ struct dax_region;
>  
>  struct dax_region *alloc_dax_region(struct device *parent, int region_id,
>  		struct range *range, int target_node, unsigned int align,
> -		unsigned long flags);
> +		unsigned long flags, struct dax_sparse_ops *sparse_ops);
>  
>  struct dev_dax_data {
>  	struct dax_region *dax_region;
> diff --git a/drivers/dax/cxl.c b/drivers/dax/cxl.c
> index 367e86b1c22a..df979ea2cb59 100644
> --- a/drivers/dax/cxl.c
> +++ b/drivers/dax/cxl.c
> @@ -5,6 +5,58 @@
>  
>  #include "../cxl/cxl.h"
>  #include "bus.h"
> +#include "dax-private.h"
> +
> +static int __cxl_dax_add_resource(struct dax_region *dax_region,
> +				  struct region_extent *region_extent)
> +{
> +	resource_size_t start, length;
> +	struct device *dev;
> +
> +	dev = &region_extent->dev;
Might as well do
	struct device *dev = &region_extent->dev;


> +	start = dax_region->res.start + region_extent->hpa_range.start;
> +	length = range_len(&region_extent->hpa_range);
> +	return dax_region_add_resource(dax_region, dev, start, length);
> +}


> diff --git a/drivers/dax/dax-private.h b/drivers/dax/dax-private.h
> index ccde98c3d4e2..e3866115243e 100644
> --- a/drivers/dax/dax-private.h
> +++ b/drivers/dax/dax-private.h
...

> +/*
> + * Similar to run_dax() dax_region_{add,rm}_resource() and dax_avail_size() are
> + * exported but are not intended to be generic operations outside the dax
> + * subsystem.  They are only generic between the dax layer and the dax drivers.
> + */
> +int dax_region_add_resource(struct dax_region *dax_region, struct device *dev,
> +			    resource_size_t start, resource_size_t length);
> +int dax_region_rm_resource(struct dax_region *dax_region,
> +			   struct device *dev);
> +resource_size_t dax_avail_size(struct resource *dax_resource);
> +
> +typedef int (*match_cb)(struct device *dev, resource_size_t *size_avail);
Why is this here?





[Index of Archives]     [Kernel Newbies]     [Security]     [Netfilter]     [Bugtraq]     [Linux FS]     [Yosemite Forum]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Video 4 Linux]     [Device Mapper]     [Linux Resources]

  Powered by Linux