Re: [PATCH] cxl: Update Soft Reserved resources upon region creation

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Dec 02, 2024 at 09:55:42AM -0600, Nathan Fontenot wrote:
> Update handling of SOFT RESERVE iomem resources that intersect with
> CXL region resources to remove the intersections from the SOFT RESERVE
> resources. The current approach of leaving the SOFT RESERVE
> resource as is can cause failures during hotplug replace of CXL
> devices because the resource is not available for reuse after
> teardown of the CXL device.
> 
> The approach is to trim out any pieces of SOFT RESERVE resources
> that intersect CXL regions. To do this, first set aside any SOFT RESERVE
> resources that intersect with a CFMWS into a separate resource tree
> during e820__reserve_resources_late() that would have been otherwise
> added to the iomem resource tree.
> 
> As CXL regions are created the cxl resource created for the new
> region is used to trim intersections from the SOFT RESERVE
> resources that were previously set aside.
> 
> Once CXL device probe has completed ant remaining SOFT RESERVE resources
/ant/any/
> remaining are added to the iomem resource tree. As each resource
> is added to the oiomem resource tree a new notifier chain is invoked
/oiomem/iomem/
> to notify the dax driver of newly added SOFT RESERVE resources so that
> the dax driver can consume them.

Overall, I find this patch fairly complicated and hard to review.
Could it be broken down into smaller patches to make it easier to
review?

One minor thing inline.

> 
> Signed-off-by: Nathan Fontenot <nathan.fontenot@xxxxxxx>
> ---
>  arch/x86/kernel/e820.c    |  17 ++++-
>  drivers/cxl/core/region.c |   8 +-
>  drivers/cxl/port.c        |  15 ++++
>  drivers/dax/hmem/device.c |  13 ++--
>  drivers/dax/hmem/hmem.c   |  15 ++++
>  drivers/dax/hmem/hmem.h   |  11 +++
>  include/linux/dax.h       |   4 -
>  include/linux/ioport.h    |   6 ++
>  kernel/resource.c         | 155 +++++++++++++++++++++++++++++++++++++-
>  9 files changed, 229 insertions(+), 15 deletions(-)
>  create mode 100644 drivers/dax/hmem/hmem.h
> 
> diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
> index 4893d30ce438..cab82e9324a5 100644
> --- a/arch/x86/kernel/e820.c
> +++ b/arch/x86/kernel/e820.c
> @@ -1210,14 +1210,23 @@ static unsigned long __init ram_alignment(resource_size_t pos)
>  
>  void __init e820__reserve_resources_late(void)
>  {
> -	int i;
>  	struct resource *res;
> +	int i;
>  
> +	/*
> +	 * Prior to inserting SOFT_RESERVED resources we want to check for an
> +	 * intersection with potential CXL resources. Any SOFT_RESERVED resources
> +	 * that do intersect a potential CXL resource are set aside so they
> +	 * can be trimmed to accommodate CXL resource intersections and added to
> +	 * the iomem resource tree after the CXL drivers have completed their
> +	 * device probe.
> +	 */
>  	res = e820_res;
> -	for (i = 0; i < e820_table->nr_entries; i++) {
> -		if (!res->parent && res->end)
> +	for (i = 0; i < e820_table->nr_entries; i++, res++) {
> +		if (res->desc == IORES_DESC_SOFT_RESERVED)
> +			insert_soft_reserve_resource(res);
> +		else if (!res->parent && res->end)
>  			insert_resource_expand_to_fit(&iomem_resource, res);
> -		res++;
Maybe we can keep the original (res++) here to avoid the noise, since moving
it is purely a style change and does not affect what the patch is trying to achieve.

Fan
>  	}
>  
>  	/*
> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> index 21ad5f242875..c458a6313b31 100644
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c
> @@ -3226,6 +3226,12 @@ static int match_region_by_range(struct device *dev, void *data)
>  	return rc;
>  }
>  
> +static int insert_region_resource(struct resource *parent, struct resource *res)
> +{
> +	trim_soft_reserve_resources(res);
> +	return insert_resource(parent, res);
> +}
> +
>  /* Establish an empty region covering the given HPA range */
>  static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
>  					   struct cxl_endpoint_decoder *cxled)
> @@ -3272,7 +3278,7 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
>  
>  	*res = DEFINE_RES_MEM_NAMED(hpa->start, range_len(hpa),
>  				    dev_name(&cxlr->dev));
> -	rc = insert_resource(cxlrd->res, res);
> +	rc = insert_region_resource(cxlrd->res, res);
>  	if (rc) {
>  		/*
>  		 * Platform-firmware may not have split resources like "System
> diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c
> index d7d5d982ce69..4461f2a80d72 100644
> --- a/drivers/cxl/port.c
> +++ b/drivers/cxl/port.c
> @@ -89,6 +89,20 @@ static int cxl_switch_port_probe(struct cxl_port *port)
>  	return -ENXIO;
>  }
>  
> +static void cxl_sr_update(struct work_struct *w)
> +{
> +	merge_soft_reserve_resources();
> +}
> +
> +DECLARE_DELAYED_WORK(cxl_sr_work, cxl_sr_update);
> +
> +static void schedule_soft_reserve_update(void)
> +{
> +	int timeout = 5 * HZ;
> +
> +	mod_delayed_work(system_wq, &cxl_sr_work, timeout);
> +}
> +
>  static int cxl_endpoint_port_probe(struct cxl_port *port)
>  {
>  	struct cxl_endpoint_dvsec_info info = { .port = port };
> @@ -140,6 +154,7 @@ static int cxl_endpoint_port_probe(struct cxl_port *port)
>  	 */
>  	device_for_each_child(&port->dev, root, discover_region);
>  
> +	schedule_soft_reserve_update();
>  	return 0;
>  }
>  
> diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c
> index f9e1a76a04a9..c45791ad4858 100644
> --- a/drivers/dax/hmem/device.c
> +++ b/drivers/dax/hmem/device.c
> @@ -4,6 +4,7 @@
>  #include <linux/module.h>
>  #include <linux/dax.h>
>  #include <linux/mm.h>
> +#include "hmem.h"
>  
>  static bool nohmem;
>  module_param_named(disable, nohmem, bool, 0444);
> @@ -17,6 +18,9 @@ static struct resource hmem_active = {
>  	.flags = IORESOURCE_MEM,
>  };
>  
> +struct platform_device *hmem_pdev;
> +EXPORT_SYMBOL_GPL(hmem_pdev);
> +
>  int walk_hmem_resources(struct device *host, walk_hmem_fn fn)
>  {
>  	struct resource *res;
> @@ -35,7 +39,6 @@ EXPORT_SYMBOL_GPL(walk_hmem_resources);
>  
>  static void __hmem_register_resource(int target_nid, struct resource *res)
>  {
> -	struct platform_device *pdev;
>  	struct resource *new;
>  	int rc;
>  
> @@ -51,15 +54,15 @@ static void __hmem_register_resource(int target_nid, struct resource *res)
>  	if (platform_initialized)
>  		return;
>  
> -	pdev = platform_device_alloc("hmem_platform", 0);
> -	if (!pdev) {
> +	hmem_pdev = platform_device_alloc("hmem_platform", 0);
> +	if (!hmem_pdev) {
>  		pr_err_once("failed to register device-dax hmem_platform device\n");
>  		return;
>  	}
>  
> -	rc = platform_device_add(pdev);
> +	rc = platform_device_add(hmem_pdev);
>  	if (rc)
> -		platform_device_put(pdev);
> +		platform_device_put(hmem_pdev);
>  	else
>  		platform_initialized = true;
>  }
> diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c
> index 5e7c53f18491..d626b60a9716 100644
> --- a/drivers/dax/hmem/hmem.c
> +++ b/drivers/dax/hmem/hmem.c
> @@ -5,6 +5,7 @@
>  #include <linux/pfn_t.h>
>  #include <linux/dax.h>
>  #include "../bus.h"
> +#include "hmem.h"
>  
>  static bool region_idle;
>  module_param_named(region_idle, region_idle, bool, 0644);
> @@ -123,8 +124,22 @@ static int hmem_register_device(struct device *host, int target_nid,
>  	return rc;
>  }
>  
> +static int dax_hmem_cb(struct notifier_block *nb, unsigned long action,
> +		       void *arg)
> +{
> +	struct resource *res = arg;
> +
> +	return hmem_register_device(&hmem_pdev->dev,
> +				    phys_to_target_node(res->start), res);
> +}
> +
> +static struct notifier_block hmem_nb = {
> +	.notifier_call = dax_hmem_cb
> +};
> +
>  static int dax_hmem_platform_probe(struct platform_device *pdev)
>  {
> +	register_soft_reserve_notifier(&hmem_nb);
>  	return walk_hmem_resources(&pdev->dev, hmem_register_device);
>  }
>  
> diff --git a/drivers/dax/hmem/hmem.h b/drivers/dax/hmem/hmem.h
> new file mode 100644
> index 000000000000..95583b59cef7
> --- /dev/null
> +++ b/drivers/dax/hmem/hmem.h
> @@ -0,0 +1,11 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#ifndef _HMEM_H
> +#define _HMEM_H
> +
> +typedef int (*walk_hmem_fn)(struct device *dev, int target_nid,
> +			    const struct resource *res);
> +int walk_hmem_resources(struct device *dev, walk_hmem_fn fn);
> +
> +extern struct platform_device *hmem_pdev;
> +
> +#endif
> diff --git a/include/linux/dax.h b/include/linux/dax.h
> index 9d3e3327af4c..119b4e27a592 100644
> --- a/include/linux/dax.h
> +++ b/include/linux/dax.h
> @@ -282,8 +282,4 @@ static inline void hmem_register_resource(int target_nid, struct resource *r)
>  {
>  }
>  #endif
> -
> -typedef int (*walk_hmem_fn)(struct device *dev, int target_nid,
> -			    const struct resource *res);
> -int walk_hmem_resources(struct device *dev, walk_hmem_fn fn);
>  #endif
> diff --git a/include/linux/ioport.h b/include/linux/ioport.h
> index 6e9fb667a1c5..487371a46392 100644
> --- a/include/linux/ioport.h
> +++ b/include/linux/ioport.h
> @@ -14,6 +14,7 @@
>  #include <linux/compiler.h>
>  #include <linux/minmax.h>
>  #include <linux/types.h>
> +#include <linux/notifier.h>
>  /*
>   * Resources are tree-like, allowing
>   * nesting etc..
> @@ -249,6 +250,11 @@ struct resource *lookup_resource(struct resource *root, resource_size_t start);
>  int adjust_resource(struct resource *res, resource_size_t start,
>  		    resource_size_t size);
>  resource_size_t resource_alignment(struct resource *res);
> +extern void trim_soft_reserve_resources(const struct resource *res);
> +extern void merge_soft_reserve_resources(void);
> +extern int insert_soft_reserve_resource(struct resource *res);
> +extern int register_soft_reserve_notifier(struct notifier_block *nb);
> +extern int unregister_soft_reserve_notifier(struct notifier_block *nb);
>  static inline resource_size_t resource_size(const struct resource *res)
>  {
>  	return res->end - res->start + 1;
> diff --git a/kernel/resource.c b/kernel/resource.c
> index a83040fde236..8fc4121a1887 100644
> --- a/kernel/resource.c
> +++ b/kernel/resource.c
> @@ -30,7 +30,7 @@
>  #include <linux/string.h>
>  #include <linux/vmalloc.h>
>  #include <asm/io.h>
> -
> +#include <linux/acpi.h>
>  
>  struct resource ioport_resource = {
>  	.name	= "PCI IO",
> @@ -48,7 +48,15 @@ struct resource iomem_resource = {
>  };
>  EXPORT_SYMBOL(iomem_resource);
>  
> +struct resource srmem_resource = {
> +	.name	= "Soft Reserved mem",
> +	.start	= 0,
> +	.end	= -1,
> +	.flags	= IORESOURCE_MEM,
> +};
> +
>  static DEFINE_RWLOCK(resource_lock);
> +static DEFINE_RWLOCK(srmem_resource_lock);
>  
>  static struct resource *next_resource(struct resource *p, bool skip_children)
>  {
> @@ -1034,6 +1042,151 @@ int adjust_resource(struct resource *res, resource_size_t start,
>  }
>  EXPORT_SYMBOL(adjust_resource);
>  
> +static BLOCKING_NOTIFIER_HEAD(soft_reserve_chain);
> +
> +int register_soft_reserve_notifier(struct notifier_block *nb)
> +{
> +	return blocking_notifier_chain_register(&soft_reserve_chain, nb);
> +}
> +EXPORT_SYMBOL(register_soft_reserve_notifier);
> +
> +int unregister_soft_reserve_notifier(struct notifier_block *nb)
> +{
> +	return blocking_notifier_chain_unregister(&soft_reserve_chain, nb);
> +}
> +EXPORT_SYMBOL(unregister_soft_reserve_notifier);
> +
> +static int soft_reserve_notify(unsigned long val, void *v)
> +{
> +	struct resource *res = v;
> +
> +	pr_info("Adding Soft Reserve resource %pr\n", res);
> +	return blocking_notifier_call_chain(&soft_reserve_chain, val, v);
> +}
> +
> +static void trim_soft_reserve(struct resource *sr_res,
> +			      const struct resource *res)
> +{
> +	struct resource *new_res;
> +
> +	if (sr_res->start == res->start && sr_res->end == res->end) {
> +		release_resource(sr_res);
> +		free_resource(sr_res);
> +	} else if (sr_res->start == res->start) {
> +		WARN_ON(adjust_resource(sr_res, res->end + 1,
> +					sr_res->end - res->end));
> +	} else if (sr_res->end == res->end) {
> +		WARN_ON(adjust_resource(sr_res, sr_res->start,
> +					res->start - sr_res->start));
> +	} else {
> +		/*
> +		 * Adjust existing resource to cover the resource
> +		 * range prior to the range to be trimmed.
> +		 */
> +		adjust_resource(sr_res, sr_res->start,
> +				res->start - sr_res->start);
> +
> +		/*
> +		 * Add new resource to cover the resource range for
> +		 * the range after the range to be trimmed.
> +		 */
> +		new_res = alloc_resource(GFP_KERNEL);
> +		if (!new_res)
> +			return;
> +
> +		*new_res = DEFINE_RES_NAMED(res->end + 1, sr_res->end - res->end,
> +					    "Soft Reserved", sr_res->flags);
> +		new_res->desc = IORES_DESC_SOFT_RESERVED;
> +		insert_resource(&srmem_resource, new_res);
> +	}
> +}
> +
> +void trim_soft_reserve_resources(const struct resource *res)
> +{
> +	struct resource *sr_res;
> +
> +	write_lock(&srmem_resource_lock);
> +	for (sr_res = srmem_resource.child; sr_res; sr_res = sr_res->sibling) {
> +		if (resource_contains(sr_res, res)) {
> +			trim_soft_reserve(sr_res, res);
> +			break;
> +		}
> +	}
> +	write_unlock(&srmem_resource_lock);
> +}
> +EXPORT_SYMBOL(trim_soft_reserve_resources);
> +
> +void merge_soft_reserve_resources(void)
> +{
> +	struct resource *sr_res, *next;
> +
> +	write_lock(&srmem_resource_lock);
> +	for (sr_res = srmem_resource.child; sr_res; sr_res = next) {
> +		next = sr_res->sibling;
> +
> +		release_resource(sr_res);
> +		if (insert_resource(&iomem_resource, sr_res))
> +			pr_info("Could not add Soft Reserve %pr\n", sr_res);
> +		else
> +			soft_reserve_notify(0, sr_res);
> +	}
> +	write_unlock(&srmem_resource_lock);
> +}
> +EXPORT_SYMBOL(merge_soft_reserve_resources);
> +
> +struct srmem_arg {
> +	struct resource *res;
> +	int overlaps;
> +};
> +
> +static int srmem_parse_cfmws(union acpi_subtable_headers *hdr,
> +			     void *arg, const unsigned long unused)
> +{
> +	struct acpi_cedt_cfmws *cfmws;
> +	struct srmem_arg *args = arg;
> +	struct resource cfmws_res;
> +	struct resource *res;
> +
> +	res = args->res;
> +
> +	cfmws = (struct acpi_cedt_cfmws *)hdr;
> +	cfmws_res = DEFINE_RES_MEM(cfmws->base_hpa,
> +				   cfmws->base_hpa + cfmws->window_size);
> +
> +	if (resource_overlaps(&cfmws_res, res)) {
> +		args->overlaps += 1;
> +		return 1;
> +	}
> +
> +	return 0;
> +}
> +
> +static bool resource_overlaps_cfmws(struct resource *res)
> +{
> +	struct srmem_arg arg = {
> +		.res = res,
> +		.overlaps = 0
> +	};
> +
> +	acpi_table_parse_cedt(ACPI_CEDT_TYPE_CFMWS, srmem_parse_cfmws, &arg);
> +
> +	if (arg.overlaps)
> +		return true;
> +
> +	return false;
> +}
> +
> +int insert_soft_reserve_resource(struct resource *res)
> +{
> +	if (resource_overlaps_cfmws(res)) {
> +		pr_info("Reserving Soft Reserve %pr\n", res);
> +		return insert_resource(&srmem_resource, res);
> +	}
> +
> +	return insert_resource(&iomem_resource, res);
> +}
> +EXPORT_SYMBOL(insert_soft_reserve_resource);
> +
>  static void __init
>  __reserve_region_with_split(struct resource *root, resource_size_t start,
>  			    resource_size_t end, const char *name)
> -- 
> 2.43.0
> 

-- 
Fan Ni (From gmail)




[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux