Re: [PATCH] cxl: Update Soft Reserved resources upon region creation

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 12/2/2024 12:56 PM, Fan Ni wrote:
> On Mon, Dec 02, 2024 at 09:55:42AM -0600, Nathan Fontenot wrote:
>> Update handling of SOFT RESERVE iomem resources that intersect with
>> CXL region resources to remove the intersections from the SOFT RESERVE
>> resources. The current approach of leaving the SOFT RESERVE
>> resource as is can cause failures during hotplug replace of CXL
>> devices because the resource is not available for reuse after
>> teardown of the CXL device.
>>
>> The approach is to trim out any pieces of SOFT RESERVE resources
>> that intersect CXL regions. To do this, first set aside any SOFT RESERVE
>> resources that intersect with a CFMWS into a separate resource tree
>> during e820__reserve_resources_late() that would have been otherwise
>> added to the iomem resource tree.
>>
>> As CXL regions are created the cxl resource created for the new
>> region is used to trim intersections from the SOFT RESERVE
>> resources that were previously set aside.
>>
>> Once CXL device probe has completed ant remaining SOFT RESERVE resources
> /ant/any/
>> remaining are added to the iomem resource tree. As each resource
>> is added to the oiomem resource tree a new notifier chain is invoked
> /oiomem/iomem/

I'll fix spelling mistakes for next version.

>> to notify the dax driver of newly added SOFT RESERVE resources so that
>> the dax driver can consume them.
> 
> In general, the patch is kind of complicated and hard to review for me.
> I am wondering if it can be broken down to make it easier to
> review.

I had thought about possibly breaking this into a few patches but wanted to get it
out for review. I'll split it up for the next version.

> 
> One minor thing inline.
> 
>>
>> Signed-off-by: Nathan Fontenot <nathan.fontenot@xxxxxxx>
>> ---
>>  arch/x86/kernel/e820.c    |  17 ++++-
>>  drivers/cxl/core/region.c |   8 +-
>>  drivers/cxl/port.c        |  15 ++++
>>  drivers/dax/hmem/device.c |  13 ++--
>>  drivers/dax/hmem/hmem.c   |  15 ++++
>>  drivers/dax/hmem/hmem.h   |  11 +++
>>  include/linux/dax.h       |   4 -
>>  include/linux/ioport.h    |   6 ++
>>  kernel/resource.c         | 155 +++++++++++++++++++++++++++++++++++++-
>>  9 files changed, 229 insertions(+), 15 deletions(-)
>>  create mode 100644 drivers/dax/hmem/hmem.h
>>
>> diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
>> index 4893d30ce438..cab82e9324a5 100644
>> --- a/arch/x86/kernel/e820.c
>> +++ b/arch/x86/kernel/e820.c
>> @@ -1210,14 +1210,23 @@ static unsigned long __init ram_alignment(resource_size_t pos)
>>  
>>  void __init e820__reserve_resources_late(void)
>>  {
>> -	int i;
>>  	struct resource *res;
>> +	int i;
>>  
>> +	/*
>> +	 * Prior to inserting SOFT_RESERVED resources we want to check for an
>> +	 * intersection with potential CXL resources. Any SOFT_RESERVED resources
>> +	 * that do intersect a potential CXL resource are set aside so they
>> +	 * can be trimmed to accommodate CXL resource intersections and added to
>> +	 * the iomem resource tree after the CXL drivers have completed their
>> +	 * device probe.
>> +	 */
>>  	res = e820_res;
>> -	for (i = 0; i < e820_table->nr_entries; i++) {
>> -		if (!res->parent && res->end)
>> +	for (i = 0; i < e820_table->nr_entries; i++, res++) {
>> +		if (res->desc == IORES_DESC_SOFT_RESERVED)
>> +			insert_soft_reserve_resource(res);
>> +		else if (!res->parent && res->end)
>>  			insert_resource_expand_to_fit(&iomem_resource, res);
>> -		res++;
> Maybe we can keep the original (res++) here to avoid the noise since it is a
> style thing and does not affect what we want to achieve. 

Yes, I can make that change.

-Nathan

> 
> Fan
>>  	}
>>  
>>  	/*
>> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
>> index 21ad5f242875..c458a6313b31 100644
>> --- a/drivers/cxl/core/region.c
>> +++ b/drivers/cxl/core/region.c
>> @@ -3226,6 +3226,12 @@ static int match_region_by_range(struct device *dev, void *data)
>>  	return rc;
>>  }
>>  
>> +static int insert_region_resource(struct resource *parent, struct resource *res)
>> +{
>> +	trim_soft_reserve_resources(res);
>> +	return insert_resource(parent, res);
>> +}
>> +
>>  /* Establish an empty region covering the given HPA range */
>>  static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
>>  					   struct cxl_endpoint_decoder *cxled)
>> @@ -3272,7 +3278,7 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
>>  
>>  	*res = DEFINE_RES_MEM_NAMED(hpa->start, range_len(hpa),
>>  				    dev_name(&cxlr->dev));
>> -	rc = insert_resource(cxlrd->res, res);
>> +	rc = insert_region_resource(cxlrd->res, res);
>>  	if (rc) {
>>  		/*
>>  		 * Platform-firmware may not have split resources like "System
>> diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c
>> index d7d5d982ce69..4461f2a80d72 100644
>> --- a/drivers/cxl/port.c
>> +++ b/drivers/cxl/port.c
>> @@ -89,6 +89,20 @@ static int cxl_switch_port_probe(struct cxl_port *port)
>>  	return -ENXIO;
>>  }
>>  
>> +static void cxl_sr_update(struct work_struct *w)
>> +{
>> +	merge_soft_reserve_resources();
>> +}
>> +
>> +DECLARE_DELAYED_WORK(cxl_sr_work, cxl_sr_update);
>> +
>> +static void schedule_soft_reserve_update(void)
>> +{
>> +	int timeout = 5 * HZ;
>> +
>> +	mod_delayed_work(system_wq, &cxl_sr_work, timeout);
>> +}
>> +
>>  static int cxl_endpoint_port_probe(struct cxl_port *port)
>>  {
>>  	struct cxl_endpoint_dvsec_info info = { .port = port };
>> @@ -140,6 +154,7 @@ static int cxl_endpoint_port_probe(struct cxl_port *port)
>>  	 */
>>  	device_for_each_child(&port->dev, root, discover_region);
>>  
>> +	schedule_soft_reserve_update();
>>  	return 0;
>>  }
>>  
>> diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c
>> index f9e1a76a04a9..c45791ad4858 100644
>> --- a/drivers/dax/hmem/device.c
>> +++ b/drivers/dax/hmem/device.c
>> @@ -4,6 +4,7 @@
>>  #include <linux/module.h>
>>  #include <linux/dax.h>
>>  #include <linux/mm.h>
>> +#include "hmem.h"
>>  
>>  static bool nohmem;
>>  module_param_named(disable, nohmem, bool, 0444);
>> @@ -17,6 +18,9 @@ static struct resource hmem_active = {
>>  	.flags = IORESOURCE_MEM,
>>  };
>>  
>> +struct platform_device *hmem_pdev;
>> +EXPORT_SYMBOL_GPL(hmem_pdev);
>> +
>>  int walk_hmem_resources(struct device *host, walk_hmem_fn fn)
>>  {
>>  	struct resource *res;
>> @@ -35,7 +39,6 @@ EXPORT_SYMBOL_GPL(walk_hmem_resources);
>>  
>>  static void __hmem_register_resource(int target_nid, struct resource *res)
>>  {
>> -	struct platform_device *pdev;
>>  	struct resource *new;
>>  	int rc;
>>  
>> @@ -51,15 +54,15 @@ static void __hmem_register_resource(int target_nid, struct resource *res)
>>  	if (platform_initialized)
>>  		return;
>>  
>> -	pdev = platform_device_alloc("hmem_platform", 0);
>> -	if (!pdev) {
>> +	hmem_pdev = platform_device_alloc("hmem_platform", 0);
>> +	if (!hmem_pdev) {
>>  		pr_err_once("failed to register device-dax hmem_platform device\n");
>>  		return;
>>  	}
>>  
>> -	rc = platform_device_add(pdev);
>> +	rc = platform_device_add(hmem_pdev);
>>  	if (rc)
>> -		platform_device_put(pdev);
>> +		platform_device_put(hmem_pdev);
>>  	else
>>  		platform_initialized = true;
>>  }
>> diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c
>> index 5e7c53f18491..d626b60a9716 100644
>> --- a/drivers/dax/hmem/hmem.c
>> +++ b/drivers/dax/hmem/hmem.c
>> @@ -5,6 +5,7 @@
>>  #include <linux/pfn_t.h>
>>  #include <linux/dax.h>
>>  #include "../bus.h"
>> +#include "hmem.h"
>>  
>>  static bool region_idle;
>>  module_param_named(region_idle, region_idle, bool, 0644);
>> @@ -123,8 +124,22 @@ static int hmem_register_device(struct device *host, int target_nid,
>>  	return rc;
>>  }
>>  
>> +static int dax_hmem_cb(struct notifier_block *nb, unsigned long action,
>> +		       void *arg)
>> +{
>> +	struct resource *res = arg;
>> +
>> +	return hmem_register_device(&hmem_pdev->dev,
>> +				    phys_to_target_node(res->start), res);
>> +}
>> +
>> +static struct notifier_block hmem_nb = {
>> +	.notifier_call = dax_hmem_cb
>> +};
>> +
>>  static int dax_hmem_platform_probe(struct platform_device *pdev)
>>  {
>> +	register_soft_reserve_notifier(&hmem_nb);
>>  	return walk_hmem_resources(&pdev->dev, hmem_register_device);
>>  }
>>  
>> diff --git a/drivers/dax/hmem/hmem.h b/drivers/dax/hmem/hmem.h
>> new file mode 100644
>> index 000000000000..95583b59cef7
>> --- /dev/null
>> +++ b/drivers/dax/hmem/hmem.h
>> @@ -0,0 +1,11 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +#ifndef _HMEM_H
>> +#define _HMEM_H
>> +
>> +typedef int (*walk_hmem_fn)(struct device *dev, int target_nid,
>> +			    const struct resource *res);
>> +int walk_hmem_resources(struct device *dev, walk_hmem_fn fn);
>> +
>> +extern struct platform_device *hmem_pdev;
>> +
>> +#endif
>> diff --git a/include/linux/dax.h b/include/linux/dax.h
>> index 9d3e3327af4c..119b4e27a592 100644
>> --- a/include/linux/dax.h
>> +++ b/include/linux/dax.h
>> @@ -282,8 +282,4 @@ static inline void hmem_register_resource(int target_nid, struct resource *r)
>>  {
>>  }
>>  #endif
>> -
>> -typedef int (*walk_hmem_fn)(struct device *dev, int target_nid,
>> -			    const struct resource *res);
>> -int walk_hmem_resources(struct device *dev, walk_hmem_fn fn);
>>  #endif
>> diff --git a/include/linux/ioport.h b/include/linux/ioport.h
>> index 6e9fb667a1c5..487371a46392 100644
>> --- a/include/linux/ioport.h
>> +++ b/include/linux/ioport.h
>> @@ -14,6 +14,7 @@
>>  #include <linux/compiler.h>
>>  #include <linux/minmax.h>
>>  #include <linux/types.h>
>> +#include <linux/notifier.h>
>>  /*
>>   * Resources are tree-like, allowing
>>   * nesting etc..
>> @@ -249,6 +250,11 @@ struct resource *lookup_resource(struct resource *root, resource_size_t start);
>>  int adjust_resource(struct resource *res, resource_size_t start,
>>  		    resource_size_t size);
>>  resource_size_t resource_alignment(struct resource *res);
>> +extern void trim_soft_reserve_resources(const struct resource *res);
>> +extern void merge_soft_reserve_resources(void);
>> +extern int insert_soft_reserve_resource(struct resource *res);
>> +extern int register_soft_reserve_notifier(struct notifier_block *nb);
>> +extern int unregister_soft_reserve_notifier(struct notifier_block *nb);
>>  static inline resource_size_t resource_size(const struct resource *res)
>>  {
>>  	return res->end - res->start + 1;
>> diff --git a/kernel/resource.c b/kernel/resource.c
>> index a83040fde236..8fc4121a1887 100644
>> --- a/kernel/resource.c
>> +++ b/kernel/resource.c
>> @@ -30,7 +30,7 @@
>>  #include <linux/string.h>
>>  #include <linux/vmalloc.h>
>>  #include <asm/io.h>
>> -
>> +#include <linux/acpi.h>
>>  
>>  struct resource ioport_resource = {
>>  	.name	= "PCI IO",
>> @@ -48,7 +48,15 @@ struct resource iomem_resource = {
>>  };
>>  EXPORT_SYMBOL(iomem_resource);
>>  
>> +struct resource srmem_resource = {
>> +	.name	= "Soft Reserved mem",
>> +	.start	= 0,
>> +	.end	= -1,
>> +	.flags	= IORESOURCE_MEM,
>> +};
>> +
>>  static DEFINE_RWLOCK(resource_lock);
>> +static DEFINE_RWLOCK(srmem_resource_lock);
>>  
>>  static struct resource *next_resource(struct resource *p, bool skip_children)
>>  {
>> @@ -1034,6 +1042,151 @@ int adjust_resource(struct resource *res, resource_size_t start,
>>  }
>>  EXPORT_SYMBOL(adjust_resource);
>>  
>> +static BLOCKING_NOTIFIER_HEAD(soft_reserve_chain);
>> +
>> +int register_soft_reserve_notifier(struct notifier_block *nb)
>> +{
>> +	return blocking_notifier_chain_register(&soft_reserve_chain, nb);
>> +}
>> +EXPORT_SYMBOL(register_soft_reserve_notifier);
>> +
>> +int unregister_soft_reserve_notifier(struct notifier_block *nb)
>> +{
>> +	return blocking_notifier_chain_unregister(&soft_reserve_chain, nb);
>> +}
>> +EXPORT_SYMBOL(unregister_soft_reserve_notifier);
>> +
>> +static int soft_reserve_notify(unsigned long val, void *v)
>> +{
>> +	struct resource *res = v;
>> +
>> +	pr_info("Adding Soft Reserve resource %pr\n", res);
>> +	return blocking_notifier_call_chain(&soft_reserve_chain, val, v);
>> +}
>> +
>> +static void trim_soft_reserve(struct resource *sr_res,
>> +			      const struct resource *res)
>> +{
>> +	struct resource *new_res;
>> +
>> +	if (sr_res->start == res->start && sr_res->end == res->end) {
>> +		release_resource(sr_res);
>> +		free_resource(sr_res);
>> +	} else if (sr_res->start == res->start) {
>> +		WARN_ON(adjust_resource(sr_res, res->end + 1,
>> +					sr_res->end - res->end));
>> +	} else if (sr_res->end == res->end) {
>> +		WARN_ON(adjust_resource(sr_res, sr_res->start,
>> +					res->start - sr_res->start));
>> +	} else {
>> +		/*
>> +		 * Adjust existing resource to cover the resource
>> +		 * range prior to the range to be trimmed.
>> +		 */
>> +		adjust_resource(sr_res, sr_res->start,
>> +				res->start - sr_res->start);
>> +
>> +		/*
>> +		 * Add new resource to cover the resource range for
>> +		 * the range after the range to be trimmed.
>> +		 */
>> +		new_res = alloc_resource(GFP_KERNEL);
>> +		if (!new_res)
>> +			return;
>> +
>> +		*new_res = DEFINE_RES_NAMED(res->end + 1, sr_res->end - res->end,
>> +					    "Soft Reserved", sr_res->flags);
>> +		new_res->desc = IORES_DESC_SOFT_RESERVED;
>> +		insert_resource(&srmem_resource, new_res);
>> +	}
>> +}
>> +
>> +void trim_soft_reserve_resources(const struct resource *res)
>> +{
>> +	struct resource *sr_res;
>> +
>> +	write_lock(&srmem_resource_lock);
>> +	for (sr_res = srmem_resource.child; sr_res; sr_res = sr_res->sibling) {
>> +		if (resource_contains(sr_res, res)) {
>> +			trim_soft_reserve(sr_res, res);
>> +			break;
>> +		}
>> +	}
>> +	write_unlock(&srmem_resource_lock);
>> +}
>> +EXPORT_SYMBOL(trim_soft_reserve_resources);
>> +
>> +void merge_soft_reserve_resources(void)
>> +{
>> +	struct resource *sr_res, *next;
>> +
>> +	write_lock(&srmem_resource_lock);
>> +	for (sr_res = srmem_resource.child; sr_res; sr_res = next) {
>> +		next = sr_res->sibling;
>> +
>> +		release_resource(sr_res);
>> +		if (insert_resource(&iomem_resource, sr_res))
>> +			pr_info("Could not add Soft Reserve %pr\n", sr_res);
>> +		else
>> +			soft_reserve_notify(0, sr_res);
>> +	}
>> +	write_unlock(&srmem_resource_lock);
>> +}
>> +EXPORT_SYMBOL(merge_soft_reserve_resources);
>> +
>> +struct srmem_arg {
>> +	struct resource *res;
>> +	int overlaps;
>> +};
>> +
>> +static int srmem_parse_cfmws(union acpi_subtable_headers *hdr,
>> +			     void *arg, const unsigned long unused)
>> +{
>> +	struct acpi_cedt_cfmws *cfmws;
>> +	struct srmem_arg *args = arg;
>> +	struct resource cfmws_res;
>> +	struct resource *res;
>> +
>> +	res = args->res;
>> +
>> +	cfmws = (struct acpi_cedt_cfmws *)hdr;
>> +	cfmws_res = DEFINE_RES_MEM(cfmws->base_hpa,
>> +				   cfmws->base_hpa + cfmws->window_size);
>> +
>> +	if (resource_overlaps(&cfmws_res, res)) {
>> +		args->overlaps += 1;
>> +		return 1;
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>> +static bool resource_overlaps_cfmws(struct resource *res)
>> +{
>> +	struct srmem_arg arg = {
>> +		.res = res,
>> +		.overlaps = 0
>> +	};
>> +
>> +	acpi_table_parse_cedt(ACPI_CEDT_TYPE_CFMWS, srmem_parse_cfmws, &arg);
>> +
>> +	if (arg.overlaps)
>> +		return true;
>> +
>> +	return false;
>> +}
>> +
>> +int insert_soft_reserve_resource(struct resource *res)
>> +{
>> +	if (resource_overlaps_cfmws(res)) {
>> +		pr_info("Reserving Soft Reserve %pr\n", res);
>> +		return insert_resource(&srmem_resource, res);
>> +	}
>> +
>> +	return insert_resource(&iomem_resource, res);
>> +}
>> +EXPORT_SYMBOL(insert_soft_reserve_resource);
>> +
>>  static void __init
>>  __reserve_region_with_split(struct resource *root, resource_size_t start,
>>  			    resource_size_t end, const char *name)
>> -- 
>> 2.43.0
>>
> 





[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux