On 12/2/2024 12:56 PM, Fan Ni wrote: > On Mon, Dec 02, 2024 at 09:55:42AM -0600, Nathan Fontenot wrote: >> Update handling of SOFT RESERVE iomem resources that intersect with >> CXL region resources to remove the intersections from the SOFT RESERVE >> resources. The current approach of leaving the SOFT RESERVE >> resource as is can cause failures during hotplug replace of CXL >> devices because the resource is not available for reuse after >> teardown of the CXL device. >> >> The approach is to trim out any pieces of SOFT RESERVE resources >> that intersect CXL regions. To do this, first set aside any SOFT RESERVE >> resources that intersect with a CFMWS into a separate resource tree >> during e820__reserve_resources_late() that would have been otherwise >> added to the iomem resource tree. >> >> As CXL regions are created the cxl resource created for the new >> region is used to trim intersections from the SOFT RESERVE >> resources that were previously set aside. >> >> Once CXL device probe has completed ant remaining SOFT RESERVE resources > /ant/any/ >> remaining are added to the iomem resource tree. As each resource >> is added to the oiomem resource tree a new notifier chain is invoked > /oiomem/iomem/ I'll fix spelling mistakes for next version. >> to notify the dax driver of newly added SOFT RESERVE resources so that >> the dax driver can consume them. > > In general, the patch is kind of complicated and hard to review for me. > I am wondering if it can be broken down to make it easier to > review. I had thought about possibly breaking this into a few patches but wanted to get it out for review. I'll split it up for the next version. > > One minor thing inline. > >> >> Signed-off-by: Nathan Fontenot <nathan.fontenot@xxxxxxx> >> --- >> arch/x86/kernel/e820.c | 17 ++++- >> drivers/cxl/core/region.c | 8 +- >> drivers/cxl/port.c | 15 ++++ >> drivers/dax/hmem/device.c | 13 ++-- >> drivers/dax/hmem/hmem.c | 15 ++++ >> drivers/dax/hmem/hmem.h | 11 +++ >> include/linux/dax.h | 4 - >> include/linux/ioport.h | 6 ++ >> kernel/resource.c | 155 +++++++++++++++++++++++++++++++++++++- >> 9 files changed, 229 insertions(+), 15 deletions(-) >> create mode 100644 drivers/dax/hmem/hmem.h >> >> diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c >> index 4893d30ce438..cab82e9324a5 100644 >> --- a/arch/x86/kernel/e820.c >> +++ b/arch/x86/kernel/e820.c >> @@ -1210,14 +1210,23 @@ static unsigned long __init ram_alignment(resource_size_t pos) >> >> void __init e820__reserve_resources_late(void) >> { >> - int i; >> struct resource *res; >> + int i; >> >> + /* >> + * Prior to inserting SOFT_RESERVED resources we want to check for an >> + * intersection with potential CXL resources. Any SOFT_RESERVED resources >> + * that do intersect a potential CXL resource are set aside so they >> + * can be trimmed to accommodate CXL resource intersections and added to >> + * the iomem resource tree after the CXL drivers have completed their >> + * device probe. >> + */ >> res = e820_res; >> - for (i = 0; i < e820_table->nr_entries; i++) { >> - if (!res->parent && res->end) >> + for (i = 0; i < e820_table->nr_entries; i++, res++) { >> + if (res->desc == IORES_DESC_SOFT_RESERVED) >> + insert_soft_reserve_resource(res); >> + else if (!res->parent && res->end) >> insert_resource_expand_to_fit(&iomem_resource, res); >> - res++; > Maybe we can keep the original (res++) here to avoid the noise since it is a > style thing and does not affect what we want to achieve. Yes, I can make that change. -Nathan > > Fan >> } >> >> /* >> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c >> index 21ad5f242875..c458a6313b31 100644 >> --- a/drivers/cxl/core/region.c >> +++ b/drivers/cxl/core/region.c >> @@ -3226,6 +3226,12 @@ static int match_region_by_range(struct device *dev, void *data) >> return rc; >> } >> >> +static int insert_region_resource(struct resource *parent, struct resource *res) >> +{ >> + trim_soft_reserve_resources(res); >> + return insert_resource(parent, res); >> +} >> + >> /* Establish an empty region covering the given HPA range */ >> static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd, >> struct cxl_endpoint_decoder *cxled) >> @@ -3272,7 +3278,7 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd, >> >> *res = DEFINE_RES_MEM_NAMED(hpa->start, range_len(hpa), >> dev_name(&cxlr->dev)); >> - rc = insert_resource(cxlrd->res, res); >> + rc = insert_region_resource(cxlrd->res, res); >> if (rc) { >> /* >> * Platform-firmware may not have split resources like "System >> diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c >> index d7d5d982ce69..4461f2a80d72 100644 >> --- a/drivers/cxl/port.c >> +++ b/drivers/cxl/port.c >> @@ -89,6 +89,20 @@ static int cxl_switch_port_probe(struct cxl_port *port) >> return -ENXIO; >> } >> >> +static void cxl_sr_update(struct work_struct *w) >> +{ >> + merge_soft_reserve_resources(); >> +} >> + >> +DECLARE_DELAYED_WORK(cxl_sr_work, cxl_sr_update); >> + >> +static void schedule_soft_reserve_update(void) >> +{ >> + int timeout = 5 * HZ; >> + >> + mod_delayed_work(system_wq, &cxl_sr_work, timeout); >> +} >> + >> static int cxl_endpoint_port_probe(struct cxl_port *port) >> { >> struct cxl_endpoint_dvsec_info info = { .port = port }; >> @@ -140,6 +154,7 @@ static int cxl_endpoint_port_probe(struct cxl_port *port) >> */ >> device_for_each_child(&port->dev, root, discover_region); >> >> + schedule_soft_reserve_update(); >> return 0; >> } >> >> diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c >> index f9e1a76a04a9..c45791ad4858 100644 >> --- a/drivers/dax/hmem/device.c >> +++ b/drivers/dax/hmem/device.c >> @@ -4,6 +4,7 @@ >> #include <linux/module.h> >> #include <linux/dax.h> >> #include <linux/mm.h> >> +#include "hmem.h" >> >> static bool nohmem; >> module_param_named(disable, nohmem, bool, 0444); >> @@ -17,6 +18,9 @@ static struct resource hmem_active = { >> .flags = IORESOURCE_MEM, >> }; >> >> +struct platform_device *hmem_pdev; >> +EXPORT_SYMBOL_GPL(hmem_pdev); >> + >> int walk_hmem_resources(struct device *host, walk_hmem_fn fn) >> { >> struct resource *res; >> @@ -35,7 +39,6 @@ EXPORT_SYMBOL_GPL(walk_hmem_resources); >> >> static void __hmem_register_resource(int target_nid, struct resource *res) >> { >> - struct platform_device *pdev; >> struct resource *new; >> int rc; >> >> @@ -51,15 +54,15 @@ static void __hmem_register_resource(int target_nid, struct resource *res) >> if (platform_initialized) >> return; >> >> - pdev = platform_device_alloc("hmem_platform", 0); >> - if (!pdev) { >> + hmem_pdev = platform_device_alloc("hmem_platform", 0); >> + if (!hmem_pdev) { >> pr_err_once("failed to register device-dax hmem_platform device\n"); >> return; >> } >> >> - rc = platform_device_add(pdev); >> + rc = platform_device_add(hmem_pdev); >> if (rc) >> - platform_device_put(pdev); >> + platform_device_put(hmem_pdev); >> else >> platform_initialized = true; >> } >> diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c >> index 5e7c53f18491..d626b60a9716 100644 >> --- a/drivers/dax/hmem/hmem.c >> +++ b/drivers/dax/hmem/hmem.c >> @@ -5,6 +5,7 @@ >> #include <linux/pfn_t.h> >> #include <linux/dax.h> >> #include "../bus.h" >> +#include "hmem.h" >> >> static bool region_idle; >> module_param_named(region_idle, region_idle, bool, 0644); >> @@ -123,8 +124,22 @@ static int hmem_register_device(struct device *host, int target_nid, >> return rc; >> } >> >> +static int dax_hmem_cb(struct notifier_block *nb, unsigned long action, >> + void *arg) >> +{ >> + struct resource *res = arg; >> + >> + return hmem_register_device(&hmem_pdev->dev, >> + phys_to_target_node(res->start), res); >> +} >> + >> +static struct notifier_block hmem_nb = { >> + .notifier_call = dax_hmem_cb >> +}; >> + >> static int dax_hmem_platform_probe(struct platform_device *pdev) >> { >> + register_soft_reserve_notifier(&hmem_nb); >> return walk_hmem_resources(&pdev->dev, hmem_register_device); >> } >> >> diff --git a/drivers/dax/hmem/hmem.h b/drivers/dax/hmem/hmem.h >> new file mode 100644 >> index 000000000000..95583b59cef7 >> --- /dev/null >> +++ b/drivers/dax/hmem/hmem.h >> @@ -0,0 +1,11 @@ >> +// SPDX-License-Identifier: GPL-2.0 >> +#ifndef _HMEM_H >> +#define _HMEM_H >> + >> +typedef int (*walk_hmem_fn)(struct device *dev, int target_nid, >> + const struct resource *res); >> +int walk_hmem_resources(struct device *dev, walk_hmem_fn fn); >> + >> +extern struct platform_device *hmem_pdev; >> + >> +#endif >> diff --git a/include/linux/dax.h b/include/linux/dax.h >> index 9d3e3327af4c..119b4e27a592 100644 >> --- a/include/linux/dax.h >> +++ b/include/linux/dax.h >> @@ -282,8 +282,4 @@ static inline void hmem_register_resource(int target_nid, struct resource *r) >> { >> } >> #endif >> - >> -typedef int (*walk_hmem_fn)(struct device *dev, int target_nid, >> - const struct resource *res); >> -int walk_hmem_resources(struct device *dev, walk_hmem_fn fn); >> #endif >> diff --git a/include/linux/ioport.h b/include/linux/ioport.h >> index 6e9fb667a1c5..487371a46392 100644 >> --- a/include/linux/ioport.h >> +++ b/include/linux/ioport.h >> @@ -14,6 +14,7 @@ >> #include <linux/compiler.h> >> #include <linux/minmax.h> >> #include <linux/types.h> >> +#include <linux/notifier.h> >> /* >> * Resources are tree-like, allowing >> * nesting etc.. >> @@ -249,6 +250,11 @@ struct resource *lookup_resource(struct resource *root, resource_size_t start); >> int adjust_resource(struct resource *res, resource_size_t start, >> resource_size_t size); >> resource_size_t resource_alignment(struct resource *res); >> +extern void trim_soft_reserve_resources(const struct resource *res); >> +extern void merge_soft_reserve_resources(void); >> +extern int insert_soft_reserve_resource(struct resource *res); >> +extern int register_soft_reserve_notifier(struct notifier_block *nb); >> +extern int unregister_soft_reserve_notifier(struct notifier_block *nb); >> static inline resource_size_t resource_size(const struct resource *res) >> { >> return res->end - res->start + 1; >> diff --git a/kernel/resource.c b/kernel/resource.c >> index a83040fde236..8fc4121a1887 100644 >> --- a/kernel/resource.c >> +++ b/kernel/resource.c >> @@ -30,7 +30,7 @@ >> #include <linux/string.h> >> #include <linux/vmalloc.h> >> #include <asm/io.h> >> - >> +#include <linux/acpi.h> >> >> struct resource ioport_resource = { >> .name = "PCI IO", >> @@ -48,7 +48,15 @@ struct resource iomem_resource = { >> }; >> EXPORT_SYMBOL(iomem_resource); >> >> +struct resource srmem_resource = { >> + .name = "Soft Reserved mem", >> + .start = 0, >> + .end = -1, >> + .flags = IORESOURCE_MEM, >> +}; >> + >> static DEFINE_RWLOCK(resource_lock); >> +static DEFINE_RWLOCK(srmem_resource_lock); >> >> static struct resource *next_resource(struct resource *p, bool skip_children) >> { >> @@ -1034,6 +1042,151 @@ int adjust_resource(struct resource *res, resource_size_t start, >> } >> EXPORT_SYMBOL(adjust_resource); >> >> +static BLOCKING_NOTIFIER_HEAD(soft_reserve_chain); >> + >> +int register_soft_reserve_notifier(struct notifier_block *nb) >> +{ >> + return blocking_notifier_chain_register(&soft_reserve_chain, nb); >> +} >> +EXPORT_SYMBOL(register_soft_reserve_notifier); >> + >> +int unregister_soft_reserve_notifier(struct notifier_block *nb) >> +{ >> + return blocking_notifier_chain_unregister(&soft_reserve_chain, nb); >> +} >> +EXPORT_SYMBOL(unregister_soft_reserve_notifier); >> + >> +static int soft_reserve_notify(unsigned long val, void *v) >> +{ >> + struct resource *res = v; >> + >> + pr_info("Adding Soft Reserve resource %pr\n", res); >> + return blocking_notifier_call_chain(&soft_reserve_chain, val, v); >> +} >> + >> +static void trim_soft_reserve(struct resource *sr_res, >> + const struct resource *res) >> +{ >> + struct resource *new_res; >> + >> + if (sr_res->start == res->start && sr_res->end == res->end) { >> + release_resource(sr_res); >> + free_resource(sr_res); >> + } else if (sr_res->start == res->start) { >> + WARN_ON(adjust_resource(sr_res, res->end + 1, >> + sr_res->end - res->end)); >> + } else if (sr_res->end == res->end) { >> + WARN_ON(adjust_resource(sr_res, sr_res->start, >> + res->start - sr_res->start)); >> + } else { >> + /* >> + * Adjust existing resource to cover the resource >> + * range prior to the range to be trimmed. >> + */ >> + adjust_resource(sr_res, sr_res->start, >> + res->start - sr_res->start); >> + >> + /* >> + * Add new resource to cover the resource range for >> + * the range after the range to be trimmed. >> + */ >> + new_res = alloc_resource(GFP_KERNEL); >> + if (!new_res) >> + return; >> + >> + *new_res = DEFINE_RES_NAMED(res->end + 1, sr_res->end - res->end, >> + "Soft Reserved", sr_res->flags); >> + new_res->desc = IORES_DESC_SOFT_RESERVED; >> + insert_resource(&srmem_resource, new_res); >> + } >> +} >> + >> +void trim_soft_reserve_resources(const struct resource *res) >> +{ >> + struct resource *sr_res; >> + >> + write_lock(&srmem_resource_lock); >> + for (sr_res = srmem_resource.child; sr_res; sr_res = sr_res->sibling) { >> + if (resource_contains(sr_res, res)) { >> + trim_soft_reserve(sr_res, res); >> + break; >> + } >> + } >> + write_unlock(&srmem_resource_lock); >> +} >> +EXPORT_SYMBOL(trim_soft_reserve_resources); >> + >> +void merge_soft_reserve_resources(void) >> +{ >> + struct resource *sr_res, *next; >> + >> + write_lock(&srmem_resource_lock); >> + for (sr_res = srmem_resource.child; sr_res; sr_res = next) { >> + next = sr_res->sibling; >> + >> + release_resource(sr_res); >> + if (insert_resource(&iomem_resource, sr_res)) >> + pr_info("Could not add Soft Reserve %pr\n", sr_res); >> + else >> + soft_reserve_notify(0, sr_res); >> + } >> + write_unlock(&srmem_resource_lock); >> +} >> +EXPORT_SYMBOL(merge_soft_reserve_resources); >> + >> +struct srmem_arg { >> + struct resource *res; >> + int overlaps; >> +}; >> + >> +static int srmem_parse_cfmws(union acpi_subtable_headers *hdr, >> + void *arg, const unsigned long unused) >> +{ >> + struct acpi_cedt_cfmws *cfmws; >> + struct srmem_arg *args = arg; >> + struct resource cfmws_res; >> + struct resource *res; >> + >> + res = args->res; >> + >> + cfmws = (struct acpi_cedt_cfmws *)hdr; >> + cfmws_res = DEFINE_RES_MEM(cfmws->base_hpa, >> + cfmws->base_hpa + cfmws->window_size); >> + >> + if (resource_overlaps(&cfmws_res, res)) { >> + args->overlaps += 1; >> + return 1; >> + } >> + >> + return 0; >> +} >> + >> +static bool resource_overlaps_cfmws(struct resource *res) >> +{ >> + struct srmem_arg arg = { >> + .res = res, >> + .overlaps = 0 >> + }; >> + >> + acpi_table_parse_cedt(ACPI_CEDT_TYPE_CFMWS, srmem_parse_cfmws, &arg); >> + >> + if (arg.overlaps) >> + return true; >> + >> + return false; >> +} >> + >> +int insert_soft_reserve_resource(struct resource *res) >> +{ >> + if (resource_overlaps_cfmws(res)) { >> + pr_info("Reserving Soft Reserve %pr\n", res); >> + return insert_resource(&srmem_resource, res); >> + } >> + >> + return insert_resource(&iomem_resource, res); >> +} >> +EXPORT_SYMBOL(insert_soft_reserve_resource); >> + >> static void __init >> __reserve_region_with_split(struct resource *root, resource_size_t start, >> resource_size_t end, const char *name) >> -- >> 2.43.0 >> >