On 1/18/2025 1:28 AM, Dave Jiang wrote: > Below is a setup with extended linear cache configuration with an example > layout of memory region shown below presented as a single memory region > consists of 256G memory where there's 128G of DRAM and 128G of CXL memory. > The kernel sees a region of total 256G of system memory. > > 128G DRAM 128G CXL memory > |-----------------------------------|-------------------------------------| > > Data resides in either DRAM or far memory (FM) with no replication. Hot > data is swapped into DRAM by the hardware behind the scenes. When error is > detected in one location, it is possible that error also resides in the > aliased location. Therefore when a memory location that is flagged by MCE > is part of the special region, the aliased memory location needs to be > offlined as well. > > Add an mce notify callback to identify if the MCE address location is part > of an extended linear cache region and handle accordingly. > > Added symbol export to set_mce_nospec() in x86 code in order to call > set_mce_nospec() from the CXL MCE notify callback. > > Link: https://lore.kernel.org/linux-cxl/668333b17e4b2_5639294fd@xxxxxxxxxxxxxxxxxxxxxxxxx.notmuch/ > Reviewed-by: Jonathan Cameron <Jonathan.Cameron@xxxxxxxxxx> > Signed-off-by: Dave Jiang <dave.jiang@xxxxxxxxx> > --- > v3: > - Add endpoint pointer check. (Ming) > - Add mce notifier removal. (Ming) > - Return ~0ULL for no cache alias. 
> --- > arch/x86/mm/pat/set_memory.c | 1 + > drivers/cxl/Kconfig | 4 +++ > drivers/cxl/core/Makefile | 1 + > drivers/cxl/core/mbox.c | 8 +++++ > drivers/cxl/core/mce.c | 63 ++++++++++++++++++++++++++++++++++++ > drivers/cxl/core/mce.h | 16 +++++++++ > drivers/cxl/core/region.c | 28 ++++++++++++++++ > drivers/cxl/cxl.h | 6 ++++ > drivers/cxl/cxlmem.h | 2 ++ > tools/testing/cxl/Kbuild | 1 + > 10 files changed, 130 insertions(+) > create mode 100644 drivers/cxl/core/mce.c > create mode 100644 drivers/cxl/core/mce.h > > diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c > index 95bc50a8541c..a0df698f46a2 100644 > --- a/arch/x86/mm/pat/set_memory.c > +++ b/arch/x86/mm/pat/set_memory.c > @@ -2083,6 +2083,7 @@ int set_mce_nospec(unsigned long pfn) > pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn); > return rc; > } > +EXPORT_SYMBOL_GPL(set_mce_nospec); > > /* Restore full speculative operation to the pfn. */ > int clear_mce_nospec(unsigned long pfn) > diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig > index 876469e23f7a..d1c91dacae56 100644 > --- a/drivers/cxl/Kconfig > +++ b/drivers/cxl/Kconfig > @@ -146,4 +146,8 @@ config CXL_REGION_INVALIDATION_TEST > If unsure, or if this kernel is meant for production environments, > say N. 
> > +config CXL_MCE > + def_bool y > + depends on X86_MCE && MEMORY_FAILURE > + > endif > diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile > index 1a0c9c6ca818..61c9332b3582 100644 > --- a/drivers/cxl/core/Makefile > +++ b/drivers/cxl/core/Makefile > @@ -17,3 +17,4 @@ cxl_core-y += cdat.o > cxl_core-y += acpi.o > cxl_core-$(CONFIG_TRACING) += trace.o > cxl_core-$(CONFIG_CXL_REGION) += region.o > +cxl_core-$(CONFIG_CXL_MCE) += mce.o > diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c > index f42c4c56dc43..ad11f49cb117 100644 > --- a/drivers/cxl/core/mbox.c > +++ b/drivers/cxl/core/mbox.c > @@ -11,6 +11,7 @@ > > #include "core.h" > #include "trace.h" > +#include "mce.h" > > static bool cxl_raw_allow_all; > > @@ -1458,6 +1459,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_mailbox_init, "CXL"); > struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev) > { > struct cxl_memdev_state *mds; > + int rc; > > mds = devm_kzalloc(dev, sizeof(*mds), GFP_KERNEL); > if (!mds) { > @@ -1473,6 +1475,12 @@ struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev) > mds->ram_perf.qos_class = CXL_QOS_CLASS_INVALID; > mds->pmem_perf.qos_class = CXL_QOS_CLASS_INVALID; > > + cxl_register_mce_notifier(&mds->mce_notifier); > + rc = devm_add_action_or_reset(dev, cxl_unregister_mce_notifier, > + &mds->mce_notifier); > + if (rc) > + return ERR_PTR(rc); > + maybe we can put this devm release action into cxl_register_mce_notifier() and rename cxl_register_mce_notifier() to devm_cxl_register_mce_notifier()? > return mds; > } > EXPORT_SYMBOL_NS_GPL(cxl_memdev_state_create, "CXL"); > diff --git a/drivers/cxl/core/mce.c b/drivers/cxl/core/mce.c > new file mode 100644 > index 000000000000..dab5acce249e > --- /dev/null > +++ b/drivers/cxl/core/mce.c > @@ -0,0 +1,63 @@ > +// SPDX-License-Identifier: GPL-2.0-only > +/* Copyright(c) 2024 Intel Corporation. All rights reserved. 
*/ > +#include <linux/mm.h> > +#include <linux/notifier.h> > +#include <linux/set_memory.h> > +#include <asm/mce.h> > +#include <cxlmem.h> > +#include "mce.h" > + > +static int cxl_handle_mce(struct notifier_block *nb, unsigned long val, > + void *data) > +{ > + struct cxl_memdev_state *mds = container_of(nb, struct cxl_memdev_state, > + mce_notifier); > + struct cxl_memdev *cxlmd = mds->cxlds.cxlmd; > + struct cxl_port *endpoint = cxlmd->endpoint; > + struct mce *mce = data; > + u64 spa, spa_alias; > + unsigned long pfn; > + > + if (!mce || !mce_usable_address(mce)) > + return NOTIFY_DONE; > + > + if (!endpoint) > + return NOTIFY_DONE; > + > + spa = mce->addr & MCI_ADDR_PHYSADDR; > + > + pfn = spa >> PAGE_SHIFT; > + if (!pfn_valid(pfn)) > + return NOTIFY_DONE; > + > + spa_alias = cxl_port_get_spa_cache_alias(endpoint, spa); > + if (spa_alias == ~0ULL) > + return NOTIFY_DONE; > + > + pfn = spa_alias >> PAGE_SHIFT; > + > + /* > + * Take down the aliased memory page. The original memory page flagged > + * by the MCE will be taken cared of by the standard MCE handler. > + */ > + dev_emerg(mds->cxlds.dev, "Offlining aliased SPA address0: %#llx\n", > + spa_alias); > + if (!memory_failure(pfn, 0)) > + set_mce_nospec(pfn); > + > + return NOTIFY_OK; > +} > + > +void cxl_register_mce_notifier(struct notifier_block *mce_notifier) > +{ > + mce_notifier->notifier_call = cxl_handle_mce; > + mce_notifier->priority = MCE_PRIO_UC; > + mce_register_decode_chain(mce_notifier); > +} > +EXPORT_SYMBOL_NS_GPL(cxl_register_mce_notifier, "CXL"); > + > +void cxl_unregister_mce_notifier(void *mce_notifier) > +{ > + mce_unregister_decode_chain(mce_notifier); > +} > +EXPORT_SYMBOL_NS_GPL(cxl_unregister_mce_notifier, "CXL"); My understanding is that these two functions do not need to be exported, because they are invoked inside cxl_core.ko. I checked that they were not exported in v2; is there any reason for this change? Ming