Hi Dan, >-----Original Message----- >From: Dan Williams <dan.j.williams@xxxxxxxxx> >Sent: 16 March 2022 04:14 >To: linux-cxl@xxxxxxxxxxxxxxx >Cc: ben.widawsky@xxxxxxxxx; vishal.l.verma@xxxxxxxxx; >alison.schofield@xxxxxxxxx; Jonathan Cameron ><jonathan.cameron@xxxxxxxxxx>; ira.weiny@xxxxxxxxx; linux- >pci@xxxxxxxxxxxxxxx >Subject: [PATCH 8/8] cxl/pci: Add (hopeful) error handling support > >Add nominal error handling that tears down CXL.mem in response to error >notifications that imply a device reset. Given some CXL.mem may be >operating as System RAM, there is a high likelihood that these error events >are fatal. However, if the system survives the notification the expectation is >that the driver behavior is equivalent to a hot-unplug and re-plug of an >endpoint. > >Note that this does not change the mask values from the default. That awaits >CXL _OSC support to determine whether platform firmware is in control of the >mask registers. > >Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx> >--- > drivers/cxl/core/memdev.c | 1 > drivers/cxl/cxlmem.h | 2 + > drivers/cxl/pci.c | 109 >+++++++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 112 insertions(+) > >diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index >1f76b28f9826..223d512790e1 100644 >--- a/drivers/cxl/core/memdev.c >+++ b/drivers/cxl/core/memdev.c >@@ -341,6 +341,7 @@ struct cxl_memdev *devm_cxl_add_memdev(struct >cxl_dev_state *cxlds) > * needed as this is ordered with cdev_add() publishing the device. > */ > cxlmd->cxlds = cxlds; >+ cxlds->cxlmd = cxlmd; > > cdev = &cxlmd->cdev; > rc = cdev_device_add(cdev, dev); >diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index >5d33ce24fe09..f58e16951414 100644 >--- a/drivers/cxl/cxlmem.h >+++ b/drivers/cxl/cxlmem.h >@@ -117,6 +117,7 @@ struct cxl_endpoint_dvsec_info { > * Currently only memory devices are represented. > * > * @dev: The device associated with this CXL state >+ * @cxlmd: The device representing the CXL.mem capabilities of @dev > * @regs: Parsed register blocks > * @cxl_dvsec: Offset to the PCIe device DVSEC > * @payload_size: Size of space for payload @@ -148,6 +149,7 @@ struct >cxl_endpoint_dvsec_info { > */ > struct cxl_dev_state { > struct device *dev; >+ struct cxl_memdev *cxlmd; > > struct cxl_regs regs; > int cxl_dvsec; >diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index >bde8929450f0..823cbfa093fa 100644 >--- a/drivers/cxl/pci.c >+++ b/drivers/cxl/pci.c >@@ -8,6 +8,7 @@ > #include <linux/mutex.h> > #include <linux/list.h> > #include <linux/pci.h> >+#include <linux/aer.h> > #include <linux/io.h> > #include "cxlmem.h" > #include "cxlpci.h" >@@ -533,6 +534,11 @@ static void cxl_dvsec_ranges(struct cxl_dev_state >*cxlds) > info->ranges = __cxl_dvsec_ranges(cxlds, info); } > >+static void disable_aer(void *pdev) >+{ >+ pci_disable_pcie_error_reporting(pdev); >+} >+ > static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) >{ > struct cxl_register_map map; >@@ -554,6 +560,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const >struct pci_device_id *id) > cxlds = cxl_dev_state_create(&pdev->dev); > if (IS_ERR(cxlds)) > return PTR_ERR(cxlds); >+ pci_set_drvdata(pdev, cxlds); > > cxlds->serial = pci_get_dsn(pdev); > cxlds->cxl_dvsec = pci_find_dvsec_capability( @@ -610,6 +617,14 @@ >static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) > if (IS_ERR(cxlmd)) > return PTR_ERR(cxlmd); > >+ if (cxlds->regs.ras) { >+ pci_enable_pcie_error_reporting(pdev); >+ rc = devm_add_action_or_reset(&pdev->dev, disable_aer, >pdev); >+ if (rc) >+ return rc; >+ } >+ pci_save_state(pdev); >+ > if (range_len(&cxlds->pmem_range) && >IS_ENABLED(CONFIG_CXL_PMEM)) > rc = devm_cxl_add_nvdimm(&pdev->dev, cxlmd); > >@@ -623,10 +638,104 @@ static const struct pci_device_id cxl_mem_pci_tbl[] >= { }; MODULE_DEVICE_TABLE(pci, cxl_mem_pci_tbl); > >+/* >+ * Log the state of the RAS status registers and prepare them to log >+the >+ * next error status. >+ */ >+static void cxl_report_and_clear(struct cxl_dev_state *cxlds) { >+ struct cxl_memdev *cxlmd = cxlds->cxlmd; >+ struct device *dev = &cxlmd->dev; >+ void __iomem *addr; >+ u32 status; >+ >+ if (!cxlds->regs.ras) >+ return; In the cxl_error_detected () may need to return PCI_ERS_RESULT_NONE for the following cases, if exist, 1. if (!cxlds->regs.ras), 2. if any errors would be reported during the dev initialization. >+ >+ addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET; >+ status = readl(addr); >+ if (status & CXL_RAS_UNCORRECTABLE_STATUS_MASK) { >+ dev_warn(cxlds->dev, "%s: uncorrectable status: %#08x\n", >+ dev_name(dev), status); >+ writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, >addr); >+ } For the uncorrectable non-fatal errors, if any, may need to return PCI_ERS_RESULT_NEED_RESET to trigger the slot reset to prevent more serious issues later. For this case the state would be "pci_channel_io_normal". >+ >+ addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_STATUS_OFFSET; >+ status = readl(addr); >+ if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) { >+ dev_warn(cxlds->dev, "%s: correctable status: %#08x\n", >+ dev_name(dev), status); >+ writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, >addr); >+ } >+} >+ >+static pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, >+ pci_channel_state_t state) >+{ >+ struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); >+ struct cxl_memdev *cxlmd = cxlds->cxlmd; >+ struct device *dev = &cxlmd->dev; >+ >+ /* >+ * A frozen channel indicates an impending reset which is fatal to >+ * CXL.mem operation, and will likely crash the system. On the off >+ * chance the situation is recoverable dump the status of the RAS >+ * capability registers and bounce the active state of the memdev. >+ */ >+ cxl_report_and_clear(cxlds); >+ >+ switch (state) { >+ case pci_channel_io_normal: >+ return PCI_ERS_RESULT_CAN_RECOVER; >+ case pci_channel_io_frozen: >+ dev_warn(&pdev->dev, >+ "%s: frozen state error detected, disable >CXL.mem\n", >+ dev_name(dev)); >+ device_release_driver(dev); >+ return PCI_ERS_RESULT_NEED_RESET; >+ case pci_channel_io_perm_failure: >+ dev_warn(&pdev->dev, >+ "failure state error detected, request disconnect\n"); >+ return PCI_ERS_RESULT_DISCONNECT; >+ } >+ return PCI_ERS_RESULT_NEED_RESET; >+} >+ >+static pci_ers_result_t cxl_slot_reset(struct pci_dev *pdev) { >+ struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); >+ struct cxl_memdev *cxlmd = cxlds->cxlmd; >+ struct device *dev = &cxlmd->dev; >+ >+ dev_info(&pdev->dev, "%s: restart CXL.mem after slot reset\n", >+ dev_name(dev)); >+ pci_restore_state(pdev); 1. Do we need to call pci_save_state(pdev) here after the reset? though pci_save_state(pdev) is being invoked in the cxl_pci_probe(). >+ if (device_attach(dev) <= 0) >+ return PCI_ERS_RESULT_DISCONNECT; My understanding is that pci_disable_pcie_error_reporting(pdev) would be called in the disable_aer () in the reset, pci_enable_pcie_error_reporting(pdev) may need to call here after the reset? >+ return PCI_ERS_RESULT_RECOVERED; >+} >+ >+static void cxl_error_resume(struct pci_dev *pdev) { >+ struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); >+ struct cxl_memdev *cxlmd = cxlds->cxlmd; >+ struct device *dev = &cxlmd->dev; >+ >+ dev_info(&pdev->dev, "%s: error resume %s\n", dev_name(dev), >+ dev->driver ? "successful" : "failed"); } >+ >+static const struct pci_error_handlers cxl_error_handlers = { >+ .error_detected = cxl_error_detected, >+ .slot_reset = cxl_slot_reset, >+ .resume = cxl_error_resume, If the FLR (Function level reset) supported, please add the corresponding callback functions reset_prepare(..) and reset_done(..). >+}; >+ > static struct pci_driver cxl_pci_driver = { > .name = KBUILD_MODNAME, > .id_table = cxl_mem_pci_tbl, > .probe = cxl_pci_probe, >+ .err_handler = &cxl_error_handlers, > .driver = { > .probe_type = PROBE_PREFER_ASYNCHRONOUS, > }, Thanks, Shiju