Add nominal error handling that tears down CXL.mem in response to error notifications that imply a device reset. Given some CXL.mem may be operating as System RAM, there is a high likelihood that these error events are fatal. However, if the system survives the notification the expectation is that the driver behavior is equivalent to a hot-unplug and re-plug of an endpoint. Note that this does not change the mask values from the default. That awaits CXL _OSC support to determine whether platform firmware is in control of the mask registers. Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx> --- drivers/cxl/core/memdev.c | 1 drivers/cxl/cxlmem.h | 2 + drivers/cxl/pci.c | 109 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 112 insertions(+) diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index 1f76b28f9826..223d512790e1 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -341,6 +341,7 @@ struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds) * needed as this is ordered with cdev_add() publishing the device. */ cxlmd->cxlds = cxlds; + cxlds->cxlmd = cxlmd; cdev = &cxlmd->cdev; rc = cdev_device_add(cdev, dev); diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 5d33ce24fe09..f58e16951414 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -117,6 +117,7 @@ struct cxl_endpoint_dvsec_info { * Currently only memory devices are represented. * * @dev: The device associated with this CXL state + * @cxlmd: The device representing the CXL.mem capabilities of @dev * @regs: Parsed register blocks * @cxl_dvsec: Offset to the PCIe device DVSEC * @payload_size: Size of space for payload @@ -148,6 +149,7 @@ struct cxl_endpoint_dvsec_info { */ struct cxl_dev_state { struct device *dev; + struct cxl_memdev *cxlmd; struct cxl_regs regs; int cxl_dvsec; diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index bde8929450f0..823cbfa093fa 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -8,6 +8,7 @@ #include <linux/mutex.h> #include <linux/list.h> #include <linux/pci.h> +#include <linux/aer.h> #include <linux/io.h> #include "cxlmem.h" #include "cxlpci.h" @@ -533,6 +534,11 @@ static void cxl_dvsec_ranges(struct cxl_dev_state *cxlds) info->ranges = __cxl_dvsec_ranges(cxlds, info); } +static void disable_aer(void *pdev) +{ + pci_disable_pcie_error_reporting(pdev); +} + static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct cxl_register_map map; @@ -554,6 +560,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) cxlds = cxl_dev_state_create(&pdev->dev); if (IS_ERR(cxlds)) return PTR_ERR(cxlds); + pci_set_drvdata(pdev, cxlds); cxlds->serial = pci_get_dsn(pdev); cxlds->cxl_dvsec = pci_find_dvsec_capability( @@ -610,6 +617,14 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (IS_ERR(cxlmd)) return PTR_ERR(cxlmd); + if (cxlds->regs.ras) { + pci_enable_pcie_error_reporting(pdev); + rc = devm_add_action_or_reset(&pdev->dev, disable_aer, pdev); + if (rc) + return rc; + } + pci_save_state(pdev); + if (range_len(&cxlds->pmem_range) && IS_ENABLED(CONFIG_CXL_PMEM)) rc = devm_cxl_add_nvdimm(&pdev->dev, cxlmd); @@ -623,10 +638,104 @@ static const struct pci_device_id cxl_mem_pci_tbl[] = { }; MODULE_DEVICE_TABLE(pci, cxl_mem_pci_tbl); +/* + * Log the state of the RAS status registers and prepare them to log the + * next error status. + */ +static void cxl_report_and_clear(struct cxl_dev_state *cxlds) +{ + struct cxl_memdev *cxlmd = cxlds->cxlmd; + struct device *dev = &cxlmd->dev; + void __iomem *addr; + u32 status; + + if (!cxlds->regs.ras) + return; + + addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET; + status = readl(addr); + if (status & CXL_RAS_UNCORRECTABLE_STATUS_MASK) { + dev_warn(cxlds->dev, "%s: uncorrectable status: %#08x\n", + dev_name(dev), status); + writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr); + } + + addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_STATUS_OFFSET; + status = readl(addr); + if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) { + dev_warn(cxlds->dev, "%s: correctable status: %#08x\n", + dev_name(dev), status); + writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr); + } +} + +static pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); + struct cxl_memdev *cxlmd = cxlds->cxlmd; + struct device *dev = &cxlmd->dev; + + /* + * A frozen channel indicates an impending reset which is fatal to + * CXL.mem operation, and will likely crash the system. On the off + * chance the situation is recoverable dump the status of the RAS + * capability registers and bounce the active state of the memdev. + */ + cxl_report_and_clear(cxlds); + + switch (state) { + case pci_channel_io_normal: + return PCI_ERS_RESULT_CAN_RECOVER; + case pci_channel_io_frozen: + dev_warn(&pdev->dev, + "%s: frozen state error detected, disable CXL.mem\n", + dev_name(dev)); + device_release_driver(dev); + return PCI_ERS_RESULT_NEED_RESET; + case pci_channel_io_perm_failure: + dev_warn(&pdev->dev, + "failure state error detected, request disconnect\n"); + return PCI_ERS_RESULT_DISCONNECT; + } + return PCI_ERS_RESULT_NEED_RESET; +} + +static pci_ers_result_t cxl_slot_reset(struct pci_dev *pdev) +{ + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); + struct cxl_memdev *cxlmd = cxlds->cxlmd; + struct device *dev = &cxlmd->dev; + + dev_info(&pdev->dev, "%s: restart CXL.mem after slot reset\n", + dev_name(dev)); + pci_restore_state(pdev); + if (device_attach(dev) <= 0) + return PCI_ERS_RESULT_DISCONNECT; + return PCI_ERS_RESULT_RECOVERED; +} + +static void cxl_error_resume(struct pci_dev *pdev) +{ + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); + struct cxl_memdev *cxlmd = cxlds->cxlmd; + struct device *dev = &cxlmd->dev; + + dev_info(&pdev->dev, "%s: error resume %s\n", dev_name(dev), + dev->driver ? "successful" : "failed"); +} + +static const struct pci_error_handlers cxl_error_handlers = { + .error_detected = cxl_error_detected, + .slot_reset = cxl_slot_reset, + .resume = cxl_error_resume, +}; + static struct pci_driver cxl_pci_driver = { .name = KBUILD_MODNAME, .id_table = cxl_mem_pci_tbl, .probe = cxl_pci_probe, + .err_handler = &cxl_error_handlers, .driver = { .probe_type = PROBE_PREFER_ASYNCHRONOUS, },