On Tue, 15 Mar 2022 21:14:24 -0700 Dan Williams <dan.j.williams@xxxxxxxxx> wrote: > Add nominal error handling that tears down CXL.mem in response to error > notifications that imply a device reset. Given some CXL.mem may be > operating as System RAM, there is a high likelihood that these error > events are fatal. However, if the system survives the notification the > expectation is that the driver behavior is equivalent to a hot-unplug > and re-plug of an endpoint. > > Note that this does not change the mask values from the default. That > awaits CXL _OSC support to determine whether platform firmware is in > control of the mask registers. > > Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx> I'm far from an expert in PCI error handling... I've asked one of our RAS folk to take a quick look so he may well reply as well. With that in mind... > --- > drivers/cxl/core/memdev.c | 1 > drivers/cxl/cxlmem.h | 2 + > drivers/cxl/pci.c | 109 +++++++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 112 insertions(+) > > diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c > index 1f76b28f9826..223d512790e1 100644 > --- a/drivers/cxl/core/memdev.c > +++ b/drivers/cxl/core/memdev.c > @@ -341,6 +341,7 @@ struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds) > * needed as this is ordered with cdev_add() publishing the device. > */ > cxlmd->cxlds = cxlds; > + cxlds->cxlmd = cxlmd; > > cdev = &cxlmd->cdev; > rc = cdev_device_add(cdev, dev); > diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h > index 5d33ce24fe09..f58e16951414 100644 > --- a/drivers/cxl/cxlmem.h > +++ b/drivers/cxl/cxlmem.h > @@ -117,6 +117,7 @@ struct cxl_endpoint_dvsec_info { > * Currently only memory devices are represented. > * > * @dev: The device associated with this CXL state > + * @cxlmd: The device representing the CXL.mem capabilities of @dev > * @regs: Parsed register blocks > * @cxl_dvsec: Offset to the PCIe device DVSEC > * @payload_size: Size of space for payload > @@ -148,6 +149,7 @@ struct cxl_endpoint_dvsec_info { > */ > struct cxl_dev_state { > struct device *dev; > + struct cxl_memdev *cxlmd; This is only used I think to access the cxlmd->dev. Perhaps better to pass the dev and avoid having a reference to the memdev in here? > > struct cxl_regs regs; > int cxl_dvsec; > diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c > index bde8929450f0..823cbfa093fa 100644 > --- a/drivers/cxl/pci.c > +++ b/drivers/cxl/pci.c > @@ -8,6 +8,7 @@ > #include <linux/mutex.h> > #include <linux/list.h> > #include <linux/pci.h> > +#include <linux/aer.h> > #include <linux/io.h> > #include "cxlmem.h" > #include "cxlpci.h" > @@ -533,6 +534,11 @@ static void cxl_dvsec_ranges(struct cxl_dev_state *cxlds) > info->ranges = __cxl_dvsec_ranges(cxlds, info); > } > > +static void disable_aer(void *pdev) > +{ > + pci_disable_pcie_error_reporting(pdev); > +} > + > static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) > { > struct cxl_register_map map; > @@ -554,6 +560,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) > cxlds = cxl_dev_state_create(&pdev->dev); > if (IS_ERR(cxlds)) > return PTR_ERR(cxlds); > + pci_set_drvdata(pdev, cxlds); > > cxlds->serial = pci_get_dsn(pdev); > cxlds->cxl_dvsec = pci_find_dvsec_capability( > @@ -610,6 +617,14 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) > if (IS_ERR(cxlmd)) > return PTR_ERR(cxlmd); > > + if (cxlds->regs.ras) { > + pci_enable_pcie_error_reporting(pdev); > + rc = devm_add_action_or_reset(&pdev->dev, disable_aer, pdev); > + if (rc) > + return rc; > + } > + pci_save_state(pdev); > + > if (range_len(&cxlds->pmem_range) && IS_ENABLED(CONFIG_CXL_PMEM)) > rc = devm_cxl_add_nvdimm(&pdev->dev, cxlmd); > > @@ -623,10 +638,104 @@ static const struct pci_device_id cxl_mem_pci_tbl[] = { > }; > MODULE_DEVICE_TABLE(pci, cxl_mem_pci_tbl); > > +/* > + * Log the state of the RAS status registers and prepare them to log the > + * next error status. > + */ > +static void cxl_report_and_clear(struct cxl_dev_state *cxlds) > +{ > + struct cxl_memdev *cxlmd = cxlds->cxlmd; > + struct device *dev = &cxlmd->dev; > + void __iomem *addr; > + u32 status; > + > + if (!cxlds->regs.ras) > + return; > + > + addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET; > + status = readl(addr); > + if (status & CXL_RAS_UNCORRECTABLE_STATUS_MASK) { > + dev_warn(cxlds->dev, "%s: uncorrectable status: %#08x\n", > + dev_name(dev), status); > + writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr); > + } > + > + addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_STATUS_OFFSET; > + status = readl(addr); > + if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) { > + dev_warn(cxlds->dev, "%s: correctable status: %#08x\n", > + dev_name(dev), status); > + writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr); > + } > +} > + > +static pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, > + pci_channel_state_t state) > +{ > + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); > + struct cxl_memdev *cxlmd = cxlds->cxlmd; > + struct device *dev = &cxlmd->dev; Perhaps a more informative name than dev given we have several devs involved here? > + > + /* > + * A frozen channel indicates an impending reset which is fatal to > + * CXL.mem operation, and will likely crash the system. On the off > + * chance the situation is recoverable dump the status of the RAS > + * capability registers and bounce the active state of the memdev. > + */ > + cxl_report_and_clear(cxlds); > + > + switch (state) { > + case pci_channel_io_normal: > + return PCI_ERS_RESULT_CAN_RECOVER; > + case pci_channel_io_frozen: > + dev_warn(&pdev->dev, > + "%s: frozen state error detected, disable CXL.mem\n", > + dev_name(dev)); > + device_release_driver(dev); > + return PCI_ERS_RESULT_NEED_RESET; > + case pci_channel_io_perm_failure: > + dev_warn(&pdev->dev, > + "failure state error detected, request disconnect\n"); > + return PCI_ERS_RESULT_DISCONNECT; > + } > + return PCI_ERS_RESULT_NEED_RESET; > +} > + > +static pci_ers_result_t cxl_slot_reset(struct pci_dev *pdev) > +{ > + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); > + struct cxl_memdev *cxlmd = cxlds->cxlmd; > + struct device *dev = &cxlmd->dev; > + > + dev_info(&pdev->dev, "%s: restart CXL.mem after slot reset\n", > + dev_name(dev)); > + pci_restore_state(pdev); > + if (device_attach(dev) <= 0) > + return PCI_ERS_RESULT_DISCONNECT; > + return PCI_ERS_RESULT_RECOVERED; > +} > + > +static void cxl_error_resume(struct pci_dev *pdev) > +{ > + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); > + struct cxl_memdev *cxlmd = cxlds->cxlmd; > + struct device *dev = &cxlmd->dev; > + > + dev_info(&pdev->dev, "%s: error resume %s\n", dev_name(dev), > + dev->driver ? "successful" : "failed"); > +} > + > +static const struct pci_error_handlers cxl_error_handlers = { > + .error_detected = cxl_error_detected, > + .slot_reset = cxl_slot_reset, > + .resume = cxl_error_resume, > +}; > + > static struct pci_driver cxl_pci_driver = { > .name = KBUILD_MODNAME, > .id_table = cxl_mem_pci_tbl, > .probe = cxl_pci_probe, > + .err_handler = &cxl_error_handlers, > .driver = { > .probe_type = PROBE_PREFER_ASYNCHRONOUS, > }, >