On 2/11/25 12:24 PM, Terry Bowman wrote: > Introduce correctable and uncorrectable (UCE) CXL PCIe Port Protocol Error > handlers. > > The handlers will be called with a 'struct pci_dev' parameter > indicating the CXL Port device requiring handling. The CXL PCIe Port > device's underlying 'struct device' will match the port device in the > CXL topology. > > Use the PCIe Port's device object to find the matching CXL Upstream Switch > Port, CXL Downstream Switch Port, or CXL Root Port in the CXL topology. The > matching CXL Port device should contain a cached reference to the RAS > register block. The cached RAS block will be used in handling the error. > > Invoke the existing __cxl_handle_ras() or __cxl_handle_cor_ras() using > a reference to the RAS registers as a parameter. These functions will use > the RAS register reference to indicate an error and clear the device's RAS > status. > > Update __cxl_handle_ras() to return PCI_ERS_RESULT_PANIC in the case > an error is present in the RAS status. Otherwise, return > PCI_ERS_RESULT_NONE. Maybe a comment on why the change? > > Signed-off-by: Terry Bowman <terry.bowman@xxxxxxx> > --- > drivers/cxl/core/pci.c | 81 +++++++++++++++++++++++++++++++++++++++--- > 1 file changed, 77 insertions(+), 4 deletions(-) > > diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c > index af809e7cbe3b..3f13d9dfb610 100644 > --- a/drivers/cxl/core/pci.c > +++ b/drivers/cxl/core/pci.c > @@ -699,7 +699,7 @@ static void header_log_copy(void __iomem *ras_base, u32 *log) > * Log the state of the RAS status registers and prepare them to log the > * next error status. Return 1 if reset needed. > */ > -static bool __cxl_handle_ras(struct device *dev, void __iomem *ras_base) > +static pci_ers_result_t __cxl_handle_ras(struct device *dev, void __iomem *ras_base) > { > u32 hl[CXL_HEADERLOG_SIZE_U32]; > void __iomem *addr; > @@ -708,13 +708,13 @@ static bool __cxl_handle_ras(struct device *dev, void __iomem *ras_base) > > if (!ras_base) { > dev_warn_once(dev, "CXL RAS register block is not mapped"); > - return false; > + return PCI_ERS_RESULT_NONE; > } > > addr = ras_base + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET; > status = readl(addr); > if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK)) > - return false; > + return PCI_ERS_RESULT_NONE; > > /* If multiple errors, log header points to first error from ctrl reg */ > if (hweight32(status) > 1) { > @@ -733,7 +733,7 @@ static bool __cxl_handle_ras(struct device *dev, void __iomem *ras_base) > > writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr); > > - return true; > + return PCI_ERS_RESULT_PANIC; > } > > static bool cxl_handle_endpoint_ras(struct cxl_dev_state *cxlds) > @@ -782,6 +782,79 @@ static void cxl_disable_rch_root_ints(struct cxl_dport *dport) > writel(aer_cmd, aer_base + PCI_ERR_ROOT_COMMAND); > } > > +static int match_uport(struct device *dev, const void *data) > +{ > + const struct device *uport_dev = data; > + struct cxl_port *port; > + > + if (!is_cxl_port(dev)) > + return 0; > + > + port = to_cxl_port(dev); > + > + return port->uport_dev == uport_dev; > +} > + > +static void __iomem *cxl_pci_port_ras(struct pci_dev *pdev, struct device **dev) > +{ > + void __iomem *ras_base; > + > + if (!pdev || !*dev) { > + pr_err("Failed, parameter is NULL"); > + return NULL; > + } > + > + if ((pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT) || > + (pci_pcie_type(pdev) == PCI_EXP_TYPE_DOWNSTREAM)) { Can probably just do a switch block here for the type? > + struct cxl_port *port __free(put_cxl_port); > + struct cxl_dport *dport = NULL; > + > + port = find_cxl_port(&pdev->dev, &dport); Just declare port inline: struct cxl_port *port __free(put_cxl_port) = find_cxl_port(&pdev->dev, &dport); > + if (!port) { > + pci_err(pdev, "Failed to find root/dport in CXL topology\n"); > + return NULL; > + } > + > + ras_base = dport ? dport->regs.ras : NULL; > + *dev = &port->dev; > + } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_UPSTREAM) { > + struct device *port_dev __free(put_device); same comment here as above DJ > + struct cxl_port *port; > + > + port_dev = bus_find_device(&cxl_bus_type, NULL, &pdev->dev, > + match_uport); > + if (!port_dev || !is_cxl_port(port_dev)) { > + pci_err(pdev, "Failed to find uport in CXL topology\n"); > + return NULL; > + } > + > + port = to_cxl_port(port_dev); > + ras_base = port ? port->uport_regs.ras : NULL; > + *dev = port_dev; > + } else { > + pci_err(pdev, "Unsupported device type\n"); > + ras_base = NULL; > + } > + > + return ras_base; > +} > + > +static void cxl_port_cor_error_detected(struct pci_dev *pdev) > +{ > + struct device *dev; > + void __iomem *ras_base = cxl_pci_port_ras(pdev, &dev); > + > + __cxl_handle_cor_ras(dev, ras_base); > +} > + > +static pci_ers_result_t cxl_port_error_detected(struct pci_dev *pdev) > +{ > + struct device *dev; > + void __iomem *ras_base = cxl_pci_port_ras(pdev, &dev); > + > + return __cxl_handle_ras(dev, ras_base); > +} > + > void cxl_uport_init_ras_reporting(struct cxl_port *port) > { >