On 2/11/2025 6:11 PM, Dave Jiang wrote: > > On 2/11/25 12:24 PM, Terry Bowman wrote: >> Introduce correctable and uncorrectable (UCE) CXL PCIe Port Protocol Error >> handlers. >> >> The handlers will be called with a 'struct pci_dev' parameter >> indicating the CXL Port device requiring handling. The CXL PCIe Port >> device's underlying 'struct device' will match the port device in the >> CXL topology. >> >> Use the PCIe Port's device object to find the matching CXL Upstream Switch >> Port, CXL Downstream Switch Port, or CXL Root Port in the CXL topology. The >> matching CXL Port device should contain a cached reference to the RAS >> register block. The cached RAS block will be used in handling the error. >> >> Invoke the existing __cxl_handle_ras() or __cxl_handle_cor_ras() using >> a reference to the RAS registers as a parameter. These functions will use >> the RAS register reference to indicate an error and clear the device's RAS >> status. >> >> Update __cxl_handle_ras() to return PCI_ERS_RESULT_PANIC in the case >> an error is present in the RAS status. Otherwise, return >> PCI_ERS_RESULT_NONE. > Maybe a comment on why the change? Ok. >> Signed-off-by: Terry Bowman <terry.bowman@xxxxxxx> >> --- >> drivers/cxl/core/pci.c | 81 +++++++++++++++++++++++++++++++++++++++--- >> 1 file changed, 77 insertions(+), 4 deletions(-) >> >> diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c >> index af809e7cbe3b..3f13d9dfb610 100644 >> --- a/drivers/cxl/core/pci.c >> +++ b/drivers/cxl/core/pci.c >> @@ -699,7 +699,7 @@ static void header_log_copy(void __iomem *ras_base, u32 *log) >> * Log the state of the RAS status registers and prepare them to log the >> * next error status. Return 1 if reset needed. >> */ >> -static bool __cxl_handle_ras(struct device *dev, void __iomem *ras_base) >> +static pci_ers_result_t __cxl_handle_ras(struct device *dev, void __iomem *ras_base) >> { >> u32 hl[CXL_HEADERLOG_SIZE_U32]; >> void __iomem *addr; >> @@ -708,13 +708,13 @@ static bool __cxl_handle_ras(struct device *dev, void __iomem *ras_base) >> >> if (!ras_base) { >> dev_warn_once(dev, "CXL RAS register block is not mapped"); >> - return false; >> + return PCI_ERS_RESULT_NONE; >> } >> >> addr = ras_base + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET; >> status = readl(addr); >> if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK)) >> - return false; >> + return PCI_ERS_RESULT_NONE; >> >> /* If multiple errors, log header points to first error from ctrl reg */ >> if (hweight32(status) > 1) { >> @@ -733,7 +733,7 @@ static bool __cxl_handle_ras(struct device *dev, void __iomem *ras_base) >> >> writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr); >> >> - return true; >> + return PCI_ERS_RESULT_PANIC; >> } >> >> static bool cxl_handle_endpoint_ras(struct cxl_dev_state *cxlds) >> @@ -782,6 +782,79 @@ static void cxl_disable_rch_root_ints(struct cxl_dport *dport) >> writel(aer_cmd, aer_base + PCI_ERR_ROOT_COMMAND); >> } >> >> +static int match_uport(struct device *dev, const void *data) >> +{ >> + const struct device *uport_dev = data; >> + struct cxl_port *port; >> + >> + if (!is_cxl_port(dev)) >> + return 0; >> + >> + port = to_cxl_port(dev); >> + >> + return port->uport_dev == uport_dev; >> +} >> + >> +static void __iomem *cxl_pci_port_ras(struct pci_dev *pdev, struct device **dev) >> +{ >> + void __iomem *ras_base; >> + >> + if (!pdev || !*dev) { >> + pr_err("Failed, parameter is NULL"); >> + return NULL; >> + } >> + >> + if ((pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT) || >> + (pci_pcie_type(pdev) == PCI_EXP_TYPE_DOWNSTREAM)) { > Can probably just do a switch block here for the type? > >> + struct cxl_port *port __free(put_cxl_port); >> + struct cxl_dport *dport = NULL; >> + >> + port = find_cxl_port(&pdev->dev, &dport); > Just declare port inline: > > struct cxl_port *port __free(put_cxl_port) = > find_cxl_port(&pdev->dev, &dport); > >> + if (!port) { >> + pci_err(pdev, "Failed to find root/dport in CXL topology\n"); >> + return NULL; >> + } >> + >> + ras_base = dport ? dport->regs.ras : NULL; >> + *dev = &port->dev; >> + } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_UPSTREAM) { >> + struct device *port_dev __free(put_device); > same comment here as above > > DJ Thanks Dave, I'll incorporate these changes into v8. Terry >> + struct cxl_port *port; >> + >> + port_dev = bus_find_device(&cxl_bus_type, NULL, &pdev->dev, >> + match_uport); >> + if (!port_dev || !is_cxl_port(port_dev)) { >> + pci_err(pdev, "Failed to find uport in CXL topology\n"); >> + return NULL; >> + } >> + >> + port = to_cxl_port(port_dev); >> + ras_base = port ? port->uport_regs.ras : NULL; >> + *dev = port_dev; >> + } else { >> + pci_err(pdev, "Unsupported device type\n"); >> + ras_base = NULL; >> + } >> + >> + return ras_base; >> +} >> + >> +static void cxl_port_cor_error_detected(struct pci_dev *pdev) >> +{ >> + struct device *dev; >> + void __iomem *ras_base = cxl_pci_port_ras(pdev, &dev); >> + >> + __cxl_handle_cor_ras(dev, ras_base); >> +} >> + >> +static pci_ers_result_t cxl_port_error_detected(struct pci_dev *pdev) >> +{ >> + struct device *dev; >> + void __iomem *ras_base = cxl_pci_port_ras(pdev, &dev); >> + >> + return __cxl_handle_ras(dev, ras_base); >> +} >> + >> void cxl_uport_init_ras_reporting(struct cxl_port *port) >> { >>