By default the CXL RAS mask register bits are set to 1, which suppresses all
error reporting. If the kernel has negotiated ownership of error handling for
CXL, then unmask the errors by writing 0s to the mask registers.

PCI_EXP_AER_FLAGS is moved to the linux/pci.h header to expose it to drivers.
This exposes the system-enabled PCI error flags so that a driver can decide
which error bits to toggle. Bjorn suggested that error enabling should be
controlled by system policy rather than being a driver-level choice [1]. The
CXL RAS CE and UE masks are checked against PCI_EXP_AER_FLAGS before unmasking.

[1]: https://lore.kernel.org/linux-cxl/20230210122952.00006999@xxxxxxxxxx/T/#me8c7f39d43029c64ccff5c950b78a2cee8e885af

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@xxxxxxxxxx>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@xxxxxxxxxx>
Signed-off-by: Dave Jiang <dave.jiang@xxxxxxxxx>
---

Based on the patch posted by Ira [1] to export CXL native error reporting
control.

[1]: https://lore.kernel.org/linux-cxl/20221212070627.1372402-2-ira.weiny@xxxxxxxxx/

v6:
- Call cxl_pci_ras_unmask() based on the return value of
  pci_enable_pcie_error_reporting().
- Check PCI_EXP_DEVCTL for the UE and CE bits before unmasking the respective
  error reporting.
v5:
- Add a single debug output to show the mask changing. (Dan)
v4:
- Fix masking of RAS register. (Jonathan)
v3:
- Remove flex bus port status check. (Jonathan)
- Only unmask known mask bits. (Jonathan)
v2:
- Add definition of PCI_EXP_LNKSTA2_FLIT. (Dan)
- Return error for cxl_pci_ras_unmask(). (Dan)
- Add dev_dbg() for register bits to be cleared. (Dan)
- Check Flex Port DVSEC status.
(Dan)
---
 drivers/cxl/cxl.h             |  1 +
 drivers/cxl/pci.c             | 89 +++++++++++++++++++++++++++++++++++++++--
 drivers/pci/pcie/aer.c        |  3 --
 include/linux/pci.h           |  2 +
 include/uapi/linux/pci_regs.h |  1 +
 5 files changed, 89 insertions(+), 7 deletions(-)

diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index b3964149c77b..d640fe61b893 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -130,6 +130,7 @@ static inline int ways_to_eiw(unsigned int ways, u8 *eiw)
 #define   CXL_RAS_UNCORRECTABLE_STATUS_MASK (GENMASK(16, 14) | GENMASK(11, 0))
 #define CXL_RAS_UNCORRECTABLE_MASK_OFFSET 0x4
 #define   CXL_RAS_UNCORRECTABLE_MASK_MASK (GENMASK(16, 14) | GENMASK(11, 0))
+#define   CXL_RAS_UNCORRECTABLE_MASK_F256B_MASK BIT(8)
 #define CXL_RAS_UNCORRECTABLE_SEVERITY_OFFSET 0x8
 #define   CXL_RAS_UNCORRECTABLE_SEVERITY_MASK (GENMASK(16, 14) | GENMASK(11, 0))
 #define CXL_RAS_CORRECTABLE_STATUS_OFFSET 0xC
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index 4cf9a2191602..4bb41cf581df 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -642,6 +642,80 @@ static int cxl_event_config(struct pci_host_bridge *host_bridge,
 	return 0;
 }
 
+/*
+ * CXL v3.0 6.2.3 Table 6-4
+ * The table indicates that if PCIe Flit Mode is set, then CXL is in 256B flits
+ * mode, otherwise it's 68B flits mode.
+ *
+ * Link Status 2 is a 16-bit register at offset 0x32, which is not dword
+ * aligned, so it has to be read with the word accessor.
+ */
+static bool cxl_pci_flit_256(struct pci_dev *pdev)
+{
+	u16 lnksta2;
+
+	pcie_capability_read_word(pdev, PCI_EXP_LNKSTA2, &lnksta2);
+	return lnksta2 & PCI_EXP_LNKSTA2_FLIT;
+}
+
+/*
+ * Clear the CXL RAS mask bits for the error classes that are both enabled in
+ * the PCIe Device Control register and part of PCI_EXP_AER_FLAGS.
+ *
+ * Returns 0 on success, -ENODEV if the RAS registers are not mapped,
+ * -EOPNOTSUPP if native_cxl_error is not set on the host bridge, or a negative
+ * errno if Device Control cannot be read.
+ */
+static int cxl_pci_ras_unmask(struct pci_dev *pdev)
+{
+	struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus);
+	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
+	void __iomem *addr;
+	u32 orig_val, val, mask;
+	u16 devctl;
+	int rc;
+
+	if (!cxlds->regs.ras)
+		return -ENODEV;
+
+	/* BIOS has CXL error control */
+	if (!host_bridge->native_cxl_error)
+		return -EOPNOTSUPP;
+
+	rc = pcie_capability_read_word(pdev, PCI_EXP_DEVCTL, &devctl);
+	if (rc)
+		return pcibios_err_to_errno(rc);
+
+	/* Only honor enable bits that belong to the system AER policy */
+	devctl &= PCI_EXP_AER_FLAGS;
+
+	if (devctl & PCI_EXP_DEVCTL_URRE) {
+		addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_MASK_OFFSET;
+		orig_val = readl(addr);
+
+		mask = CXL_RAS_UNCORRECTABLE_MASK_MASK;
+		/* Keep the F256B bit masked unless the link is in 256B flit mode */
+		if (!cxl_pci_flit_256(pdev))
+			mask &= ~CXL_RAS_UNCORRECTABLE_MASK_F256B_MASK;
+		val = orig_val & ~mask;
+		writel(val, addr);
+		dev_dbg(&pdev->dev,
+			"Uncorrectable RAS Errors Mask: %#x -> %#x\n",
+			orig_val, val);
+	}
+
+	if (devctl & PCI_EXP_DEVCTL_CERE) {
+		addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_MASK_OFFSET;
+		orig_val = readl(addr);
+		val = orig_val & ~CXL_RAS_CORRECTABLE_MASK_MASK;
+		writel(val, addr);
+		dev_dbg(&pdev->dev, "Correctable RAS Errors Mask: %#x -> %#x\n",
+			orig_val, val);
+	}
+
+	return 0;
+}
+
 static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus);
@@ -734,10 +808,17 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		return rc;
 
 	if (cxlds->regs.ras) {
-		pci_enable_pcie_error_reporting(pdev);
-		rc = devm_add_action_or_reset(&pdev->dev, disable_aer, pdev);
-		if (rc)
-			return rc;
+		/* AER enable failure is not fatal, so keep it out of rc */
+		if (pci_enable_pcie_error_reporting(pdev)) {
+			dev_warn(&pdev->dev, "Failed to enable PCIE AER.\n");
+		} else {
+			/* Unmasking is best effort; only note when skipped */
+			if (cxl_pci_ras_unmask(pdev))
+				dev_dbg(&pdev->dev, "No RAS reporting unmasked\n");
+			rc = devm_add_action_or_reset(&pdev->dev, disable_aer, pdev);
+			if (rc)
+				return rc;
+		}
 	}
 
 	pci_save_state(pdev);
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index 625f7b2cafe4..21af538ee4a0 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -214,9 +214,6 @@ void pcie_ecrc_get_policy(char *str)
 }
 #endif	/* CONFIG_PCIE_ECRC */
 
-#define	PCI_EXP_AER_FLAGS	(PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | \
-				 PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE)
-
 int pcie_aer_is_native(struct pci_dev *dev)
 {
 	struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 22319ea71ab0..508eb3659762 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -2545,4 +2545,6 @@ void pci_ims_free_irq(struct pci_dev *pdev, struct msi_map map);
 	WARN_ONCE(condition, "%s %s: " fmt, \
 		  dev_driver_string(&(pdev)->dev), pci_name(pdev), ##arg)
 
+#define	PCI_EXP_AER_FLAGS	(PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | \
+				 PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE)
 #endif /* LINUX_PCI_H */
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index 85ab1278811e..f19e5ccf5cc1 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -693,6 +693,7 @@
 #define  PCI_EXP_LNKCTL2_TX_MARGIN	0x0380 /* Transmit Margin */
 #define  PCI_EXP_LNKCTL2_HASD		0x0020 /* HW Autonomous Speed Disable */
 #define PCI_EXP_LNKSTA2		0x32	/* Link Status 2 */
+#define  PCI_EXP_LNKSTA2_FLIT		0x0400 /* Flit Mode Status */
 #define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2	0x32	/* end of v2 EPs w/ link */
 #define PCI_EXP_SLTCAP2		0x34	/* Slot Capabilities 2 */
 #define  PCI_EXP_SLTCAP2_IBPD	0x00000001 /* In-band PD Disable Supported */