Fetch and store the data of 3 more registers: "Link Status", "Device Control 2", and "Advanced Error Capabilities and Control". This data is needed for external observation to better understand ANFE. Signed-off-by: "Wang, Qingshun" <qingshun.wang@xxxxxxxxxxxxxxx> --- drivers/acpi/apei/ghes.c | 8 +++++++- drivers/cxl/core/pci.c | 11 ++++++++++- drivers/pci/pci.h | 4 ++++ drivers/pci/pcie/aer.c | 26 ++++++++++++++++++++------ include/linux/aer.h | 6 ++++-- 5 files changed, 45 insertions(+), 10 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 6034039d5cff..047cc01be68c 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -594,7 +594,9 @@ static void ghes_handle_aer(struct acpi_hest_generic_data *gdata) if (pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID && pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) { struct pcie_capability_regs *pcie_caps; + u16 device_control_2 = 0; u16 device_status = 0; + u16 link_status = 0; unsigned int devfn; int aer_severity; u8 *aer_info; @@ -619,7 +621,9 @@ static void ghes_handle_aer(struct acpi_hest_generic_data *gdata) if (pcie_err->validation_bits & CPER_PCIE_VALID_CAPABILITY) { pcie_caps = (struct pcie_capability_regs *)pcie_err->capability; + device_control_2 = pcie_caps->device_control_2; device_status = pcie_caps->device_status; + link_status = pcie_caps->link_status; } aer_recover_queue(pcie_err->device_id.segment, @@ -627,7 +631,9 @@ static void ghes_handle_aer(struct acpi_hest_generic_data *gdata) devfn, aer_severity, (struct aer_capability_regs *) aer_info, - device_status); + device_status, + link_status, + device_control_2); } #endif } diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 9111a4415a63..3aa57fe8db42 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -903,7 +903,9 @@ static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) struct aer_capability_regs aer_regs; struct cxl_dport *dport; struct cxl_port *port; + u16 device_control_2; u16 device_status; + u16 link_status; int severity; port = cxl_pci_find_port(pdev, &dport); @@ -918,10 +920,17 @@ static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) if (!cxl_rch_get_aer_severity(&aer_regs, &severity)) return; + if (pcie_capability_read_word(pdev, PCI_EXP_DEVCTL2, &device_control_2)) + return; + if (pcie_capability_read_word(pdev, PCI_EXP_DEVSTA, &device_status)) return; - pci_print_aer(pdev, severity, &aer_regs, device_status); + if (pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &link_status)) + return; + + pci_print_aer(pdev, severity, &aer_regs, device_status, + link_status, device_control_2); if (severity == AER_CORRECTABLE) cxl_handle_rdport_cor_ras(cxlds, dport); diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index f881a1b42f14..5788a94b4e95 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -412,7 +412,11 @@ struct aer_err_info { u32 uncor_mask; /* UNCOR Error Mask */ u32 uncor_status; /* UNCOR Error Status */ u32 uncor_severity; /* UNCOR Error Severity */ + + u16 link_status; + u32 aer_cap_ctrl; /* AER Capabilities and Control */ u16 device_status; + u16 device_control_2; struct aer_header_log_regs tlp; /* TLP Header */ }; diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index 713cbf625d3f..eec3406f727a 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -825,7 +825,8 @@ EXPORT_SYMBOL_GPL(cper_severity_to_aer); #endif void pci_print_aer(struct pci_dev *dev, int aer_severity, - struct aer_capability_regs *aer, u16 device_status) + struct aer_capability_regs *aer, u16 device_status, + u16 link_status, u16 device_control_2) { int layer, agent, tlp_header_valid = 0; u32 status, mask; @@ -850,7 +851,10 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity, info.uncor_status = aer->uncor_status; info.uncor_severity = aer->uncor_severity; info.uncor_mask = aer->uncor_mask; + info.link_status = link_status; + info.aer_cap_ctrl = aer->cap_control; info.device_status = device_status; + info.device_control_2 = device_control_2; info.first_error = PCI_ERR_CAP_FEP(aer->cap_control); pci_err(dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", status, mask); @@ -1205,7 +1209,9 @@ struct aer_recover_entry { u8 devfn; u16 domain; int severity; + u16 link_status; u16 device_status; + u16 device_control_2; struct aer_capability_regs *regs; }; @@ -1226,7 +1232,8 @@ static void aer_recover_work_func(struct work_struct *work) PCI_SLOT(entry.devfn), PCI_FUNC(entry.devfn)); continue; } - pci_print_aer(pdev, entry.severity, entry.regs, entry.device_status); + pci_print_aer(pdev, entry.severity, entry.regs, entry.device_status, + entry.link_status, entry.device_control_2); /* * Memory for aer_capability_regs(entry.regs) is being allocated from the * ghes_estatus_pool to protect it from overwriting when multiple sections @@ -1255,7 +1262,8 @@ static DEFINE_SPINLOCK(aer_recover_ring_lock); static DECLARE_WORK(aer_recover_work, aer_recover_work_func); void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn, - int severity, struct aer_capability_regs *aer_regs, u16 device_status) + int severity, struct aer_capability_regs *aer_regs, u16 device_status, + u16 link_status, u16 device_control_2) { struct aer_recover_entry entry = { .bus = bus, @@ -1263,7 +1271,9 @@ void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn, .domain = domain, .severity = severity, .regs = aer_regs, + .link_status = link_status, .device_status = device_status, + .device_control_2 = device_control_2, }; if (kfifo_in_spinlocked(&aer_recover_ring, &entry, 1, @@ -1289,7 +1299,6 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) { int type = pci_pcie_type(dev); int aer = dev->aer_cap; - int temp; /* Must reset in this function */ info->cor_status = 0; @@ -1317,8 +1326,14 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) &info->uncor_severity); pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, &info->uncor_mask); + pci_read_config_dword(dev, aer + PCI_ERR_CAP, + &info->aer_cap_ctrl); + pcie_capability_read_word(dev, PCI_EXP_LNKSTA, + &info->link_status); pcie_capability_read_word(dev, PCI_EXP_DEVSTA, &info->device_status); + pcie_capability_read_word(dev, PCI_EXP_DEVCTL2, + &info->device_control_2); } else { return 1; } @@ -1331,8 +1346,7 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) return 0; /* Get First Error Pointer */ - pci_read_config_dword(dev, aer + PCI_ERR_CAP, &temp); - info->first_error = PCI_ERR_CAP_FEP(temp); + info->first_error = PCI_ERR_CAP_FEP(info->aer_cap_ctrl); if (info->uncor_status & AER_LOG_TLP_MASKS) { info->tlp_header_valid = 1; diff --git a/include/linux/aer.h b/include/linux/aer.h index 38ac802250ac..327ebec1e4b3 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -52,9 +52,11 @@ static inline int pcie_aer_is_native(struct pci_dev *dev) { return 0; } #endif void pci_print_aer(struct pci_dev *dev, int aer_severity, - struct aer_capability_regs *aer, u16 device_status); + struct aer_capability_regs *aer, u16 device_status, + u16 link_status, u16 device_control_2); int cper_severity_to_aer(int cper_severity); void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn, - int severity, struct aer_capability_regs *aer_regs, u16 device_status); + int severity, struct aer_capability_regs *aer_regs, u16 device_status, + u16 link_status, u16 device_control_2); #endif //_AER_H_ -- 2.42.0