Signed-off-by: Shuai Xue <xueshuai@xxxxxxxxxxxxxxxxx>
---
drivers/pci/pci.h | 3 ++-
drivers/pci/pcie/aer.c | 11 +++++++----
drivers/pci/pcie/dpc.c | 2 +-
drivers/pci/pcie/err.c | 9 +++++++++
4 files changed, 19 insertions(+), 6 deletions(-)
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 870d2fbd6ff2..e852fa58b250 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -549,7 +549,8 @@ struct aer_err_info {
struct pcie_tlp_log tlp; /* TLP Header */
};
-int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info);
+int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info,
+ bool link_healthy);
void aer_print_error(struct pci_dev *dev, struct aer_err_info *info);
int pcie_read_tlp_log(struct pci_dev *dev, int where, int where2,
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index 508474e17183..bfb67db074f0 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -1197,12 +1197,14 @@ EXPORT_SYMBOL_GPL(aer_recover_queue);
* aer_get_device_error_info - read error status from dev and store it to info
* @dev: pointer to the device expected to have a error record
* @info: pointer to structure to store the error record
+ * @link_healthy: link is healthy or not
*
* Return 1 on success, 0 on error.
*
* Note that @info is reused among all error devices. Clear fields properly.
*/
-int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
+int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info,
+ bool link_healthy)
{
int type = pci_pcie_type(dev);
int aer = dev->aer_cap;
@@ -1226,7 +1228,8 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
} else if (type == PCI_EXP_TYPE_ROOT_PORT ||
type == PCI_EXP_TYPE_RC_EC ||
type == PCI_EXP_TYPE_DOWNSTREAM ||
- info->severity == AER_NONFATAL) {
+ info->severity == AER_NONFATAL ||
+ (info->severity == AER_FATAL && link_healthy)) {
/* Link is still healthy for IO reads */
pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS,
@@ -1258,11 +1261,11 @@ static inline void aer_process_err_devices(struct aer_err_info *e_info)
/* Report all before handle them, not to lost records by reset etc. */
for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) {
- if (aer_get_device_error_info(e_info->dev[i], e_info))
+ if (aer_get_device_error_info(e_info->dev[i], e_info, false))
aer_print_error(e_info->dev[i], e_info);
}
for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) {
- if (aer_get_device_error_info(e_info->dev[i], e_info))
+ if (aer_get_device_error_info(e_info->dev[i], e_info, false))
handle_error_source(e_info->dev[i], e_info);
}
}
diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
index ea3ea989afa7..2d3dd831b755 100644
--- a/drivers/pci/pcie/dpc.c
+++ b/drivers/pci/pcie/dpc.c
@@ -303,7 +303,7 @@ struct pci_dev *dpc_process_error(struct pci_dev *pdev)
dpc_process_rp_pio_error(pdev);
else if (reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_UNCOR &&
dpc_get_aer_uncorrect_severity(pdev, &info) &&
- aer_get_device_error_info(pdev, &info)) {
+ aer_get_device_error_info(pdev, &info, false)) {
aer_print_error(pdev, &info);
pci_aer_clear_nonfatal_status(pdev);
pci_aer_clear_fatal_status(pdev);
diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
index 31090770fffc..462577b8d75a 100644
--- a/drivers/pci/pcie/err.c
+++ b/drivers/pci/pcie/err.c
@@ -196,6 +196,7 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
struct pci_dev *bridge;
pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER;
struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);
+ struct aer_err_info info;
/*
* If the error was detected by a Root Port, Downstream Port, RCEC,
@@ -223,6 +224,13 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
pci_warn(bridge, "subordinate device reset failed\n");
goto failed;
}
+
+ info.severity = AER_FATAL;
+ /* Link recovered, report fatal errors of RCiEP or EP */
+ if ((type == PCI_EXP_TYPE_ENDPOINT ||
+ type == PCI_EXP_TYPE_RC_END) &&
+ aer_get_device_error_info(dev, &info, true))
+ aer_print_error(dev, &info);
} else {
pci_walk_bridge(bridge, report_normal_detected, &status);
}
@@ -259,6 +267,7 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
if (host->native_aer || pcie_ports_native) {
pcie_clear_device_status(dev);
pci_aer_clear_nonfatal_status(dev);
+ pci_aer_clear_fatal_status(dev);