This patch is required not to lost error records by action invoked on error recovery, such as slot reset etc. Following sample (real machine + dummy record injected by aer-inject) shows that record of 28:00.1 could not be retrieved by recovery of 28:00.0: - Before: pcieport-driver 0000:00:02.0: AER: Multiple Uncorrected (Non-Fatal) error received: id=2801 e1000e 0000:28:00.0: PCIE Bus Error: severity=Uncorrected (Non-Fatal), type=Transaction Layer, id=2800(Receiver ID) e1000e 0000:28:00.0: device [8086:1096] error status/mask=00001000/00100000 e1000e 0000:28:00.0: [12] Poisoned TLP (First) e1000e 0000:28:00.0: TLP Header: 00000000 00000001 00000002 00000003 e1000e 0000:28:00.0: broadcast error_detected message e1000e 0000:28:00.0: broadcast slot_reset message e1000e 0000:28:00.0: setting latency timer to 64 e1000e 0000:28:00.0: restoring config space at offset 0x1 (was 0x100547, writing 0x100147) e1000e 0000:28:00.0: PME# disabled e1000e 0000:28:00.0: PME# disabled e1000e 0000:28:00.1: setting latency timer to 64 e1000e 0000:28:00.1: restoring config space at offset 0x1 (was 0x100547, writing 0x100147) e1000e 0000:28:00.1: PME# disabled e1000e 0000:28:00.1: PME# disabled e1000e 0000:28:00.0: broadcast resume message e1000e 0000:28:00.0: AER driver successfully recovered e1000e: eth0 NIC Link is Up 1000 Mbps Full Duplex, Flow Control: RX/TX - After: pcieport-driver 0000:00:02.0: AER: Multiple Uncorrected (Non-Fatal) error received: id=2801 e1000e 0000:28:00.0: PCIE Bus Error: severity=Uncorrected (Non-Fatal), type=Transaction Layer, id=2800(Receiver ID) e1000e 0000:28:00.0: device [8086:1096] error status/mask=00001000/00100000 e1000e 0000:28:00.0: [12] Poisoned TLP (First) e1000e 0000:28:00.0: TLP Header: 00000000 00000001 00000002 00000003 e1000e 0000:28:00.1: PCIE Bus Error: severity=Uncorrected (Non-Fatal), type=Transaction Layer, id=2801(Receiver ID) e1000e 0000:28:00.1: device [8086:1096] error status/mask=00081000/00100000 e1000e 0000:28:00.1: [12] Poisoned TLP (First) e1000e 0000:28:00.1: [19] ECRC e1000e 0000:28:00.1: TLP Header: 00000000 00000001 00000002 00000003 e1000e 0000:28:00.1: Error of this Agent(2801) is reported first e1000e 0000:28:00.0: broadcast error_detected message e1000e 0000:28:00.0: broadcast slot_reset message e1000e 0000:28:00.0: setting latency timer to 64 e1000e 0000:28:00.0: restoring config space at offset 0x1 (was 0x100547, writing 0x100147) e1000e 0000:28:00.0: PME# disabled e1000e 0000:28:00.0: PME# disabled e1000e 0000:28:00.1: setting latency timer to 64 e1000e 0000:28:00.1: restoring config space at offset 0x1 (was 0x100547, writing 0x100147) e1000e 0000:28:00.1: PME# disabled e1000e 0000:28:00.1: PME# disabled e1000e 0000:28:00.0: broadcast resume message e1000e 0000:28:00.0: AER driver successfully recovered e1000e: eth0 NIC Link is Up 1000 Mbps Full Duplex, Flow Control: RX/TX Signed-off-by: Hidetoshi Seto <seto.hidetoshi@xxxxxxxxxxxxxx> --- drivers/pci/pcie/aer/aerdrv.h | 2 -- drivers/pci/pcie/aer/aerdrv_core.c | 29 ++++++++++++++++++----------- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/drivers/pci/pcie/aer/aerdrv.h b/drivers/pci/pcie/aer/aerdrv.h index f9979eb..bd833ea 100644 --- a/drivers/pci/pcie/aer/aerdrv.h +++ b/drivers/pci/pcie/aer/aerdrv.h @@ -29,8 +29,6 @@ #define ERR_COR_ID(d) (d & 0xffff) #define ERR_UNCOR_ID(d) (d >> 16) -#define AER_SUCCESS 0 -#define AER_UNSUCCESS 1 #define AER_ERROR_SOURCES_MAX 100 #define AER_LOG_TLP_MASKS (PCI_ERR_UNC_POISON_TLP| \ diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index d9185cd..9f5ccbe 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -696,6 +696,13 @@ static struct aer_err_source *get_e_source(struct aer_rpc *rpc) return e_source; } +/** + * get_device_error_info - read error status from dev and store it to info + * @dev: pointer to the device expected to have a error record + * @info: pointer to structure to store the error record + * + * Return 1 on success, 0 on error. + */ static int get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) { int pos, temp; @@ -707,7 +714,7 @@ static int get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) /* The device might not support AER */ if (!pos) - return AER_SUCCESS; + return 1; if (info->severity == AER_CORRECTABLE) { pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, @@ -715,7 +722,7 @@ static int get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK, &info->mask); if (!(info->status & ~info->mask)) - return AER_UNSUCCESS; + return 0; } else if (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE || info->severity == AER_NONFATAL) { @@ -725,7 +732,7 @@ static int get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, &info->mask); if (!(info->status & ~info->mask)) - return AER_UNSUCCESS; + return 0; /* Get First Error Pointer */ pci_read_config_dword(dev, pos + PCI_ERR_CAP, &temp); @@ -744,7 +751,7 @@ static int get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) } } - return AER_SUCCESS; + return 1; } static inline void aer_process_err_devices(struct pcie_device *p_device, @@ -758,14 +765,14 @@ static inline void aer_process_err_devices(struct pcie_device *p_device, e_info->id); } + /* Report all before handle them, not to lost records by reset etc. */ for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) { - if (get_device_error_info(e_info->dev[i], e_info) == - AER_SUCCESS) { + if (get_device_error_info(e_info->dev[i], e_info)) aer_print_error(e_info->dev[i], e_info); - handle_error_source(p_device, - e_info->dev[i], - e_info); - } + } + for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) { + if (get_device_error_info(e_info->dev[i], e_info)) + handle_error_source(p_device, e_info->dev[i], e_info); } } @@ -870,5 +877,5 @@ int aer_init(struct pcie_device *dev) if (aer_osc_setup(dev) && !forceload) return -ENXIO; - return AER_SUCCESS; + return 0; } -- 1.6.4.1 -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html