On 15/01/2025 08:42, Jon Pan-Doh wrote:
Decouple stat collection from internal AER print functions. AERs from ghes
or cxl drivers have stat collection in pci_print_aer as that is where
aer_err_info is populated.
Tested using aer-inject[1] tool. AER sysfs counters still updated
correctly.
[1] https://git.kernel.org/pub/scm/linux/kernel/git/gong.chen/aer-inject.git
Signed-off-by: Jon Pan-Doh <pandoh@xxxxxxxxxx>
---
drivers/pci/pcie/aer.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index ba40800b5494..4bb0b3840402 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -695,7 +695,6 @@ static void __aer_print_error(struct pci_dev *dev,
pci_printk(level, dev, " [%2d] %-22s%s\n", i, errmsg,
info->first_error == i ? " (First)" : "");
}
- pci_dev_aer_stats_incr(dev, info);
}
With this change, we stop calling pci_dev_aer_stats_incr() in
dpc_process_error(). Is this intended?
All the best,
Karolina
void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
@@ -775,6 +774,8 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity,
info.mask = mask;
info.first_error = PCI_ERR_CAP_FEP(aer->cap_control);
+ pci_dev_aer_stats_incr(dev, &info);
+
pci_err(dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", status, mask);
__aer_print_error(dev, &info);
pci_err(dev, "aer_layer=%s, aer_agent=%s\n",
@@ -1249,8 +1250,10 @@ static inline void aer_process_err_devices(struct aer_err_info *e_info)
/* Report all before handle them, not to lost records by reset etc. */
for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) {
- if (aer_get_device_error_info(e_info->dev[i], e_info))
+ if (aer_get_device_error_info(e_info->dev[i], e_info)) {
+ pci_dev_aer_stats_incr(e_info->dev[i], e_info);
aer_print_error(e_info->dev[i], e_info);
+ }
}
for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) {
if (aer_get_device_error_info(e_info->dev[i], e_info))