The AER documentation indicates that correctable (severity=Corrected) errors should be output as a warning so that users can filter these errors if they choose to; This functionality does not appear to have been implemented. This patch modifies the functions aer_print_error and __aer_print_error to send correctable errors as a warning (pci_warn), rather than as an error (pci_err). It partially addresses several bugs in relation to kernel message buffer spam for misbehaving devices - the root cause (possibly device firmware?) isn't addressed, but the dmesg output is less alarming for end users, and can be filtered separately from uncorrectable errors. This should hopefully reduce the need for users to disable AER to suppress corrected errors. Link: https://bugzilla.kernel.org/show_bug.cgi?id=201517 Link: https://bugzilla.kernel.org/show_bug.cgi?id=196183 Signed-off-by: Matt Jolly <Kangie@footclan.ninja> --- drivers/pci/pcie/aer.c | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index 3acf56683915..131ecc0df2cb 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -662,12 +662,18 @@ static void __aer_print_error(struct pci_dev *dev, errmsg = i < ARRAY_SIZE(aer_uncorrectable_error_string) ? aer_uncorrectable_error_string[i] : NULL; - if (errmsg) - pci_err(dev, " [%2d] %-22s%s\n", i, errmsg, - info->first_error == i ? " (First)" : ""); - else + if (errmsg) { + if (info->severity == AER_CORRECTABLE) { + pci_warn(dev, " [%2d] %-22s%s\n", i, errmsg, + info->first_error == i ? " (First)" : ""); + } else { + pci_err(dev, " [%2d] %-22s%s\n", i, errmsg, + info->first_error == i ? " (First)" : ""); + } + } else { pci_err(dev, " [%2d] Unknown Error Bit%s\n", i, info->first_error == i ? " (First)" : ""); + } } pci_dev_aer_stats_incr(dev, info); } @@ -686,13 +692,23 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) layer = AER_GET_LAYER_ERROR(info->severity, info->status); agent = AER_GET_AGENT(info->severity, info->status); - pci_err(dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n", - aer_error_severity_string[info->severity], - aer_error_layer[layer], aer_agent_string[agent]); + if (info->severity == AER_CORRECTABLE) { + pci_warn(dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n", + aer_error_severity_string[info->severity], + aer_error_layer[layer], aer_agent_string[agent]); - pci_err(dev, " device [%04x:%04x] error status/mask=%08x/%08x\n", - dev->vendor, dev->device, - info->status, info->mask); + pci_warn(dev, " device [%04x:%04x] error status/mask=%08x/%08x\n", + dev->vendor, dev->device, + info->status, info->mask); + } else { + pci_err(dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n", + aer_error_severity_string[info->severity], + aer_error_layer[layer], aer_agent_string[agent]); + + pci_err(dev, " device [%04x:%04x] error status/mask=%08x/%08x\n", + dev->vendor, dev->device, + info->status, info->mask); + } __aer_print_error(dev, info); -- 2.26.2