On Wed, Feb 07, 2018 at 03:11:25PM -0500, Tyler Baicar wrote: > Currently the AER driver uses cper_print_bits() to print the AER status > string. This causes the status string to not include the proper PCI device > name prefix that the other AER prints include. Also, it has a different > print level than all the other AER prints, and there is a potential to > have multiple status prints based on string lengths. > > Update the AER driver to print the AER status string with the proper string > prefix and proper print level, and abreviate the status strings similar to > lspci -vv prints so they can be printed on the same line. > > Previous log example: > > e1000e 0003:01:00.1: aer_status: 0x00000041, aer_mask: 0x00000000 > Receiver Error, Bad TLP > e1000e 0003:01:00.1: aer_layer=Physical Layer, aer_agent=Receiver ID > pcieport 0003:00:00.0: aer_status: 0x00001000, aer_mask: 0x0000e000 > Replay Timer Timeout > pcieport 0003:00:00.0: aer_layer=Data Link Layer, aer_agent=Transmitter ID > > New log: > > e1000e 0003:01:00.1: aer_status: 0x00000041, aer_mask: 0x00000000 > e1000e 0003:01:00.1: RxErr, BadTLP > e1000e 0003:01:00.1: aer_layer=Physical Layer, aer_agent=Receiver ID > pcieport 0003:00:00.0: aer_status: 0x00001000, aer_mask: 0x0000e000 > pcieport 0003:00:00.0: Timeout > pcieport 0003:00:00.0: aer_layer=Data Link Layer, aer_agent=Transmitter ID This is awesome, much better than before. But it only changes the output via the APEI/GHES path. I think errors reported via the "native" path, i.e., aer_print_error(), should look the same. Since this patch changes the way cper_print_aer() decodes the status bits, can you make the aer_print_error() status bit decoding match it? Both paths (cper_print_aer() and aer_print_error()) also print the raw "status" and "mask" values. But these can be from either the Uncorrectable Error registers or the Correctable Errors registers. I don't think cper_print_aer() prints any clue about which is the source. Can you include something like what aer_print_error() does, e.g., with aer_error_severity_string[]? I would suggest splitting this into a few patches: - abbreviate the *_error_string[] values - change cper_print_aer() to use dev_print_bits() instead of cper_print_bits() - change cper_print_aer() to print severity/type/id in the same format aer_print_error() uses - change aer_print_error() to use dev_print_bits() instead of __aer_print_error() > Signed-off-by: Tyler Baicar <tbaicar@xxxxxxxxxxxxxx> > --- > drivers/pci/pcie/aer/aerdrv_errprint.c | 71 ++++++++++++++++++++++------------ > 1 file changed, 47 insertions(+), 24 deletions(-) > > diff --git a/drivers/pci/pcie/aer/aerdrv_errprint.c b/drivers/pci/pcie/aer/aerdrv_errprint.c > index 6a352e6..bb68dd4 100644 > --- a/drivers/pci/pcie/aer/aerdrv_errprint.c > +++ b/drivers/pci/pcie/aer/aerdrv_errprint.c > @@ -72,22 +72,22 @@ > }; > > static const char *aer_correctable_error_string[] = { > - "Receiver Error", /* Bit Position 0 */ > + "RxErr", /* Bit Position 0 */ > NULL, > NULL, > NULL, > NULL, > NULL, > - "Bad TLP", /* Bit Position 6 */ > - "Bad DLLP", /* Bit Position 7 */ > - "RELAY_NUM Rollover", /* Bit Position 8 */ > + "BadTLP", /* Bit Position 6 */ > + "BadDLLP", /* Bit Position 7 */ > + "Rollover", /* Bit Position 8 */ > NULL, > NULL, > NULL, > - "Replay Timer Timeout", /* Bit Position 12 */ > - "Advisory Non-Fatal", /* Bit Position 13 */ > - "Corrected Internal Error", /* Bit Position 14 */ > - "Header Log Overflow", /* Bit Position 15 */ > + "Timeout", /* Bit Position 12 */ > + "NonFatalErr", /* Bit Position 13 */ > + "CorrIntErr", /* Bit Position 14 */ > + "HeaderOF", /* Bit Position 15 */ > }; > > static const char *aer_uncorrectable_error_string[] = { > @@ -95,28 +95,28 @@ > NULL, > NULL, > NULL, > - "Data Link Protocol", /* Bit Position 4 */ > - "Surprise Down Error", /* Bit Position 5 */ > + "DLP", /* Bit Position 4 */ > + "SDES", /* Bit Position 5 */ > NULL, > NULL, > NULL, > NULL, > NULL, > NULL, > - "Poisoned TLP", /* Bit Position 12 */ > - "Flow Control Protocol", /* Bit Position 13 */ > - "Completion Timeout", /* Bit Position 14 */ > - "Completer Abort", /* Bit Position 15 */ > - "Unexpected Completion", /* Bit Position 16 */ > - "Receiver Overflow", /* Bit Position 17 */ > - "Malformed TLP", /* Bit Position 18 */ > + "TLP", /* Bit Position 12 */ > + "FCP", /* Bit Position 13 */ > + "CmpltTO", /* Bit Position 14 */ > + "CmpltAbrt", /* Bit Position 15 */ > + "UnxCmplt", /* Bit Position 16 */ > + "RxOF", /* Bit Position 17 */ > + "MalfTLP", /* Bit Position 18 */ > "ECRC", /* Bit Position 19 */ > - "Unsupported Request", /* Bit Position 20 */ > - "ACS Violation", /* Bit Position 21 */ > - "Uncorrectable Internal Error", /* Bit Position 22 */ > - "MC Blocked TLP", /* Bit Position 23 */ > - "AtomicOp Egress Blocked", /* Bit Position 24 */ > - "TLP Prefix Blocked Error", /* Bit Position 25 */ > + "UnsupReq", /* Bit Position 20 */ > + "ACSViol", /* Bit Position 21 */ > + "UncorrIntErr", /* Bit Position 22 */ > + "BlockedTLP", /* Bit Position 23 */ > + "AtomicOpBlocked", /* Bit Position 24 */ > + "TLPBlockedErr", /* Bit Position 25 */ > }; > > static const char *aer_agent_string[] = { > @@ -203,6 +203,29 @@ void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info) > } > > #ifdef CONFIG_ACPI_APEI_PCIEAER > + > +#define MAX_PRINT_LENGTH 120 > + > +void dev_print_bits(struct pci_dev *dev, unsigned int bits, > + const char * const strs[], unsigned int strs_size) > +{ > + unsigned int i; > + char errs[MAX_PRINT_LENGTH]; > + > + errs[0] = '\0'; > + > + for (i = 0; i < strs_size; i++) { > + if (!(bits & (1U << i))) > + continue; > + if (strs[i]) { > + if (strlen(errs)) > + strlcat(errs, ", ", MAX_PRINT_LENGTH); > + strlcat(errs, strs[i], MAX_PRINT_LENGTH); > + } > + } > + dev_err(&dev->dev, "%s\n", errs); > +} > + > int cper_severity_to_aer(int cper_severity) > { > switch (cper_severity) { > @@ -240,7 +263,7 @@ void cper_print_aer(struct pci_dev *dev, int aer_severity, > agent = AER_GET_AGENT(aer_severity, status); > > pci_err(dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", status, mask); > - cper_print_bits("", status, status_strs, status_strs_size); > + dev_print_bits(dev, status, status_strs, status_strs_size); > pci_err(dev, "aer_layer=%s, aer_agent=%s\n", > aer_error_layer[layer], aer_agent_string[agent]); > > -- > Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies, Inc. > Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, > a Linux Foundation Collaborative Project. >