[PATCH 33/42] ACPI, APEI, Add PCIe AER error information printing support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Huang Ying <ying.huang@xxxxxxxxx>

The AER error information printing support is implemented in
drivers/pci/pcie/aer/aerdrv_errprint.c, so that the existing string
constants, functions and macro definitions there can be re-used
without being exported.

The original PCIe AER error information printing function is not
re-used directly because the overall output format is quite different,
and changing the original printing format could break existing users'
scripts.

Signed-off-by: Huang Ying <ying.huang@xxxxxxxxx>
CC: Jesse Barnes <jbarnes@xxxxxxxxxxxxxxxx>
CC: Zhang Yanmin <yanmin.zhang@xxxxxxxxx>
Signed-off-by: Len Brown <len.brown@xxxxxxxxx>
---
 Documentation/acpi/apei/output_format.txt |   25 ++++++++++++
 drivers/acpi/apei/Kconfig                 |    7 +++
 drivers/acpi/apei/cper.c                  |   18 +++++++--
 drivers/pci/pcie/aer/aerdrv.h             |    9 +----
 drivers/pci/pcie/aer/aerdrv_errprint.c    |   59 +++++++++++++++++++++++++++++
 include/linux/aer.h                       |   24 ++++++++++++
 include/linux/cper.h                      |    2 +
 7 files changed, 132 insertions(+), 12 deletions(-)

diff --git a/Documentation/acpi/apei/output_format.txt b/Documentation/acpi/apei/output_format.txt
index 9146952..0c49c19 100644
--- a/Documentation/acpi/apei/output_format.txt
+++ b/Documentation/acpi/apei/output_format.txt
@@ -92,6 +92,11 @@ vendor_id: <integer>, device_id: <integer>
 class_code: <integer>]
 [serial number: <integer>, <integer>]
 [bridge: secondary_status: <integer>, control: <integer>]
+[aer_status: <integer>, aer_mask: <integer>
+<aer status string>
+aer_layer=<aer layer string>, aer_agent=<aer agent string>
+[aer_uncor_severity: <integer>]
+aer_tlp_header: <integer> <integer> <integer> <integer>]
 
 <pcie port type string>* := PCIe end point | legacy PCI end point | \
 unknown | unknown | root port | upstream switch port | \
@@ -99,6 +104,26 @@ downstream switch port | PCIe to PCI/PCI-X bridge | \
 PCI/PCI-X to PCIe bridge | root complex integrated endpoint device | \
 root complex event collector
 
+if section severity is fatal or recoverable
+<aer status string># :=
+unknown | unknown | unknown | unknown | Data Link Protocol | \
+unknown | unknown | unknown | unknown | unknown | unknown | unknown | \
+Poisoned TLP | Flow Control Protocol | Completion Timeout | \
+Completer Abort | Unexpected Completion | Receiver Overflow | \
+Malformed TLP | ECRC | Unsupported Request
+else
+<aer status string># :=
+Receiver Error | unknown | unknown | unknown | unknown | unknown | \
+Bad TLP | Bad DLLP | RELAY_NUM Rollover | unknown | unknown | unknown | \
+Replay Timer Timeout | Advisory Non-Fatal
+fi
+
+<aer layer string> :=
+Physical Layer | Data Link Layer | Transaction Layer
+
+<aer agent string> :=
+Receiver ID | Requester ID | Completer ID | Transmitter ID
+
 Where, [] designate corresponding content is optional
 
 All <field string> description with * has the following format:
diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig
index fca34cc..9ecf6fe 100644
--- a/drivers/acpi/apei/Kconfig
+++ b/drivers/acpi/apei/Kconfig
@@ -21,6 +21,13 @@ config ACPI_APEI_GHES
 	  by firmware to produce more valuable hardware error
 	  information for Linux.
 
+config ACPI_APEI_PCIEAER
+	bool "APEI PCIe AER logging/recovering support"
+	depends on ACPI_APEI && PCIEAER
+	help
+	  PCIe AER errors may be reported via APEI firmware first mode.
+	  Turn on this option to enable the corresponding support.
+
 config ACPI_APEI_EINJ
 	tristate "APEI Error INJection (EINJ)"
 	depends on ACPI_APEI && DEBUG_FS
diff --git a/drivers/acpi/apei/cper.c b/drivers/acpi/apei/cper.c
index 31464a0..5d41894 100644
--- a/drivers/acpi/apei/cper.c
+++ b/drivers/acpi/apei/cper.c
@@ -29,6 +29,7 @@
 #include <linux/time.h>
 #include <linux/cper.h>
 #include <linux/acpi.h>
+#include <linux/aer.h>
 
 /*
  * CPER record ID need to be unique even after reboot, because record
@@ -70,8 +71,8 @@ static const char *cper_severity_str(unsigned int severity)
  * If the output length is longer than 80, multiple line will be
  * printed, with @pfx is printed at the beginning of each line.
  */
-static void cper_print_bits(const char *pfx, unsigned int bits,
-			    const char *strs[], unsigned int strs_size)
+void cper_print_bits(const char *pfx, unsigned int bits,
+		     const char *strs[], unsigned int strs_size)
 {
 	int i, len = 0;
 	const char *str;
@@ -81,6 +82,8 @@ static void cper_print_bits(const char *pfx, unsigned int bits,
 		if (!(bits & (1U << i)))
 			continue;
 		str = strs[i];
+		if (!str)
+			continue;
 		if (len && len + strlen(str) + 2 > 80) {
 			printk("%s\n", buf);
 			len = 0;
@@ -243,7 +246,8 @@ static const char *cper_pcie_port_type_strs[] = {
 	"root complex event collector",
 };
 
-static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie)
+static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
+			    const struct acpi_hest_generic_data *gdata)
 {
 	if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
 		printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
@@ -276,6 +280,12 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie)
 		printk(
 	"%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
 	pfx, pcie->bridge.secondary_status, pcie->bridge.control);
+#ifdef CONFIG_ACPI_APEI_PCIEAER
+	if (pcie->validation_bits & CPER_PCIE_VALID_AER_INFO) {
+		struct aer_capability_regs *aer_regs = (void *)pcie->aer_info;
+		cper_print_aer(pfx, gdata->error_severity, aer_regs);
+	}
+#endif
 }
 
 static const char *apei_estatus_section_flag_strs[] = {
@@ -322,7 +332,7 @@ static void apei_estatus_print_section(
 		struct cper_sec_pcie *pcie = (void *)(gdata + 1);
 		printk("%s""section_type: PCIe error\n", pfx);
 		if (gdata->error_data_length >= sizeof(*pcie))
-			cper_print_pcie(pfx, pcie);
+			cper_print_pcie(pfx, pcie, gdata);
 		else
 			goto err_section_too_small;
 	} else
diff --git a/drivers/pci/pcie/aer/aerdrv.h b/drivers/pci/pcie/aer/aerdrv.h
index 80c11d1..3eb7708 100644
--- a/drivers/pci/pcie/aer/aerdrv.h
+++ b/drivers/pci/pcie/aer/aerdrv.h
@@ -35,13 +35,6 @@
 					PCI_ERR_UNC_UNX_COMP|		\
 					PCI_ERR_UNC_MALF_TLP)
 
-struct header_log_regs {
-	unsigned int dw0;
-	unsigned int dw1;
-	unsigned int dw2;
-	unsigned int dw3;
-};
-
 #define AER_MAX_MULTI_ERR_DEVICES	5	/* Not likely to have more */
 struct aer_err_info {
 	struct pci_dev *dev[AER_MAX_MULTI_ERR_DEVICES];
@@ -59,7 +52,7 @@ struct aer_err_info {
 
 	unsigned int status;		/* COR/UNCOR Error Status */
 	unsigned int mask;		/* COR/UNCOR Error Mask */
-	struct header_log_regs tlp;	/* TLP Header */
+	struct aer_header_log_regs tlp;	/* TLP Header */
 };
 
 struct aer_err_source {
diff --git a/drivers/pci/pcie/aer/aerdrv_errprint.c b/drivers/pci/pcie/aer/aerdrv_errprint.c
index 7a237f6..b07a42e 100644
--- a/drivers/pci/pcie/aer/aerdrv_errprint.c
+++ b/drivers/pci/pcie/aer/aerdrv_errprint.c
@@ -19,6 +19,7 @@
 #include <linux/errno.h>
 #include <linux/pm.h>
 #include <linux/suspend.h>
+#include <linux/cper.h>
 
 #include "aerdrv.h"
 
@@ -201,3 +202,61 @@ void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info)
 		info->multi_error_valid ? "Multiple " : "",
 		aer_error_severity_string[info->severity], info->id);
 }
+
+#ifdef CONFIG_ACPI_APEI_PCIEAER
+static int cper_severity_to_aer(int cper_severity)
+{
+	switch (cper_severity) {
+	case CPER_SEV_RECOVERABLE:
+		return AER_NONFATAL;
+	case CPER_SEV_FATAL:
+		return AER_FATAL;
+	default:
+		return AER_CORRECTABLE;
+	}
+}
+
+void cper_print_aer(const char *prefix, int cper_severity,
+		    struct aer_capability_regs *aer)
+{
+	int aer_severity, layer, agent, status_strs_size, tlp_header_valid = 0;
+	u32 status, mask;
+	const char **status_strs;
+
+	aer_severity = cper_severity_to_aer(cper_severity);
+	if (aer_severity == AER_CORRECTABLE) {
+		status = aer->cor_status;
+		mask = aer->cor_mask;
+		status_strs = aer_correctable_error_string;
+		status_strs_size = ARRAY_SIZE(aer_correctable_error_string);
+	} else {
+		status = aer->uncor_status;
+		mask = aer->uncor_mask;
+		status_strs = aer_uncorrectable_error_string;
+		status_strs_size = ARRAY_SIZE(aer_uncorrectable_error_string);
+		tlp_header_valid = status & AER_LOG_TLP_MASKS;
+	}
+	layer = AER_GET_LAYER_ERROR(aer_severity, status);
+	agent = AER_GET_AGENT(aer_severity, status);
+	printk("%s""aer_status: 0x%08x, aer_mask: 0x%08x\n",
+	       prefix, status, mask);
+	cper_print_bits(prefix, status, status_strs, status_strs_size);
+	printk("%s""aer_layer=%s, aer_agent=%s\n", prefix,
+	       aer_error_layer[layer], aer_agent_string[agent]);
+	if (aer_severity != AER_CORRECTABLE)
+		printk("%s""aer_uncor_severity: 0x%08x\n",
+		       prefix, aer->uncor_severity);
+	if (tlp_header_valid) {
+		const unsigned char *tlp;
+		tlp = (const unsigned char *)&aer->header_log;
+		printk("%s""aer_tlp_header:"
+			" %02x%02x%02x%02x %02x%02x%02x%02x"
+			" %02x%02x%02x%02x %02x%02x%02x%02x\n",
+			prefix, *(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp,
+			*(tlp + 7), *(tlp + 6), *(tlp + 5), *(tlp + 4),
+			*(tlp + 11), *(tlp + 10), *(tlp + 9),
+			*(tlp + 8), *(tlp + 15), *(tlp + 14),
+			*(tlp + 13), *(tlp + 12));
+	}
+}
+#endif
diff --git a/include/linux/aer.h b/include/linux/aer.h
index f7df1ee..8414de2 100644
--- a/include/linux/aer.h
+++ b/include/linux/aer.h
@@ -7,6 +7,28 @@
 #ifndef _AER_H_
 #define _AER_H_
 
+struct aer_header_log_regs {
+	unsigned int dw0;
+	unsigned int dw1;
+	unsigned int dw2;
+	unsigned int dw3;
+};
+
+struct aer_capability_regs {
+	u32 header;
+	u32 uncor_status;
+	u32 uncor_mask;
+	u32 uncor_severity;
+	u32 cor_status;
+	u32 cor_mask;
+	u32 cap_control;
+	struct aer_header_log_regs header_log;
+	u32 root_command;
+	u32 root_status;
+	u16 cor_err_source;
+	u16 uncor_err_source;
+};
+
 #if defined(CONFIG_PCIEAER)
 /* pci-e port driver needs this function to enable aer */
 extern int pci_enable_pcie_error_reporting(struct pci_dev *dev);
@@ -27,5 +49,7 @@ static inline int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
 }
 #endif
 
+extern void cper_print_aer(const char *prefix, int cper_severity,
+			   struct aer_capability_regs *aer);
 #endif //_AER_H_
 
diff --git a/include/linux/cper.h b/include/linux/cper.h
index 3104aaf..372a258 100644
--- a/include/linux/cper.h
+++ b/include/linux/cper.h
@@ -388,5 +388,7 @@ struct cper_sec_pcie {
 #pragma pack()
 
 u64 cper_next_record_id(void);
+void cper_print_bits(const char *prefix, unsigned int bits,
+		     const char *strs[], unsigned int strs_size);
 
 #endif
-- 
1.7.4.1.343.ga91df

--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux IBM ACPI]     [Linux Power Management]     [Linux Kernel]     [Linux Laptop]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Video 4 Linux]     [Device Mapper]     [Linux Resources]

  Powered by Linux