Hi Dave, Please see a few comments inline. >-----Original Message----- >From: Dave Jiang <dave.jiang@xxxxxxxxx> >Sent: 18 November 2022 17:09 >To: linux-cxl@xxxxxxxxxxxxxxx; linux-pci@xxxxxxxxxxxxxxx >Cc: dan.j.williams@xxxxxxxxx; ira.weiny@xxxxxxxxx; vishal.l.verma@xxxxxxxxx; >alison.schofield@xxxxxxxxx; Jonathan Cameron ><jonathan.cameron@xxxxxxxxxx>; rostedt@xxxxxxxxxxx; >terry.bowman@xxxxxxx; bhelgaas@xxxxxxxxxx >Subject: [PATCH v3 08/11] cxl/pci: add tracepoint events for CXL RAS > >Add tracepoint events for recording the CXL uncorrectable and correctable >errors. For uncorrectable errors, there is additional data of 512B from the >header log register (CXL spec rev3 8.2.4.16.7). The trace event will intake a >dynamic array that will dump the entire Header Log data. If multiple errors are >set in the status register, then the 'first error' field (CXL spec rev3 v8.2.4.16.6) >is read from the Error Capabilities and Control Register in order to determine >the error. > >This implementation does not include CXL IDE Error details. 
> >Cc: Steven Rostedt <rostedt@xxxxxxxxxxx> >Signed-off-by: Dave Jiang <dave.jiang@xxxxxxxxx> >--- > drivers/cxl/pci.c | 2 + > include/trace/events/cxl.h | 110 >++++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 112 insertions(+) > create mode 100644 include/trace/events/cxl.h > >diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index >9428f3e0d99b..0f36a5861a7b 100644 >--- a/drivers/cxl/pci.c >+++ b/drivers/cxl/pci.c >@@ -13,6 +13,8 @@ > #include "cxlmem.h" > #include "cxlpci.h" > #include "cxl.h" >+#define CREATE_TRACE_POINTS >+#include <trace/events/cxl.h> > > /** > * DOC: cxl pci >diff --git a/include/trace/events/cxl.h b/include/trace/events/cxl.h new file >mode 100644 index 000000000000..f8e95d977133 >--- /dev/null >+++ b/include/trace/events/cxl.h >@@ -0,0 +1,110 @@ >+/* SPDX-License-Identifier: GPL-2.0 */ >+#undef TRACE_SYSTEM >+#define TRACE_SYSTEM cxl >+ >+#if !defined(_CXL_EVENTS_H) || defined(TRACE_HEADER_MULTI_READ) >#define >+_CXL_EVENTS_H >+ >+#include <linux/tracepoint.h> >+ >+#define CXL_HEADERLOG_SIZE SZ_512 >+#define CXL_HEADERLOG_SIZE_U32 SZ_512 / sizeof(u32) >+ >+#define CXL_RAS_UC_CACHE_DATA_PARITY BIT(0) >+#define CXL_RAS_UC_CACHE_ADDR_PARITY BIT(1) >+#define CXL_RAS_UC_CACHE_BE_PARITY BIT(2) >+#define CXL_RAS_UC_CACHE_DATA_ECC BIT(3) >+#define CXL_RAS_UC_MEM_DATA_PARITY BIT(4) >+#define CXL_RAS_UC_MEM_ADDR_PARITY BIT(5) >+#define CXL_RAS_UC_MEM_BE_PARITY BIT(6) >+#define CXL_RAS_UC_MEM_DATA_ECC BIT(7) >+#define CXL_RAS_UC_REINIT_THRESH BIT(8) >+#define CXL_RAS_UC_RSVD_ENCODE BIT(9) >+#define CXL_RAS_UC_POISON BIT(10) >+#define CXL_RAS_UC_RECV_OVERFLOW BIT(11) >+#define CXL_RAS_UC_INTERNAL_ERR BIT(14) >+#define CXL_RAS_UC_IDE_TX_ERR BIT(15) >+#define CXL_RAS_UC_IDE_RX_ERR BIT(16) >+ >+#define show_uc_errs(status) __print_flags(status, " | ", > \ >+ { CXL_RAS_UC_CACHE_DATA_PARITY, "Cache Data Parity Error" }, > \ >+ { CXL_RAS_UC_CACHE_ADDR_PARITY, "Cache Address Parity Error" }, > \ >+ { CXL_RAS_UC_CACHE_BE_PARITY, "Cache Byte 
Enable Parity Error" }, >\ >+ { CXL_RAS_UC_CACHE_DATA_ECC, "Cache Data ECC Error" }, > \ >+ { CXL_RAS_UC_MEM_DATA_PARITY, "Memory Data Parity Error" }, > \ >+ { CXL_RAS_UC_MEM_ADDR_PARITY, "Memory Address Parity Error" >}, \ >+ { CXL_RAS_UC_MEM_BE_PARITY, "Memory Byte Enable Parity Error" >}, \ >+ { CXL_RAS_UC_MEM_DATA_ECC, "Memory Data ECC Error" }, > \ >+ { CXL_RAS_UC_REINIT_THRESH, "REINIT Threshold Hit" }, > \ >+ { CXL_RAS_UC_RSVD_ENCODE, "Received Unrecognized Encoding" }, > \ >+ { CXL_RAS_UC_POISON, "Received Poison From Peer" }, > \ >+ { CXL_RAS_UC_RECV_OVERFLOW, "Receiver Overflow" }, > \ >+ { CXL_RAS_UC_INTERNAL_ERR, "Component Specific Error" }, \ >+ { CXL_RAS_UC_IDE_TX_ERR, "IDE Tx Error" }, \ >+ { CXL_RAS_UC_IDE_RX_ERR, "IDE Rx Error" } \ >+) >+ >+TRACE_EVENT(cxl_aer_uncorrectable_error, >+ TP_PROTO(const char *dev_name, u32 status, u32 fe, u32 *hl), >+ TP_ARGS(dev_name, status, fe, hl), >+ TP_STRUCT__entry( >+ __string(dev_name, dev_name) >+ __field(u32, status) >+ __field(u32, first_error) >+ __dynamic_array(u32, header_log, >CXL_HEADERLOG_SIZE_U32) >+ ), >+ TP_fast_assign( >+ __assign_str(dev_name, dev_name); >+ __entry->status = status; >+ __entry->first_error = fe; >+ /* >+ * Embed the 512B headerlog data for user app retrieval and >+ * parsing, but no need to print this in the trace buffer. >+ */ >+ memcpy(__get_dynamic_array(header_log), hl, >CXL_HEADERLOG_SIZE); >+ ), >+ TP_printk("%s: status: '%s' first_error: '%s'", >+ __get_str(dev_name), >+ show_uc_errs(__entry->status), >+ show_uc_errs(__entry->first_error) >+ ) >+); >+ >+#define CXL_RAS_CE_CACHE_DATA_ECC BIT(0) >+#define CXL_RAS_CE_MEM_DATA_ECC BIT(1) >+#define CXL_RAS_CE_CRC_THRESH BIT(2) I think bit 3, "Retry_Threshold: Retry Threshold Hit", as defined for the Correctable Error Status Register in the CXL 3.0 specification, is missing here. If so, please add it and correct the bit positions of the subsequent correctable errors as well. 
>+#define CXL_RAS_CE_CACHE_POISON BIT(3) >+#define CXL_RAS_CE_MEM_POISON BIT(4) >+#define CXL_RAS_CE_PHYS_LAYER_ERR BIT(5) >+ >+#define show_ce_errs(status) __print_flags(status, " | ", > \ >+ { CXL_RAS_CE_CACHE_DATA_ECC, "Cache Data ECC Error" }, > \ >+ { CXL_RAS_CE_MEM_DATA_ECC, "Memory Data Ecc Error" }, Please change "Ecc" to "ECC". > \ >+ { CXL_RAS_CE_CRC_THRESH, "CRC Threshold Hit" }, > \ >+ { CXL_RAS_CE_CACHE_POISON, "Received Cache Poison From Peer" >}, \ >+ { CXL_RAS_CE_MEM_POISON, "Received Memory Poison From Peer" >}, \ >+ { CXL_RAS_CE_PHYS_LAYER_ERR, "Received Error From Physical >Layer" } \ >+) >+ >+TRACE_EVENT(cxl_aer_correctable_error, >+ TP_PROTO(const char *dev_name, u32 status), >+ TP_ARGS(dev_name, status), >+ TP_STRUCT__entry( >+ __string(dev_name, dev_name) >+ __field(u32, status) >+ ), >+ TP_fast_assign( >+ __assign_str(dev_name, dev_name); >+ __entry->status = status; >+ ), >+ TP_printk("%s: status: '%s'", >+ __get_str(dev_name), show_ce_errs(__entry->status) >+ ) >+); >+ >+#endif /* _CXL_EVENTS_H */ >+ >+/* This part must be outside protection */ #undef TRACE_INCLUDE_FILE >+#define TRACE_INCLUDE_FILE cxl #include <trace/define_trace.h> > Thanks, Shiju