Re: [PATCH] cxl/pci: Move tracepoint definitions to drivers/cxl/core/

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Dec 08, 2022 at 09:02:00AM -0800, Dan Williams wrote:
> CXL is using tracepoints for reporting RAS capability register payloads
> for AER events, and has plans to use tracepoints for the output payload
> of Get Poison List and Get Event Records commands. For organization
> purposes it would be nice to keep those all under a single + local CXL
> trace system. This also organization also potentially helps in the
> future when CXL drivers expand beyond generic memory expanders, however
> that would also entail a move away from the expander-specific
> cxl_dev_state context, save that for later.
> 
> Note that the powerpc-specific drivers/misc/cxl/ also defines a 'cxl'
> trace system, however, it is unlikely that a single platform will ever
> load both drivers simultaneously.

Verified this on top of cxl/next with the get-poison-list patchset
using the new trace file layout.

Also, confirmed that the cxl_aer_*_error events appeared correctly
in sys/kernel/debug/tracing/events/ and that 'cxl monitor' could 
be run.

Tested-by: Alison Schofield <alison.schofield@xxxxxxxxx>

> 
> Cc: Steven Rostedt <rostedt@xxxxxxxxxxx>
> Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx>
> ---
> This patch is targeting v6.3.  I am sending it out now to enable the
> in-flight Event and Poison list patch sets to build upon. It will not
> move to a non-rebasing branch until after v6.2-rc2, but in the meantime
> I can throw it out on the list and the cxl/preview branch.
> 
>  drivers/cxl/core/Makefile  |    3 +
>  drivers/cxl/core/pci.c     |  112 ++++++++++++++++++++++++++++++++++++++++++++
>  drivers/cxl/core/trace.c   |    5 ++
>  drivers/cxl/core/trace.h   |   11 ++--
>  drivers/cxl/cxl.h          |    2 +
>  drivers/cxl/cxlpci.h       |    3 +
>  drivers/cxl/pci.c          |  111 --------------------------------------------
>  tools/testing/cxl/Kbuild   |    2 +
>  8 files changed, 131 insertions(+), 118 deletions(-)
>  create mode 100644 drivers/cxl/core/trace.c
>  rename include/trace/events/cxl.h => drivers/cxl/core/trace.h (94%)
> 
> diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile
> index 79c7257f4107..ca4ae31d8f57 100644
> --- a/drivers/cxl/core/Makefile
> +++ b/drivers/cxl/core/Makefile
> @@ -3,6 +3,8 @@ obj-$(CONFIG_CXL_BUS) += cxl_core.o
>  obj-$(CONFIG_CXL_SUSPEND) += suspend.o
>  
>  ccflags-y += -I$(srctree)/drivers/cxl
> +CFLAGS_trace.o = -DTRACE_INCLUDE_PATH=. -I$(src)
> +
>  cxl_core-y := port.o
>  cxl_core-y += pmem.o
>  cxl_core-y += regs.o
> @@ -10,4 +12,5 @@ cxl_core-y += memdev.o
>  cxl_core-y += mbox.o
>  cxl_core-y += pci.o
>  cxl_core-y += hdm.o
> +cxl_core-$(CONFIG_TRACING) += trace.o
>  cxl_core-$(CONFIG_CXL_REGION) += region.o
> diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
> index 57764e9cd19d..1d1492440287 100644
> --- a/drivers/cxl/core/pci.c
> +++ b/drivers/cxl/core/pci.c
> @@ -9,6 +9,7 @@
>  #include <cxlmem.h>
>  #include <cxl.h>
>  #include "core.h"
> +#include "trace.h"
>  
>  /**
>   * DOC: cxl core pci
> @@ -622,3 +623,114 @@ void read_cdat_data(struct cxl_port *port)
>  	}
>  }
>  EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL);
> +
> +void cxl_cor_error_detected(struct pci_dev *pdev)
> +{
> +	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
> +	struct cxl_memdev *cxlmd = cxlds->cxlmd;
> +	struct device *dev = &cxlmd->dev;
> +	void __iomem *addr;
> +	u32 status;
> +
> +	if (!cxlds->regs.ras)
> +		return;
> +
> +	addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_STATUS_OFFSET;
> +	status = readl(addr);
> +	if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) {
> +		writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
> +		trace_cxl_aer_correctable_error(dev, status);
> +	}
> +}
> +EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, CXL);
> +
> +/* CXL spec rev3.0 8.2.4.16.1 */
> +static void header_log_copy(struct cxl_dev_state *cxlds, u32 *log)
> +{
> +	void __iomem *addr;
> +	u32 *log_addr;
> +	int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32);
> +
> +	addr = cxlds->regs.ras + CXL_RAS_HEADER_LOG_OFFSET;
> +	log_addr = log;
> +
> +	for (i = 0; i < log_u32_size; i++) {
> +		*log_addr = readl(addr);
> +		log_addr++;
> +		addr += sizeof(u32);
> +	}
> +}
> +
> +/*
> + * Log the state of the RAS status registers and prepare them to log the
> + * next error status. Return 1 if reset needed.
> + */
> +static bool cxl_report_and_clear(struct cxl_dev_state *cxlds)
> +{
> +	struct cxl_memdev *cxlmd = cxlds->cxlmd;
> +	struct device *dev = &cxlmd->dev;
> +	u32 hl[CXL_HEADERLOG_SIZE_U32];
> +	void __iomem *addr;
> +	u32 status;
> +	u32 fe;
> +
> +	if (!cxlds->regs.ras)
> +		return false;
> +
> +	addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET;
> +	status = readl(addr);
> +	if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK))
> +		return false;
> +
> +	/* If multiple errors, log header points to first error from ctrl reg */
> +	if (hweight32(status) > 1) {
> +		addr = cxlds->regs.ras + CXL_RAS_CAP_CONTROL_OFFSET;
> +		fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, readl(addr)));
> +	} else {
> +		fe = status;
> +	}
> +
> +	header_log_copy(cxlds, hl);
> +	trace_cxl_aer_uncorrectable_error(dev, status, fe, hl);
> +	writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr);
> +
> +	return true;
> +}
> +
> +pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
> +				    pci_channel_state_t state)
> +{
> +	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
> +	struct cxl_memdev *cxlmd = cxlds->cxlmd;
> +	struct device *dev = &cxlmd->dev;
> +	bool ue;
> +
> +	/*
> +	 * A frozen channel indicates an impending reset which is fatal to
> +	 * CXL.mem operation, and will likely crash the system. On the off
> +	 * chance the situation is recoverable dump the status of the RAS
> +	 * capability registers and bounce the active state of the memdev.
> +	 */
> +	ue = cxl_report_and_clear(cxlds);
> +
> +	switch (state) {
> +	case pci_channel_io_normal:
> +		if (ue) {
> +			device_release_driver(dev);
> +			return PCI_ERS_RESULT_NEED_RESET;
> +		}
> +		return PCI_ERS_RESULT_CAN_RECOVER;
> +	case pci_channel_io_frozen:
> +		dev_warn(&pdev->dev,
> +			 "%s: frozen state error detected, disable CXL.mem\n",
> +			 dev_name(dev));
> +		device_release_driver(dev);
> +		return PCI_ERS_RESULT_NEED_RESET;
> +	case pci_channel_io_perm_failure:
> +		dev_warn(&pdev->dev,
> +			 "failure state error detected, request disconnect\n");
> +		return PCI_ERS_RESULT_DISCONNECT;
> +	}
> +	return PCI_ERS_RESULT_NEED_RESET;
> +}
> +EXPORT_SYMBOL_NS_GPL(cxl_error_detected, CXL);
> diff --git a/drivers/cxl/core/trace.c b/drivers/cxl/core/trace.c
> new file mode 100644
> index 000000000000..29ae7ce81dc5
> --- /dev/null
> +++ b/drivers/cxl/core/trace.c
> @@ -0,0 +1,5 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
> +
> +#define CREATE_TRACE_POINTS
> +#include "trace.h"
> diff --git a/include/trace/events/cxl.h b/drivers/cxl/core/trace.h
> similarity index 94%
> rename from include/trace/events/cxl.h
> rename to drivers/cxl/core/trace.h
> index ad085a2534ef..20ca2fe2ca8e 100644
> --- a/include/trace/events/cxl.h
> +++ b/drivers/cxl/core/trace.h
> @@ -1,15 +1,14 @@
> -/* SPDX-License-Identifier: GPL-2.0 */
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
>  #undef TRACE_SYSTEM
>  #define TRACE_SYSTEM cxl
>  
>  #if !defined(_CXL_EVENTS_H) || defined(TRACE_HEADER_MULTI_READ)
>  #define _CXL_EVENTS_H
>  
> +#include <cxl.h>
>  #include <linux/tracepoint.h>
>  
> -#define CXL_HEADERLOG_SIZE		SZ_512
> -#define CXL_HEADERLOG_SIZE_U32		SZ_512 / sizeof(u32)
> -
>  #define CXL_RAS_UC_CACHE_DATA_PARITY	BIT(0)
>  #define CXL_RAS_UC_CACHE_ADDR_PARITY	BIT(1)
>  #define CXL_RAS_UC_CACHE_BE_PARITY	BIT(2)
> @@ -106,7 +105,5 @@ TRACE_EVENT(cxl_aer_correctable_error,
>  
>  #endif /* _CXL_EVENTS_H */
>  
> -/* This part must be outside protection */
> -#undef TRACE_INCLUDE_FILE
> -#define TRACE_INCLUDE_FILE cxl
> +#define TRACE_INCLUDE_FILE trace
>  #include <trace/define_trace.h>
> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> index 1b1cf459ac77..aa3af3bb73b2 100644
> --- a/drivers/cxl/cxl.h
> +++ b/drivers/cxl/cxl.h
> @@ -140,6 +140,8 @@ static inline int ways_to_eiw(unsigned int ways, u8 *eiw)
>  #define CXL_RAS_CAP_CONTROL_FE_MASK GENMASK(5, 0)
>  #define CXL_RAS_HEADER_LOG_OFFSET 0x18
>  #define CXL_RAS_CAPABILITY_LENGTH 0x58
> +#define CXL_HEADERLOG_SIZE SZ_512
> +#define CXL_HEADERLOG_SIZE_U32 SZ_512 / sizeof(u32)
>  
>  /* CXL 2.0 8.2.8.1 Device Capabilities Array Register */
>  #define CXLDEV_CAP_ARRAY_OFFSET 0x0
> diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h
> index 920909791bb9..77dbdb980b12 100644
> --- a/drivers/cxl/cxlpci.h
> +++ b/drivers/cxl/cxlpci.h
> @@ -66,4 +66,7 @@ int devm_cxl_port_enumerate_dports(struct cxl_port *port);
>  struct cxl_dev_state;
>  int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm);
>  void read_cdat_data(struct cxl_port *port);
> +void cxl_cor_error_detected(struct pci_dev *pdev);
> +pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
> +				    pci_channel_state_t state);
>  #endif /* __CXL_PCI_H__ */
> diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
> index 33083a522fd1..3a66aadb4df0 100644
> --- a/drivers/cxl/pci.c
> +++ b/drivers/cxl/pci.c
> @@ -14,8 +14,6 @@
>  #include "cxlmem.h"
>  #include "cxlpci.h"
>  #include "cxl.h"
> -#define CREATE_TRACE_POINTS
> -#include <trace/events/cxl.h>
>  
>  /**
>   * DOC: cxl pci
> @@ -514,96 +512,6 @@ static const struct pci_device_id cxl_mem_pci_tbl[] = {
>  };
>  MODULE_DEVICE_TABLE(pci, cxl_mem_pci_tbl);
>  
> -/* CXL spec rev3.0 8.2.4.16.1 */
> -static void header_log_copy(struct cxl_dev_state *cxlds, u32 *log)
> -{
> -	void __iomem *addr;
> -	u32 *log_addr;
> -	int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32);
> -
> -	addr = cxlds->regs.ras + CXL_RAS_HEADER_LOG_OFFSET;
> -	log_addr = log;
> -
> -	for (i = 0; i < log_u32_size; i++) {
> -		*log_addr = readl(addr);
> -		log_addr++;
> -		addr += sizeof(u32);
> -	}
> -}
> -
> -/*
> - * Log the state of the RAS status registers and prepare them to log the
> - * next error status. Return 1 if reset needed.
> - */
> -static bool cxl_report_and_clear(struct cxl_dev_state *cxlds)
> -{
> -	struct cxl_memdev *cxlmd = cxlds->cxlmd;
> -	struct device *dev = &cxlmd->dev;
> -	u32 hl[CXL_HEADERLOG_SIZE_U32];
> -	void __iomem *addr;
> -	u32 status;
> -	u32 fe;
> -
> -	if (!cxlds->regs.ras)
> -		return false;
> -
> -	addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET;
> -	status = readl(addr);
> -	if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK))
> -		return false;
> -
> -	/* If multiple errors, log header points to first error from ctrl reg */
> -	if (hweight32(status) > 1) {
> -		addr = cxlds->regs.ras + CXL_RAS_CAP_CONTROL_OFFSET;
> -		fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, readl(addr)));
> -	} else {
> -		fe = status;
> -	}
> -
> -	header_log_copy(cxlds, hl);
> -	trace_cxl_aer_uncorrectable_error(dev, status, fe, hl);
> -	writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr);
> -
> -	return true;
> -}
> -
> -static pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
> -					   pci_channel_state_t state)
> -{
> -	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
> -	struct cxl_memdev *cxlmd = cxlds->cxlmd;
> -	struct device *dev = &cxlmd->dev;
> -	bool ue;
> -
> -	/*
> -	 * A frozen channel indicates an impending reset which is fatal to
> -	 * CXL.mem operation, and will likely crash the system. On the off
> -	 * chance the situation is recoverable dump the status of the RAS
> -	 * capability registers and bounce the active state of the memdev.
> -	 */
> -	ue = cxl_report_and_clear(cxlds);
> -
> -	switch (state) {
> -	case pci_channel_io_normal:
> -		if (ue) {
> -			device_release_driver(dev);
> -			return PCI_ERS_RESULT_NEED_RESET;
> -		}
> -		return PCI_ERS_RESULT_CAN_RECOVER;
> -	case pci_channel_io_frozen:
> -		dev_warn(&pdev->dev,
> -			 "%s: frozen state error detected, disable CXL.mem\n",
> -			 dev_name(dev));
> -		device_release_driver(dev);
> -		return PCI_ERS_RESULT_NEED_RESET;
> -	case pci_channel_io_perm_failure:
> -		dev_warn(&pdev->dev,
> -			 "failure state error detected, request disconnect\n");
> -		return PCI_ERS_RESULT_DISCONNECT;
> -	}
> -	return PCI_ERS_RESULT_NEED_RESET;
> -}
> -
>  static pci_ers_result_t cxl_slot_reset(struct pci_dev *pdev)
>  {
>  	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
> @@ -628,25 +536,6 @@ static void cxl_error_resume(struct pci_dev *pdev)
>  		 dev->driver ? "successful" : "failed");
>  }
>  
> -static void cxl_cor_error_detected(struct pci_dev *pdev)
> -{
> -	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
> -	struct cxl_memdev *cxlmd = cxlds->cxlmd;
> -	struct device *dev = &cxlmd->dev;
> -	void __iomem *addr;
> -	u32 status;
> -
> -	if (!cxlds->regs.ras)
> -		return;
> -
> -	addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_STATUS_OFFSET;
> -	status = readl(addr);
> -	if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) {
> -		writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
> -		trace_cxl_aer_correctable_error(dev, status);
> -	}
> -}
> -
>  static const struct pci_error_handlers cxl_error_handlers = {
>  	.error_detected	= cxl_error_detected,
>  	.slot_reset	= cxl_slot_reset,
> diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild
> index 0805f08af8b3..12af1c9270ff 100644
> --- a/tools/testing/cxl/Kbuild
> +++ b/tools/testing/cxl/Kbuild
> @@ -17,6 +17,7 @@ CXL_SRC := $(DRIVERS)/cxl
>  CXL_CORE_SRC := $(DRIVERS)/cxl/core
>  ccflags-y := -I$(srctree)/drivers/cxl/
>  ccflags-y += -D__mock=__weak
> +ccflags-y += -DTRACE_INCLUDE_PATH=$(CXL_CORE_SRC) -I$(srctree)/drivers/cxl/core/
>  
>  obj-m += cxl_acpi.o
>  
> @@ -49,6 +50,7 @@ cxl_core-y += $(CXL_CORE_SRC)/memdev.o
>  cxl_core-y += $(CXL_CORE_SRC)/mbox.o
>  cxl_core-y += $(CXL_CORE_SRC)/pci.o
>  cxl_core-y += $(CXL_CORE_SRC)/hdm.o
> +cxl_core-$(CONFIG_TRACING) += $(CXL_CORE_SRC)/trace.o
>  cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o
>  cxl_core-y += config_check.o
>  
> 



[Index of Archives]     [DMA Engine]     [Linux Coverity]     [Linux USB]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [Greybus]

  Powered by Linux