Re: [PATCH v3] platform/x86/amd/pmc: Add AMD MP2 STB functionality

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 3/4/2024 00:41, Basavaraj Natikar wrote:
AMD MP2 STB function provides a data buffer used to log debug information
about the system execution during S2Idle suspend/resume.

A data buffer known as the STB (Smart Trace Buffer) is a circular buffer
which is a low-level log to assist in debugging by providing insights
into any potential hangs or stalls that may occur during the S2Idle
suspend/resume processes.

The current PMC driver retrieves STB data from MP1, but there can be
scenarios where MP1 might hang or become unresponsive, leading to the
loss of critical data present in the STB buffer. This defeats the purpose
of the STB buffer, which was originally meant to help identify system
failures.

This feature creates the stb_read_previous_boot debugfs file, which allows
users to retrieve the STB log from MP2, specifically from the last occurrence
of the S2Idle suspend/resume. A userspace daemon can access the STB log of the
last S2Idle suspend/resume, which can help to troubleshoot potential issues
related to hangs or stalls during the S2Idle suspend/resume sequence.

Reviewed-by: Shyam Sundar S K <Shyam-sundar.S-k@xxxxxxx>
Signed-off-by: Basavaraj Natikar <Basavaraj.Natikar@xxxxxxx>
---
Changes in v3:
	- Changed an error path for deinitialization or freeing resources.
	- Modified the macro and variable to have a more specific name.
	- Included header files for USEC_PER_MSEC, writeq, and writel.
	- Switched the data type of the 'len' variable from 'int' to
	  'unsigned int'.
	- Used one variable for return values.

Changes in v2:
	- Used linux/sizes.h instead of defining KB.
	- Defined timeout containing unit as per USEC_PER_MSEC.
	- Modified macro of AMD_C2P_MSG and AMD_P2C_MSG.
	- Changed string stb to STB in log messages.
	- Defined literals using Macro.

  drivers/platform/x86/amd/pmc/Kconfig   |  23 +++
  drivers/platform/x86/amd/pmc/Makefile  |   1 +
  drivers/platform/x86/amd/pmc/mp2_stb.c | 260 +++++++++++++++++++++++++
  drivers/platform/x86/amd/pmc/pmc.c     |   4 +
  drivers/platform/x86/amd/pmc/pmc.h     |  14 ++
  5 files changed, 302 insertions(+)
  create mode 100644 drivers/platform/x86/amd/pmc/mp2_stb.c

diff --git a/drivers/platform/x86/amd/pmc/Kconfig b/drivers/platform/x86/amd/pmc/Kconfig
index 883c0a95ac0c..8177486babf9 100644
--- a/drivers/platform/x86/amd/pmc/Kconfig
+++ b/drivers/platform/x86/amd/pmc/Kconfig
@@ -18,3 +18,26 @@ config AMD_PMC
If you choose to compile this driver as a module the module will be
  	  called amd-pmc.
+
+config AMD_MP2_STB
+	bool "AMD SoC MP2 STB function"
+	depends on AMD_PMC

What do you think about adding:
	default AMD_PMC

Then this will essentially be opt-out, and any distros that enable AMD_PMC by default will also start enabling this without extra prodding.

+	help
+	  AMD MP2 STB function provides a data buffer used to log debug
+	  information about the system execution during S2Idle suspend/resume.
+	  A data buffer known as the STB (Smart Trace Buffer) is a circular
+	  buffer which is a low-level log for the SoC which is used to debug
+	  any hangs/stalls during S2Idle suspend/resume.
+
+	  The current PMC driver retrieves STB data from MP1, but there can be
+	  scenarios where MP1 might hang or become unresponsive, leading to the
+	  loss of critical data present in the STB buffer. This defeats the
+	  purpose of the STB buffer, which was originally meant to help identify
+	  system failures.
+
+	  Creates the stb_read_previous_boot debugfs file so that a userspace
+	  daemon can read the STB log of the last S2Idle suspend/resume, which
+	  helps to debug hangs/stalls during S2Idle suspend/resume.
+
+	  Say Y or N here if you have a notebook powered by AMD RYZEN CPU/APU
+	  and supports MP2 STB.

This last sentence reads as a confusing statement.  I would just drop it.

diff --git a/drivers/platform/x86/amd/pmc/Makefile b/drivers/platform/x86/amd/pmc/Makefile
index 4aaa29d351c9..f1d9ab19d24c 100644
--- a/drivers/platform/x86/amd/pmc/Makefile
+++ b/drivers/platform/x86/amd/pmc/Makefile
@@ -6,3 +6,4 @@
amd-pmc-objs := pmc.o pmc-quirks.o
  obj-$(CONFIG_AMD_PMC) += amd-pmc.o
+amd-pmc-$(CONFIG_AMD_MP2_STB) += mp2_stb.o
diff --git a/drivers/platform/x86/amd/pmc/mp2_stb.c b/drivers/platform/x86/amd/pmc/mp2_stb.c
new file mode 100644
index 000000000000..4acea31daaaa
--- /dev/null
+++ b/drivers/platform/x86/amd/pmc/mp2_stb.c
@@ -0,0 +1,260 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * AMD MP2 STB layer
+ *
+ * Copyright (c) 2024, Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Author: Basavaraj Natikar <Basavaraj.Natikar@xxxxxxx>
+ */
+
+#include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/pci.h>
+#include <linux/sizes.h>
+#include <linux/time.h>
+
+#include "pmc.h"
+
+#define VALID_MSG 0xA
+#define VALID_RESPONSE 2
+
+#define AMD_C2P_MSG0 0x10500
+#define AMD_C2P_MSG1 0x10504
+#define AMD_P2C_MSG0 0x10680
+#define AMD_P2C_MSG1 0x10684
+
+#define MP2_RESP_SLEEP_US 500
+#define MP2_RESP_TIMEOUT_US (1600 * USEC_PER_MSEC)
+
+#define MP2_STB_DATA_LEN_2KB 1
+#define MP2_STB_DATA_LEN_16KB 4
+
+static struct amd_mp2_dev *mp2;

Do you really need a global variable? I would think this can all be children of the amd_pmc dev and you can pass pointers and references to that around instead.

+
+struct mp2_cmd_base {	/* command word; its raw value is written to AMD_C2P_MSG0 */
+	union {
+		u32 ul;	/* whole command as one 32-bit value */
+		struct {
+			u32 cmd_id : 4;	/* command identifier */
+			u32 intr_disable : 1;	/* set to 1 by amd_mp2_stb_send_cmd() */
+			u32 is_dma_used : 1;	/* caller-supplied DMA flag */
+			u32 rsvd : 26;	/* left zero by amd_mp2_stb_send_cmd() */
+		} cmd;
+	};
+};
+
+struct mp2_cmd_response {	/* response word polled from AMD_P2C_MSG0 */
+	union {
+		u32 resp;	/* whole response as one 32-bit value */
+		struct {
+			u32 cmd_id : 4;	/* compared against the command that was sent */
+			u32 status : 4;	/* compared against the expected status */
+			u32 response : 4;	/* must read 0 for the response to be accepted */
+			u32 rsvd2 : 20;
+		} response;
+	};
+};
+
+struct mp2_stb_data_valid {	/* word read from AMD_P2C_MSG1 before requesting a dump */
+	union {
+		u32 data_valid;	/* whole word as one 32-bit value */
+		struct {
+			u32 valid : 16;	/* must equal VALID_MSG */
+			u32 length : 16;	/* must be MP2_STB_DATA_LEN_2KB or MP2_STB_DATA_LEN_16KB */
+		} val;
+	};
+};
+
+/*
+ * Poll AMD_P2C_MSG0 until MP2 reports the expected command id and status
+ * with a zero response field, sleeping MP2_RESP_SLEEP_US between reads and
+ * giving up after MP2_RESP_TIMEOUT_US.
+ *
+ * Returns the status field of the matching response, or -ETIMEDOUT if no
+ * matching response was seen.
+ */
+static int amd_mp2_wait_response(u8 cmd_id, u32 command_sts)
+{
+	struct mp2_cmd_response reply;
+	int err;
+
+	err = readl_poll_timeout(mp2->mmio + AMD_P2C_MSG0, reply.resp,
+				 (reply.response.cmd_id == cmd_id &&
+				  reply.response.status == command_sts &&
+				  reply.response.response == 0x0),
+				 MP2_RESP_SLEEP_US, MP2_RESP_TIMEOUT_US);
+	if (err)
+		return -ETIMEDOUT;
+
+	return reply.response.status;
+}
+
+/*
+ * Hand a command to MP2: the DMA address is written to AMD_C2P_MSG1 first,
+ * then the command word (with intr_disable set) is written to AMD_C2P_MSG0.
+ */
+static void amd_mp2_stb_send_cmd(u8 cmd_id, bool is_dma_used)
+{
+	struct mp2_cmd_base request = { .ul = 0 };
+
+	request.cmd.is_dma_used = is_dma_used;
+	request.cmd.intr_disable = 1;
+	request.cmd.cmd_id = cmd_id;
+
+	writeq(mp2->dma_addr, mp2->mmio + AMD_C2P_MSG1);
+	writel(request.ul, mp2->mmio + AMD_C2P_MSG0);
+}
+
+/*
+ * Set up the buffers used for an STB dump: a coherent DMA buffer
+ * (mp2->vslbase / mp2->dma_addr) and a kernel buffer of the same size
+ * (mp2->stbdata). Both come from device-managed allocators.
+ *
+ * @len: buffer size in KiB.
+ *
+ * Each buffer is allocated only if it does not exist yet. Checking the two
+ * pointers separately means a call that is retried after the second
+ * allocation failed reuses the DMA buffer instead of allocating another.
+ *
+ * Returns 0 on success or -ENOMEM if an allocation fails.
+ */
+static int amd_mp2_stb_region(unsigned int len)
+{
+	struct device *dev = &mp2->pdev->dev;
+
+	len *= SZ_1K;
+
+	if (!mp2->vslbase) {
+		mp2->vslbase = dmam_alloc_coherent(dev, len, &mp2->dma_addr, GFP_KERNEL);
+		if (!mp2->vslbase)
+			return -ENOMEM;
+	}
+
+	if (!mp2->stbdata) {
+		mp2->stbdata = devm_kzalloc(dev, len, GFP_KERNEL);
+		if (!mp2->stbdata)
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Ask MP2 for the STB dump and copy it into mp2->stbdata.
+ *
+ * AMD_P2C_MSG1 is read first to check that valid data of a supported
+ * length is advertised. The buffers are then set up, the dump command is
+ * sent with DMA enabled and the response is awaited. On success the data
+ * is copied out of the DMA buffer and attached to @filp.
+ *
+ * Returns 0 on success, -EBADMSG if no valid data is advertised, -EMSGSIZE
+ * for an unsupported length, the amd_mp2_stb_region() error if buffer setup
+ * fails, or -EOPNOTSUPP if MP2 does not answer with VALID_RESPONSE.
+ */
+static int amd_mp2_process_cmd(struct file *filp)
+{
+	struct device *dev = &mp2->pdev->dev;
+	struct mp2_stb_data_valid info;
+	int rc;
+
+	info.data_valid = readl(mp2->mmio + AMD_P2C_MSG1);
+
+	if (info.val.valid != VALID_MSG) {
+		dev_err(dev, "Invalid STB data\n");
+		return -EBADMSG;
+	}
+
+	switch (info.val.length) {
+	case MP2_STB_DATA_LEN_2KB:
+	case MP2_STB_DATA_LEN_16KB:
+		break;
+	default:
+		dev_err(dev, "Unsupported length\n");
+		return -EMSGSIZE;
+	}
+
+	/* The length field is used as a bit index; stb_len is a size in KiB. */
+	mp2->stb_len = BIT(info.val.length);
+
+	rc = amd_mp2_stb_region(mp2->stb_len);
+	if (rc) {
+		dev_err(dev, "Failed to init STB region, status %d\n", rc);
+		return rc;
+	}
+
+	amd_mp2_stb_send_cmd(VALID_MSG, true);
+	rc = amd_mp2_wait_response(VALID_MSG, VALID_RESPONSE);
+	if (rc != VALID_RESPONSE) {
+		dev_err(dev, "Failed to start STB dump, status %d\n", rc);
+		return -EOPNOTSUPP;
+	}
+
+	memcpy_fromio(mp2->stbdata, mp2->vslbase, SZ_1K * mp2->stb_len);
+	filp->private_data = mp2->stbdata;
+	mp2->is_stb_data = true;
+
+	return 0;
+}
+
+/*
+ * debugfs open handler: fetch the STB dump from MP2 on first use; later
+ * opens reuse the data already held in mp2->stbdata.
+ */
+static int amd_mp2_stb_debugfs_open(struct inode *inode, struct file *filp)
+{
+	if (mp2->is_stb_data) {
+		filp->private_data = mp2->stbdata;
+		return 0;
+	}
+
+	return amd_mp2_process_cmd(filp);
+}
+
+/*
+ * debugfs read handler: copy from the buffer attached at open time to
+ * userspace, bounded by the STB size (mp2->stb_len KiB).
+ *
+ * Returns -EINVAL if no buffer is attached to @filp, otherwise whatever
+ * simple_read_from_buffer() returns.
+ */
+static ssize_t amd_mp2_stb_debugfs_read(struct file *filp, char __user *buf, size_t size,
+					loff_t *pos)
+{
+	const void *stb = filp->private_data;
+	size_t stb_bytes;
+
+	if (!stb)
+		return -EINVAL;
+
+	stb_bytes = SZ_1K * mp2->stb_len;
+
+	return simple_read_from_buffer(buf, size, pos, stb, stb_bytes);
+}
+
+static const struct file_operations amd_mp2_stb_debugfs_fops = {	/* handlers for the STB debugfs file; no .write is provided */
+	.owner = THIS_MODULE,
+	.open = amd_mp2_stb_debugfs_open,	/* triggers the MP2 dump on first open */
+	.read = amd_mp2_stb_debugfs_read,	/* serves the buffer attached at open */
+};
+
+/*
+ * Create the "stb_read_previous_boot" file in the PMC debugfs directory.
+ * Does nothing when the directory has not been set up.
+ */
+static void amd_mp2_dbgfs_register(struct amd_pmc_dev *dev)
+{
+	if (!dev->dbgfs_dir)
+		return;
Is this right?  I don't see dbgfs_dir being initialized.

+
+	/* The fops provide no .write handler, so expose the file read-only. */
+	debugfs_create_file("stb_read_previous_boot", 0444, dev->dbgfs_dir, dev,
+			    &amd_mp2_stb_debugfs_fops);
+}
+
+/*
+ * Undo amd_mp2_stb_init(): release the devres group opened on the MP2 PCI
+ * device, then drop the reference taken by pci_get_device().
+ *
+ * The group is released before pci_dev_put() so that pdev is not used
+ * after its reference has been dropped. The global pointer is always
+ * cleared, including when init failed before a PCI device was found, so no
+ * stale pointer is left behind. Does nothing if mp2 is not set.
+ */
+void amd_mp2_stb_deinit(void)
+{
+	struct pci_dev *pdev;
+
+	if (!mp2)
+		return;
+
+	pdev = mp2->pdev;
+	if (pdev) {
+		if (mp2->devres_gid)
+			devres_release_group(&pdev->dev, mp2->devres_gid);
+		pci_dev_put(pdev);
+	}
+
+	mp2 = NULL;
+}
+
+void amd_mp2_stb_init(struct amd_pmc_dev *dev)	/* returns nothing; every failure jumps to mp2_error */
+{
+	struct pci_dev *pdev;
+	int rc;
+
+	mp2 = devm_kzalloc(dev->dev, sizeof(*mp2), GFP_KERNEL);	/* zeroed state, tied to the PMC device */
+	if (!mp2)
+		goto mp2_error;
+
+	pdev = pci_get_device(PCI_VENDOR_ID_AMD, 0x172c, NULL);	/* reference is dropped in amd_mp2_stb_deinit() */

Three things:
1) Could you put the definition for it in amd/pmc/pmc.h?
2) Is this device ID always going to be 0x172c? If not, should it be paired with a specific check?

IE something like if the root device is PCI_DEVICE_ID_AMD_1AH_M20H_ROOT then look for 0x172c etc?

You could make a helper that is

static int amd_mp2_get_stb() to abstract it all then.

This could scale better then in the future when more device IDs are added.

3) Does this PCI ID get used for anything else? On a matching platform will amd-sfh use it or anything?

+	if (!pdev)
+		goto mp2_error;
+

If you re-order these two calls, no extra clean up will be needed if the PCI device is missing.

+	mp2->pdev = pdev;
+
+	mp2->devres_gid = devres_open_group(&pdev->dev, NULL, GFP_KERNEL);	/* group id is what deinit passes to devres_release_group() */
+	if (!mp2->devres_gid) {
+		dev_err(&pdev->dev, "devres_open_group failed\n");
+		goto mp2_error;
+	}
+
+	rc = pcim_enable_device(pdev);
+	if (rc) {
+		dev_err(&pdev->dev, "pcim_enable_device failed\n");
+		goto mp2_error;
+	}
+
+	rc = pcim_iomap_regions(pdev, BIT(2), "mp2 stb");	/* BIT(2) selects BAR2 */
+	if (rc) {
+		dev_err(&pdev->dev, "pcim_iomap_regions failed\n");
+		goto mp2_error;
+	}
+
+	/* MP2 MMIO is mapped to BAR2 */
+	mp2->mmio = pcim_iomap_table(pdev)[2];
+	if (!mp2->mmio) {
+		dev_err(&pdev->dev, "pcim_iomap_table failed\n");
+		goto mp2_error;
+	}
+
+	pci_set_master(pdev);

AFAICT you don't undo setting master (pci_clear_master) if the DMA set mask fails nor on the cleanup path.

+
+	rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+	if (rc) {
+		dev_err(&pdev->dev, "failed to set DMA mask\n");
+		goto mp2_error;
+	}
+
+	amd_mp2_dbgfs_register(dev);	/* debugfs file is only created once setup succeeded */
+	dev->mp2 = mp2;
+
+	return;
+
+mp2_error:
+	amd_mp2_stb_deinit();	/* drops the PCI reference and devres group if a device was found */
+}
diff --git a/drivers/platform/x86/amd/pmc/pmc.c b/drivers/platform/x86/amd/pmc/pmc.c
index 108e12fd580f..926257125ac1 100644
--- a/drivers/platform/x86/amd/pmc/pmc.c
+++ b/drivers/platform/x86/amd/pmc/pmc.c
@@ -1106,6 +1106,8 @@ static int amd_pmc_probe(struct platform_device *pdev)
  	}
amd_pmc_dbgfs_register(dev);
+	if (IS_ENABLED(CONFIG_AMD_MP2_STB))
+		amd_mp2_stb_init(dev);
  	pm_report_max_hw_sleep(U64_MAX);
  	return 0;
@@ -1122,6 +1124,8 @@ static void amd_pmc_remove(struct platform_device *pdev)
  		acpi_unregister_lps0_dev(&amd_pmc_s2idle_dev_ops);
  	amd_pmc_dbgfs_unregister(dev);
  	pci_dev_put(dev->rdev);
+	if (IS_ENABLED(CONFIG_AMD_MP2_STB))
+		amd_mp2_stb_deinit();
  	mutex_destroy(&dev->lock);
  }
diff --git a/drivers/platform/x86/amd/pmc/pmc.h b/drivers/platform/x86/amd/pmc/pmc.h
index 827eef65e133..a49af1fa0060 100644
--- a/drivers/platform/x86/amd/pmc/pmc.h
+++ b/drivers/platform/x86/amd/pmc/pmc.h
@@ -14,6 +14,17 @@
  #include <linux/types.h>
  #include <linux/mutex.h>
+struct amd_mp2_dev {	/* state for the MP2 STB support in mp2_stb.c */
+	void __iomem *mmio;	/* BAR2 mapping of the MP2 PCI device */
+	void __iomem *vslbase;	/* CPU address of the coherent DMA buffer */
+	void *stbdata;	/* kernel copy of the STB dump served through debugfs */
+	void *devres_gid;	/* devres group opened on pdev during init */
+	struct pci_dev *pdev;	/* MP2 PCI device; reference dropped in deinit */
+	dma_addr_t dma_addr;	/* DMA address of vslbase, written to AMD_C2P_MSG1 */
+	int stb_len;	/* STB size in KiB */
+	bool is_stb_data;	/* true once stbdata holds a dump */
+};
+
  struct amd_pmc_dev {
  	void __iomem *regbase;
  	void __iomem *smu_virt_addr;
@@ -38,10 +49,13 @@ struct amd_pmc_dev {
  	struct dentry *dbgfs_dir;
  	struct quirk_entry *quirks;
  	bool disable_8042_wakeup;
+	struct amd_mp2_dev *mp2;
  };
void amd_pmc_process_restore_quirks(struct amd_pmc_dev *dev);
  void amd_pmc_quirks_init(struct amd_pmc_dev *dev);
+void amd_mp2_stb_init(struct amd_pmc_dev *dev);
+void amd_mp2_stb_deinit(void);
/* List of supported CPU ids */
  #define AMD_CPU_ID_RV			0x15D0





[Index of Archives]     [Linux Kernel Development]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux