Hi, On 10/26/2021 4:51 PM, Scott Bruce wrote: > v3 of this patch hard crashes on my Cezanne laptop on the second > suspend attempt. The problem appears to be with the 3rd patch in the > series, the first two don't cause any problems. > > This tree suspends fine using the original v1 and the first two > patches from the new series: > https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgitlab.com%2Fsmbruce%2Flinux-stable-s0ix%2F-%2Fcommits%2Fv5.14.14-s0ix-testing4&data=04%7C01%7CShyam-sundar.S-k%40amd.com%7C78678c08813a422ab71c08d99872d01b%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637708443387757519%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000&sdata=3cOaDPaQrdwDlVCfje2pyt9ZTp5wqmlJTdpvY2JHmZo%3D&reserved=0 > This crashes: https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgitlab.com%2Fsmbruce%2Flinux-stable-s0ix%2F-%2Fcommits%2Fv5.14.14-s0ix-testing3-DONTUSE&data=04%7C01%7CShyam-sundar.S-k%40amd.com%7C78678c08813a422ab71c08d99872d01b%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637708443387757519%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000&sdata=vPPyw7Ir6yGqwT4PkaSwpVzzLiENQm4VxOHt68VWTV0%3D&reserved=0 > > crash journal: https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgitlab.com%2F-%2Fsnippets%2F2194519&data=04%7C01%7CShyam-sundar.S-k%40amd.com%7C78678c08813a422ab71c08d99872d01b%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637708443387757519%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000&sdata=hf425kgujjAWxcma4%2BMy5FUl%2FAbhGwZ7ieMdkfU0T0A%3D&reserved=0 Thanks for trying STB. I would like to see the FW versions running on your machine (since I am unable to replicate the same with v3 of STB). 
Can you tell me the output of: cat /sys/kernel/debug/dri/0/amdgpu_firmware_info Also, did you pass amd_pmc.enable_stb=1 in your boot params? STB is an on-demand feature that gets activated only when the driver is probed with the enable_stb param. I could not find this info in your journal. Thanks, Shyam > > On Fri, Oct 22, 2021 at 10:31 AM Sanket Goswami <Sanket.Goswami@xxxxxxx> wrote: >> >> STB (Smart Trace Buffer), is a debug trace buffer which is used to help >> isolate failures by analyzing the last feature that a system was running >> before hitting a failure. This nonintrusive way is always running in the >> background and trace is stored into the SoC. >> >> This patch provides mechanism to access the STB buffer using the read >> and write routines. >> >> Co-developed-by: Shyam Sundar S K <Shyam-sundar.S-k@xxxxxxx> >> Signed-off-by: Shyam Sundar S K <Shyam-sundar.S-k@xxxxxxx> >> Signed-off-by: Sanket Goswami <Sanket.Goswami@xxxxxxx> >> --- >> Changes in v3: >> - Address review comments from Mark Gross. >> >> Changes in v2: >> - Create amd_pmc_stb_debugfs_fops structure to get STB data. >> - Address review comments from Hans. 
>> >> drivers/platform/x86/amd-pmc.c | 120 +++++++++++++++++++++++++++++++++ >> 1 file changed, 120 insertions(+) >> >> diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c >> index 50cb65e38d11..665d57ff222d 100644 >> --- a/drivers/platform/x86/amd-pmc.c >> +++ b/drivers/platform/x86/amd-pmc.c >> @@ -35,6 +35,12 @@ >> #define AMD_PMC_SCRATCH_REG_CZN 0x94 >> #define AMD_PMC_SCRATCH_REG_YC 0xD14 >> >> +/* STB Registers */ >> +#define AMD_PMC_STB_INDEX_ADDRESS 0xF8 >> +#define AMD_PMC_STB_INDEX_DATA 0xFC >> +#define AMD_PMC_STB_PMI_0 0x03E30600 >> +#define AMD_PMC_STB_PREDEF 0xC6000001 >> + >> /* Base address of SMU for mapping physical address to virtual address */ >> #define AMD_PMC_SMU_INDEX_ADDRESS 0xB8 >> #define AMD_PMC_SMU_INDEX_DATA 0xBC >> @@ -82,6 +88,7 @@ >> #define SOC_SUBSYSTEM_IP_MAX 12 >> #define DELAY_MIN_US 2000 >> #define DELAY_MAX_US 3000 >> +#define FIFO_SIZE 4096 >> enum amd_pmc_def { >> MSG_TEST = 0x01, >> MSG_OS_HINT_PCO, >> @@ -128,8 +135,14 @@ struct amd_pmc_dev { >> #endif /* CONFIG_DEBUG_FS */ >> }; >> >> +static bool enable_stb; >> +module_param(enable_stb, bool, 0644); >> +MODULE_PARM_DESC(enable_stb, "Enable the STB debug mechanism"); >> + >> static struct amd_pmc_dev pmc; >> static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, u32 arg, u32 *data, u8 msg, bool ret); >> +static int amd_pmc_write_stb(struct amd_pmc_dev *dev, u32 data); >> +static int amd_pmc_read_stb(struct amd_pmc_dev *dev, u32 *buf); >> >> static inline u32 amd_pmc_reg_read(struct amd_pmc_dev *dev, int reg_offset) >> { >> @@ -176,6 +189,51 @@ static int amd_pmc_get_smu_version(struct amd_pmc_dev *dev) >> return 0; >> } >> >> +static int amd_pmc_stb_debugfs_open(struct inode *inode, struct file *filp) >> +{ >> + struct amd_pmc_dev *dev = filp->f_inode->i_private; >> + u32 *buf; >> + int rc; >> + >> + buf = devm_kmalloc(dev->dev, FIFO_SIZE * sizeof(u32), GFP_KERNEL); >> + if (!buf) >> + return -ENOMEM; >> + >> + rc = amd_pmc_read_stb(dev, buf); >> + 
if (rc) >> + goto out; >> + >> + filp->private_data = buf; >> + >> +out: >> + return rc; >> +} >> + >> +static ssize_t amd_pmc_stb_debugfs_read(struct file *filp, char __user *buf, size_t size, >> + loff_t *pos) >> +{ >> + if (!filp->private_data) >> + return -EINVAL; >> + >> + return simple_read_from_buffer(buf, size, pos, filp->private_data, >> + FIFO_SIZE * sizeof(u32)); >> +} >> + >> +static int amd_pmc_stb_debugfs_release(struct inode *inode, struct file *filp) >> +{ >> + kfree(filp->private_data); >> + filp->private_data = NULL; >> + >> + return 0; >> +} >> + >> +const struct file_operations amd_pmc_stb_debugfs_fops = { >> + .owner = THIS_MODULE, >> + .open = amd_pmc_stb_debugfs_open, >> + .read = amd_pmc_stb_debugfs_read, >> + .release = amd_pmc_stb_debugfs_release, >> +}; >> + >> static int amd_pmc_idlemask_read(struct amd_pmc_dev *pdev, struct device *dev, >> struct seq_file *s) >> { >> @@ -289,6 +347,10 @@ static void amd_pmc_dbgfs_register(struct amd_pmc_dev *dev) >> &s0ix_stats_fops); >> debugfs_create_file("amd_pmc_idlemask", 0644, dev->dbgfs_dir, dev, >> &amd_pmc_idlemask_fops); >> + /* Enable STB only when the module_param is set */ >> + if (enable_stb) >> + debugfs_create_file("stb_read", 0644, dev->dbgfs_dir, dev, >> + &amd_pmc_stb_debugfs_fops); >> } >> #else >> static inline void amd_pmc_dbgfs_register(struct amd_pmc_dev *dev) >> @@ -488,6 +550,9 @@ static int __maybe_unused amd_pmc_suspend(struct device *dev) >> if (rc) >> dev_err(pdev->dev, "suspend failed\n"); >> >> + if (enable_stb) >> + amd_pmc_write_stb(pdev, AMD_PMC_STB_PREDEF); >> + >> return rc; >> } >> >> @@ -508,6 +573,10 @@ static int __maybe_unused amd_pmc_resume(struct device *dev) >> /* Dump the IdleMask to see the blockers */ >> amd_pmc_idlemask_read(pdev, dev, NULL); >> >> + /* Write data incremented by 1 to distinguish in stb_read */ >> + if (enable_stb) >> + amd_pmc_write_stb(pdev, AMD_PMC_STB_PREDEF + 1); >> + >> return 0; >> } >> >> @@ -524,6 +593,57 @@ static const struct 
pci_device_id pmc_pci_ids[] = { >> { } >> }; >> >> +static int amd_pmc_write_stb(struct amd_pmc_dev *dev, u32 data) >> +{ >> + int rc; >> + >> + rc = pci_write_config_dword(dev->rdev, AMD_PMC_STB_INDEX_ADDRESS, AMD_PMC_STB_PMI_0); >> + if (rc) { >> + dev_err(dev->dev, "failed to write addr in stb: 0x%X\n", >> + AMD_PMC_STB_INDEX_ADDRESS); >> + pci_dev_put(dev->rdev); >> + return pcibios_err_to_errno(rc); >> + } >> + >> + rc = pci_write_config_dword(dev->rdev, AMD_PMC_STB_INDEX_DATA, data); >> + if (rc) { >> + dev_err(dev->dev, "failed to write data in stb: 0x%X\n", >> + AMD_PMC_STB_INDEX_DATA); >> + pci_dev_put(dev->rdev); >> + return pcibios_err_to_errno(rc); >> + } >> + >> + return 0; >> +} >> + >> +static int amd_pmc_read_stb(struct amd_pmc_dev *dev, u32 *buf) >> +{ >> + int i, err; >> + u32 value; >> + >> + err = pci_write_config_dword(dev->rdev, AMD_PMC_STB_INDEX_ADDRESS, AMD_PMC_STB_PMI_0); >> + if (err) { >> + dev_err(dev->dev, "error writing addr to stb: 0x%X\n", >> + AMD_PMC_STB_INDEX_ADDRESS); >> + pci_dev_put(dev->rdev); >> + return pcibios_err_to_errno(err); >> + } >> + >> + for (i = 0; i < FIFO_SIZE; i++) { >> + err = pci_read_config_dword(dev->rdev, AMD_PMC_STB_INDEX_DATA, &value); >> + if (err) { >> + dev_err(dev->dev, "error reading data from stb: 0x%X\n", >> + AMD_PMC_STB_INDEX_DATA); >> + pci_dev_put(dev->rdev); >> + return pcibios_err_to_errno(err); >> + } >> + >> + *buf++ = value; >> + } >> + >> + return 0; >> +} >> + >> static int amd_pmc_probe(struct platform_device *pdev) >> { >> struct amd_pmc_dev *dev = &pmc; >> -- >> 2.25.1 >>