Re: [PATCH 3/4] drm/amdgpu: add reset register trace dump function

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Am 21.01.22 um 21:34 schrieb Sharma, Shashank:
From 1c5c552eeddaffd9fb3e7d45ece1b2b28fccc575 Mon Sep 17 00:00:00 2001
From: Somalapuram Amaranath <Amaranath.Somalapuram@xxxxxxx>
Date: Fri, 21 Jan 2022 14:19:10 +0530
Subject: [PATCH 3/4] drm/amdgpu: add reset register trace dump function for
 gfx_v10_0

Implementation of register trace dump function on the AMD GPU resets

Signed-off-by: Somalapuram Amaranath <Amaranath.Somalapuram@xxxxxxx>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h |  8 ++++
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c    | 53 ++++++++++++++++++++++-
 drivers/gpu/drm/amd/include/amd_shared.h  |  1 +
 3 files changed, 60 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index d855cb53c7e0..c97b53b54333 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -537,6 +537,14 @@ TRACE_EVENT(amdgpu_ib_pipe_sync,
               __entry->seqno)
 );

+TRACE_EVENT(gfx_v10_0_reset_reg_dumps,
+        TP_PROTO(char *reg_dumps),
+        TP_ARGS(reg_dumps),
+        TP_STRUCT__entry(__string(dumps, reg_dumps)),
+        TP_fast_assign(__assign_str(dumps, reg_dumps);),
+        TP_printk("amdgpu register dump {%s}", __get_str(dumps))
+);
+
 #undef AMDGPU_JOB_GET_TIMELINE_NAME
 #endif

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 16dbe593cba2..05974ed5416d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -46,7 +46,7 @@
 #include "v10_structs.h"
 #include "gfx_v10_0.h"
 #include "nbio_v2_3.h"
-
+#include "amdgpu_trace.h"
 /*
  * Navi10 has two graphic rings to share each graphic pipe.
  * 1. Primary ring
@@ -188,6 +188,12 @@
 #define RLCG_ERROR_REPORT_ENABLED(adev) \
     (amdgpu_sriov_reg_indirect_mmhub(adev) || amdgpu_sriov_reg_indirect_gc(adev))

+#define N_REGS (17)
+#define DUMP_REG(addr) do {                    \
+        (dump)[i][0] = (addr);                \
+        (dump)[i++][1] =  RREG32_SOC15_IP(GC, addr);    \
+    } while (0)

Those macros need an AMDGPU_ prefix and maybe some better names.

From the design POV I'm really wondering if it wouldn't be better if userspace defines the registers we want to dump in case of a crash.

Regards,
Christian.

+
 MODULE_FIRMWARE("amdgpu/navi10_ce.bin");
 MODULE_FIRMWARE("amdgpu/navi10_pfp.bin");
 MODULE_FIRMWARE("amdgpu/navi10_me.bin");
@@ -7488,7 +7494,6 @@ static int gfx_v10_0_hw_init(void *handle)
 {
     int r;
     struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
     if (!amdgpu_emu_mode)
         gfx_v10_0_init_golden_registers(adev);

@@ -7602,6 +7607,49 @@ static int gfx_v10_0_hw_fini(void *handle)
     return 0;
 }

+static int gfx_v10_0_reset_reg_dumps(void *handle)
+{
+    struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+    uint32_t i = 0;
+    uint32_t (*dump)[2], n_regs = 0;
+    char *reg_dumps;
+
+    dump = kmalloc(N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+    reg_dumps = kmalloc(1024, GFP_KERNEL);
+
+    if (dump == NULL || reg_dumps == NULL)
+        return -ENOMEM;
+
+    DUMP_REG(mmGRBM_STATUS2);
+    DUMP_REG(mmGRBM_STATUS_SE0);
+    DUMP_REG(mmGRBM_STATUS_SE1);
+    DUMP_REG(mmGRBM_STATUS_SE2);
+    DUMP_REG(mmGRBM_STATUS_SE3);
+    DUMP_REG(mmSDMA0_STATUS_REG);
+    DUMP_REG(mmSDMA1_STATUS_REG);
+    DUMP_REG(mmCP_STAT);
+    DUMP_REG(mmCP_STALLED_STAT1);
+    DUMP_REG(mmCP_STALLED_STAT1);
+    DUMP_REG(mmCP_STALLED_STAT3);
+    DUMP_REG(mmCP_CPC_STATUS);
+    DUMP_REG(mmCP_CPC_BUSY_STAT);
+    DUMP_REG(mmCP_CPC_STALLED_STAT1);
+    DUMP_REG(mmCP_CPF_STATUS);
+    DUMP_REG(mmCP_CPF_BUSY_STAT);
+    DUMP_REG(mmCP_CPF_STALLED_STAT1);
+
+    n_regs = i;
+
+    for (i = 0; i < n_regs; i++)
+        sprintf(reg_dumps + strlen(reg_dumps), "%08x: %08x, ", dump[i][0], dump[i][1]);
+
+    trace_gfx_v10_0_reset_reg_dumps(reg_dumps);
+    kfree(dump);
+    kfree(reg_dumps);
+
+    return 0;
+}
+
 static int gfx_v10_0_suspend(void *handle)
 {
     return gfx_v10_0_hw_fini(handle);
@@ -9367,6 +9415,7 @@ static const struct amd_ip_funcs gfx_v10_0_ip_funcs = {
     .set_clockgating_state = gfx_v10_0_set_clockgating_state,
     .set_powergating_state = gfx_v10_0_set_powergating_state,
     .get_clockgating_state = gfx_v10_0_get_clockgating_state,
+    .reset_reg_dumps = gfx_v10_0_reset_reg_dumps,
 };

 static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h
index 257f280d3d53..76d3a55278df 100644
--- a/drivers/gpu/drm/amd/include/amd_shared.h
+++ b/drivers/gpu/drm/amd/include/amd_shared.h
@@ -296,6 +296,7 @@ struct amd_ip_funcs {
                      enum amd_powergating_state state);
     void (*get_clockgating_state)(void *handle, u32 *flags);
     int (*enable_umd_pstate)(void *handle, enum amd_dpm_forced_level *level);
+    int (*reset_reg_dumps)(void *handle);
 };






[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux