Re: [PATCH v3 3/4] drm/amdgpu: add support to dump gfx10 queue registers

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 5/16/2024 1:42 AM, Deucher, Alexander wrote:
[Public]

-----Original Message-----
From: Sunil Khatri <sunil.khatri@xxxxxxx>
Sent: Wednesday, May 15, 2024 8:18 AM
To: Deucher, Alexander <Alexander.Deucher@xxxxxxx>; Koenig, Christian
<Christian.Koenig@xxxxxxx>
Cc: amd-gfx@xxxxxxxxxxxxxxxxxxxxx; Khatri, Sunil <Sunil.Khatri@xxxxxxx>
Subject: [PATCH v3 3/4] drm/amdgpu: add support to dump gfx10 queue
registers

Add gfx queue register for all instances in ip dump for gfx10.

Signed-off-by: Sunil Khatri <sunil.khatri@xxxxxxx>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  1 +
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 86
+++++++++++++++++++++++++
  2 files changed, 87 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index d96873c154ed..54232066cd3b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -437,6 +437,7 @@ struct amdgpu_gfx {
       /* IP reg dump */
       uint32_t                        *ipdump_core;
       uint32_t                        *ipdump_cp;
+     uint32_t                        *ipdump_gfx_queue;
I'd call this ip_dump_gfx or ip_dump_gfx_queues to better align with what it stores.

  };

  struct amdgpu_gfx_ras_reg_entry {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index daf9a3571183..5b8132ecc039 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -424,6 +424,33 @@ static const struct amdgpu_hwip_reg_entry
gc_cp_reg_list_10[] = {
       SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_STATUS)  };

+static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_10[] = {
+     /* gfx queue registers */
+     SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_ACTIVE),
+     SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY),
+     SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_BASE),
+     SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_BASE_HI),
+     SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_OFFSET),
+     SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CSMD_RPTR),
+     SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_WPTR),
+     SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_WPTR_HI),
+     SOC15_REG_ENTRY_STR(GC, 0,
mmCP_GFX_HQD_DEQUEUE_REQUEST),
+     SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_MAPPED),
+     SOC15_REG_ENTRY_STR(GC, 0,
mmCP_GFX_HQD_QUE_MGR_CONTROL),
+     SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_HQ_CONTROL0),
+     SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_HQ_STATUS0),
+     SOC15_REG_ENTRY_STR(GC, 0,
mmCP_GFX_HQD_CE_WPTR_POLL_ADDR_LO),
+     SOC15_REG_ENTRY_STR(GC, 0,
mmCP_GFX_HQD_CE_WPTR_POLL_ADDR_HI),
+     SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_OFFSET),
+     SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_CSMD_RPTR),
+     SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_WPTR),
+     SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_WPTR_HI),
+     SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_MQD_BASE_ADDR),
+     SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_MQD_BASE_ADDR_HI),
+     SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO),
+     SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI) };
+
  static const struct soc15_reg_golden golden_settings_gc_10_1[] = {
       SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4,
0xffffffff, 0x00400014),
       SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_CPF_CLK_CTRL,
0xfcff8fff, 0xf8000100), @@ -4664,6 +4691,19 @@ static void
gfx_v10_0_alloc_ip_dump(struct amdgpu_device *adev)
       } else {
               adev->gfx.ipdump_cp = ptr;
       }
+
+     /* Allocate memory for gfx cp queue registers for all the instances */
+     reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_10);
+     inst = adev->gfx.me.num_me * adev->gfx.me.num_pipe_per_me *
+             adev->gfx.me.num_queue_per_pipe;
+
+     ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
+     if (ptr == NULL) {
+             DRM_ERROR("Failed to allocate memory for GFX CP IP
Dump\n");
+             adev->gfx.ipdump_gfx_queue = NULL;
+     } else {
+             adev->gfx.ipdump_gfx_queue = ptr;
+     }
  }

  static int gfx_v10_0_sw_init(void *handle) @@ -4874,6 +4914,7 @@ static
int gfx_v10_0_sw_fini(void *handle)

       kfree(adev->gfx.ipdump_core);
       kfree(adev->gfx.ipdump_cp);
+     kfree(adev->gfx.ipdump_gfx_queue);

       return 0;
  }
@@ -9368,6 +9409,26 @@ static void gfx_v10_ip_print(void *handle, struct
drm_printer *p)
                       }
               }
       }
+
+     /* print gfx queue registers for all instances */
+     if (!adev->gfx.ipdump_gfx_queue)
+             return;
+
+     reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_10);
+
+     for (i = 0; i < adev->gfx.me.num_me; i++) {
+             for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+                     for (k = 0; k < adev->gfx.me.num_queue_per_pipe;
k++) {
+                             drm_printf(p, "me %d, pipe %d, queue %d\n",
i, j, k);
+                             for (reg = 0; reg < reg_count; reg++) {
+                                     drm_printf(p, "%-50s \t 0x%08x\n",
+
gc_gfx_queue_reg_list_10[reg].reg_name,
                                                   adev->gfx.ipdump_gfx_queue[index + reg]);
+                             }
+                             index += reg_count;
+                     }
+             }
+     }
  }

  static void gfx_v10_ip_dump(void *handle) @@ -9414,6 +9475,31 @@ static
void gfx_v10_ip_dump(void *handle)
       nv_grbm_select(adev, 0, 0, 0, 0);
       mutex_unlock(&adev->srbm_mutex);
       amdgpu_gfx_off_ctrl(adev, true);
+
+     /* dump gfx queue registers for all instances */
+     if (!adev->gfx.ipdump_gfx_queue)
+             return;
+
+     reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_10);
+     amdgpu_gfx_off_ctrl(adev, false);
+     mutex_lock(&adev->srbm_mutex);
+     for (i = 0; i < adev->gfx.me.num_me; i++) {
+             for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+                     for (k = 0; k < adev->gfx.me.num_queue_per_pipe;
k++) {
+                             nv_grbm_select(adev, i, j, k, 0);
+
+                             for (reg = 0; reg < reg_count; reg++) {
+                                     adev->gfx.ipdump_gfx_queue[index +
reg] =
+
       RREG32(SOC15_REG_ENTRY_OFFSET(
+
       gc_gfx_queue_reg_list_10[reg]));
+                             }
+                             index += reg_count;
+                     }
+             }
+     }
Does this one not need an msleep?

Since there are fewer registers and the loop also runs fewer times in total. But we can add the msleep, as it does not seem to add any noticeable delay overall.

Regards
Sunil Khatri


Alex

+     nv_grbm_select(adev, 0, 0, 0, 0);
+     mutex_unlock(&adev->srbm_mutex);
+     amdgpu_gfx_off_ctrl(adev, true);
  }

  static const struct amd_ip_funcs gfx_v10_0_ip_funcs = {
--
2.34.1



[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux