Re: [PATCH v1 05/15] drm/amdgpu: add vcn_v4_0_3 ip dump support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




If adev->vcn.cur_state == AMD_PG_STATE_UNGATE, then VCN is powered on, and accessing it while holding vcn.vcn_pg_lock is safe.

Also, cancelling vcn.idle_work must be the first thing to do; otherwise you can run into a VCN power-off in the middle of the register dump, which I think you want to avoid.

This is a safer check than accessing registers to find out whether VCN is powered on, which is asynchronous w.r.t. the job handling path. I am not aware of all the possible states during which ip_dump can be triggered, so ignore this if the above isn't a possible scenario.


Regards,

Sathish

On 8/8/2024 12:59 PM, Khatri, Sunil wrote:

On 8/8/2024 12:44 PM, Lazar, Lijo wrote:

On 8/8/2024 12:36 PM, Khatri, Sunil wrote:
On 8/8/2024 11:20 AM, Lazar, Lijo wrote:
On 8/7/2024 2:58 AM, Alex Deucher wrote:
On Tue, Aug 6, 2024 at 4:18 AM Sunil Khatri <sunil.khatri@xxxxxxx>
wrote:
Add support of vcn ip dump in the devcoredump
for vcn_v4_0_3.

Signed-off-by: Sunil Khatri <sunil.khatri@xxxxxxx>
---
   drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 170
+++++++++++++++++++++++-
   1 file changed, 169 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index 9bae95538b62..dd3baccb2904 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -45,6 +45,132 @@
   #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00
   #define VCN1_VID_SOC_ADDRESS_3_0       0x48300

+static const struct amdgpu_hwip_reg_entry vcn_reg_list_4_0_3[] = {
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_STATUS),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_VCPU_CACHE_OFFSET0),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_VCPU_CACHE_OFFSET1),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_VCPU_CACHE_OFFSET2),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA0),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA1),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_CMD),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_VCPU_NC1_64BIT_BAR_HIGH),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_NC1_64BIT_BAR_LOW),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), +       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_CACHE_VMIDS_MULTI),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_VCPU_NC_VMIDS_MULTI),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI2),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO2),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI3),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO3),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI4),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO4),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR2),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR2),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR3),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR3),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR4),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR4),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE2),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE3),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_SOFT_RESET),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_SOFT_RESET2),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CGC_GATE),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CGC_STATUS),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CGC_CTRL),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CGC_CTRL3),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_SUVD_CGC_GATE),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_SUVD_CGC_STATUS),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_SUVD_CGC_CTRL),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_SUVD_CGC_GATE2),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE3),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE4),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE4),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_SUVD_CGC_STATUS2),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_SUVD_CGC_GATE2),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_VCPU_CACHE_OFFSET2),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_MIF_GPGPU_64BIT_BAR_LOW),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_MIF_GPGPU_64BIT_BAR_HIGH),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_MIF_CURR_LUMA_64BIT_BAR_LOW),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_MIF_CURR_LUMA_64BIT_BAR_HIGH),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_MIF_CURR_CHROMA_64BIT_BAR_LOW),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_MIF_CURR_CHROMA_64BIT_BAR_HIGH),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_DBW_64BIT_BAR_LOW), +       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_DBW_64BIT_BAR_HIGH),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_MIF_CM_COLOC_64BIT_BAR_LOW),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_MIF_CM_COLOC_64BIT_BAR_HIGH),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSP0_64BIT_BAR_LOW),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_MIF_BSP0_64BIT_BAR_HIGH),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSP1_64BIT_BAR_LOW),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_MIF_BSP1_64BIT_BAR_HIGH),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSP2_64BIT_BAR_LOW),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_MIF_BSP2_64BIT_BAR_HIGH),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSP3_64BIT_BAR_LOW),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_MIF_BSP3_64BIT_BAR_HIGH),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSD0_64BIT_BAR_LOW),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_MIF_BSD0_64BIT_BAR_HIGH),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSD1_64BIT_BAR_LOW),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_MIF_BSD1_64BIT_BAR_HIGH),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSD2_64BIT_BAR_LOW),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_MIF_BSD2_64BIT_BAR_HIGH),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSD3_64BIT_BAR_LOW),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_MIF_BSD3_64BIT_BAR_HIGH),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_BSD4_64BIT_BAR_LOW),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_MIF_BSD4_64BIT_BAR_HIGH),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_VCPU_CACHE3_64BIT_BAR_LOW),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_VCPU_CACHE3_64BIT_BAR_HIGH),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_VCPU_CACHE4_64BIT_BAR_LOW),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_VCPU_CACHE4_64BIT_BAR_HIGH),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_VCPU_CACHE5_64BIT_BAR_LOW),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_VCPU_CACHE5_64BIT_BAR_HIGH),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_VCPU_CACHE6_64BIT_BAR_LOW),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_VCPU_CACHE6_64BIT_BAR_HIGH),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_VCPU_CACHE7_64BIT_BAR_LOW),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_VCPU_CACHE7_64BIT_BAR_HIGH),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_MIF_SCLR_64BIT_BAR_LOW),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_MIF_SCLR_64BIT_BAR_HIGH),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_MIF_SCLR2_64BIT_BAR_LOW),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_MIF_SCLR2_64BIT_BAR_HIGH),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_MIF_IMAGEPASTE_LUMA_64BIT_BAR_LOW),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_MIF_IMAGEPASTE_LUMA_64BIT_BAR_HIGH),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_MIF_IMAGEPASTE_CHROMA_64BIT_BAR_LOW),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_MIF_IMAGEPASTE_CHROMA_64BIT_BAR_HIGH),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_MIF_PRIVACY_LUMA_64BIT_BAR_LOW),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_MIF_PRIVACY_LUMA_64BIT_BAR_HIGH),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_MIF_PRIVACY_CHROMA_64BIT_BAR_LOW),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_LMI_MIF_PRIVACY_CHROMA_64BIT_BAR_HIGH),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_LMI_STATUS),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_DATA),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_MASK),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_PAUSE),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_DPG_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+       SOC15_REG_ENTRY_STR(VCN, 0,
regUVD_DPG_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_VCPU_CACHE_OFFSET0),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMI_VCPU_CACHE_VMID),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_CLK_EN_VCPU_REPORT),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL2),
+       SOC15_REG_ENTRY_STR(VCN, 0, regUVD_SCRATCH1)
+};
+
   #define NORMALIZE_VCN_REG_OFFSET(offset) \
                  (offset & 0x1FFFF)

@@ -92,6 +218,8 @@ static int vcn_v4_0_3_sw_init(void *handle)
          struct amdgpu_device *adev = (struct amdgpu_device *)handle;
          struct amdgpu_ring *ring;
          int i, r, vcn_inst;
+       uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_3);
+       uint32_t *ptr;

          r = amdgpu_vcn_sw_init(adev);
          if (r)
@@ -159,6 +287,15 @@ static int vcn_v4_0_3_sw_init(void *handle)
                  }
          }

+       /* Allocate memory for VCN IP Dump buffer */
+       ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count,
sizeof(uint32_t), GFP_KERNEL);
+       if (ptr == NULL) {
+               DRM_ERROR("Failed to allocate memory for VCN IP
Dump\n");
+               adev->vcn.ip_dump = NULL;
+       } else {
+               adev->vcn.ip_dump = ptr;
+       }
+
          return 0;
   }

@@ -194,6 +331,8 @@ static int vcn_v4_0_3_sw_fini(void *handle)

          r = amdgpu_vcn_sw_fini(adev);

+       kfree(adev->vcn.ip_dump);
+
          return r;
   }

@@ -1684,6 +1823,35 @@ static void vcn_v4_0_3_set_irq_funcs(struct
amdgpu_device *adev)
          adev->vcn.inst->irq.funcs = &vcn_v4_0_3_irq_funcs;
   }

+static void vcn_v4_0_3_dump_ip_state(void *handle)
+{
+       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+       int i, j;
+       bool is_powered;
+       uint32_t inst_off;
+       uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_3);
+
+       if (!adev->vcn.ip_dump)
+               return;
+
+       for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+               if (adev->vcn.harvest_config & (1 << i))
+                       continue;
+
+               inst_off = i * reg_count;
+               /* mmUVD_POWER_STATUS is always readable and is
first element of the array */
+               adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i,
regUVD_POWER_STATUS);
I think you need to use the GET_INST() macro to properly handle
this.  E.g.,
vcn_inst = GET_INST(VCN, i);

Alex

+               is_powered = (adev->vcn.ip_dump[inst_off] &
+
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1;
+
+               if (is_powered)
+                       for (j = 1; j < reg_count; j++)
+ adev->vcn.ip_dump[inst_off + j] =
+
RREG32(SOC15_REG_ENTRY_OFFSET_INST(
+
vcn_reg_list_4_0_3[j], i));
VCN 4.0.3 supports DPG. As far as I understand, most of these registers
are accessed indirectly in DPG mode through indirect SRAM.

Checking UVD power status alone may not be sufficient for direct access.
I am following what Windows does, and most of the registers are
directly accessed, but I agree some might not be.
Whether the Windows logic works is the first question, apart from the
secondary question of the value in logging some of those registers.
True. I can't say, but the intent was to do the bare minimum as a starting point. Dumping VCN registers is a challenge because of its dynamic power gating, which is controlled by firmware. According to the VCN firmware team, in case of a VCN hang we will probably be in a powered-up state and able to read some of the status registers, if not all.

   We are assuming that in case
of a VCN hang it should be in a good power state and we should be able to
read most of the registers.
'is_powered ' - It's quite obvious that there is no assumption like that
:). Secondly, when there are multiple instances where only one VCN
instance got hung, and others may not be - this assumption won't hold good.

The principle is that we dump all the IPs irrespective of which one caused the hang, so no matter which instance causes the hang, the registers are dumped for all of them. The VCN hang information is captured in the kernel logs

that can be used along with it. Also, is_powered is per instance, and if an instance is powered off we aren't going to read its registers at all.

Based on the experiments I did, in the case of a hang caused by GFX I found VCN to be powered off, as there isn't any workload on VCN to keep it up. We will improve the functionality as we start seeing issues.

Thanks,
Lijo

Based on further feedback I will do the
needful, but right now, at the point where we are dumping the registers, we
cannot make any change in power state.

Regards
Sunil khatri

Thanks,
Lijo

+       }
+}
+
   static const struct amd_ip_funcs vcn_v4_0_3_ip_funcs = {
          .name = "vcn_v4_0_3",
          .early_init = vcn_v4_0_3_early_init,
@@ -1702,7 +1870,7 @@ static const struct amd_ip_funcs
vcn_v4_0_3_ip_funcs = {
          .post_soft_reset = NULL,
          .set_clockgating_state = vcn_v4_0_3_set_clockgating_state,
          .set_powergating_state = vcn_v4_0_3_set_powergating_state,
-       .dump_ip_state = NULL,
+       .dump_ip_state = vcn_v4_0_3_dump_ip_state,
          .print_ip_state = NULL,
   };

--
2.34.1




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux