[Public] > -----Original Message----- > From: Kuehling, Felix <Felix.Kuehling@xxxxxxx> > Sent: November 22, 2022 6:39 PM > To: Kim, Jonathan <Jonathan.Kim@xxxxxxx>; amd- > gfx@xxxxxxxxxxxxxxxxxxxxx > Subject: Re: [PATCH 05/29] drm/amdgpu: setup hw debug registers on driver > initialization > > > On 2022-10-31 12:23, Jonathan Kim wrote: > > Add missing debug trap registers references and initialize all debug > > registers on boot by clearing the hardware exception overrides and the > > wave allocation ID index. > > > > For debug devices that only support single process debugging, enable > > trap temporary setup by default. > > > > Debug devices that support multi-process debugging require trap > > temporary setup to be disabled by default in order to satisfy microbench > > performance when in non-debug mode. > > Where is this done? I don't think it's in the MQD setup because that > happens unconditionally on all GPUs. Right, I forgot to update gfx_v9_4_2_debug_trap_config_init to clear TRAP_EN instead of setting it. I'll fix that. > > > > > > The debugger requires that TTMPs 6 & 7 save the dispatch ID to map > > waves onto dispatch during compute context inspection. > > In order to correctly this up, set the special reserved CP bit by default > > whenever the MQD is initialized. > > There is a word missing here. "In order to correctly _set_ this up ..."? Whoops. Thanks. > > This patch covers GFXv9 and 10. Will GFXv11 be handled separately? OK. I'll include GFX11 in this patch as well for the next round of reviews. 
Thanks, Jon > > Regards, > Felix > > > > > > Signed-off-by: Jonathan Kim <jonathan.kim@xxxxxxx> > > --- > > drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 26 +++++++ > > drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 30 ++++++++ > > .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c | 5 ++ > > .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 5 ++ > > .../include/asic_reg/gc/gc_10_1_0_offset.h | 14 ++++ > > .../include/asic_reg/gc/gc_10_1_0_sh_mask.h | 69 > +++++++++++++++++++ > > .../include/asic_reg/gc/gc_10_3_0_offset.h | 10 +++ > > .../include/asic_reg/gc/gc_10_3_0_sh_mask.h | 4 ++ > > 8 files changed, 163 insertions(+) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > > index af94ac580d3e..d49aff0b4ba3 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > > @@ -4904,6 +4904,29 @@ static u32 > gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *ade > > > > #define DEFAULT_SH_MEM_BASES (0x6000) > > > > +static void gfx_v10_0_debug_trap_config_init(struct amdgpu_device > *adev, > > + uint32_t first_vmid, > > + uint32_t last_vmid) > > +{ > > + uint32_t data; > > + uint32_t trap_config_vmid_mask = 0; > > + int i; > > + > > + /* Calculate trap config vmid mask */ > > + for (i = first_vmid; i < last_vmid; i++) > > + trap_config_vmid_mask |= (1 << i); > > + > > + data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG, > > + VMID_SEL, trap_config_vmid_mask); > > + data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG, > > + TRAP_EN, 1); > > + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), > data); > > + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), > 0); > > + > > + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), > 0); > > + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), > 0); > > +} > > + > > static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev) > > { > > int i; > > @@ -4935,6 +4958,9 @@ static void gfx_v10_0_init_compute_vmid(struct > 
amdgpu_device *adev) > > WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0); > > WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0); > > } > > + > > + gfx_v10_0_debug_trap_config_init(adev, adev- > >vm_manager.first_kfd_vmid, > > + AMDGPU_NUM_VMID); > > } > > > > static void gfx_v10_0_init_gds_vmid(struct amdgpu_device *adev) > > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > > index 0320be4a5fc6..a0e5ad342f13 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > > @@ -2337,6 +2337,29 @@ static void gfx_v9_0_setup_rb(struct > amdgpu_device *adev) > > adev->gfx.config.num_rbs = hweight32(active_rbs); > > } > > > > +static void gfx_v9_0_debug_trap_config_init(struct amdgpu_device > *adev, > > + uint32_t first_vmid, > > + uint32_t last_vmid) > > +{ > > + uint32_t data; > > + uint32_t trap_config_vmid_mask = 0; > > + int i; > > + > > + /* Calculate trap config vmid mask */ > > + for (i = first_vmid; i < last_vmid; i++) > > + trap_config_vmid_mask |= (1 << i); > > + > > + data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG, > > + VMID_SEL, trap_config_vmid_mask); > > + data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG, > > + TRAP_EN, 1); > > + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), > data); > > + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), > 0); > > + > > + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), > 0); > > + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), > 0); > > +} > > + > > #define DEFAULT_SH_MEM_BASES (0x6000) > > static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) > > { > > @@ -4609,6 +4632,13 @@ static int gfx_v9_0_late_init(void *handle) > > if (r) > > return r; > > > > + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)) > > + gfx_v9_4_2_debug_trap_config_init(adev, > > + adev->vm_manager.first_kfd_vmid, > AMDGPU_NUM_VMID); > > + else > > + gfx_v9_0_debug_trap_config_init(adev, > > + 
adev->vm_manager.first_kfd_vmid, > AMDGPU_NUM_VMID); > > + > > return 0; > > } > > > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c > b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c > > index d3e2b6a599a4..cb484ace17de 100644 > > --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c > > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c > > @@ -117,6 +117,11 @@ static void init_mqd(struct mqd_manager *mm, > void **mqd, > > 1 << > CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT | > > 1 << > CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT; > > > > + /* Set cp_hqd_hq_scheduler0 bit 14 to 1 to have the CP set up the > > + * DISPATCH_PTR. This is required for the kfd debugger > > + */ > > + m->cp_hqd_hq_scheduler0 = 1 << 14; > > + > > if (q->format == KFD_QUEUE_FORMAT_AQL) { > > m->cp_hqd_aql_control = > > 1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT; > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c > b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c > > index 0778e587a2d6..86f1cf090246 100644 > > --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c > > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c > > @@ -164,6 +164,11 @@ static void init_mqd(struct mqd_manager *mm, > void **mqd, > > 1 << > CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT | > > 1 << > CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT; > > > > + /* Set cp_hqd_hq_scheduler0 bit 14 to 1 to have the CP set up the > > + * DISPATCH_PTR. 
This is required for the kfd debugger > > + */ > > + m->cp_hqd_hq_status0 = 1 << 14; > > + > > if (q->format == KFD_QUEUE_FORMAT_AQL) { > > m->cp_hqd_aql_control = > > 1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT; > > diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_offset.h > b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_offset.h > > index 18d34bbceebe..7d384f86bd67 100644 > > --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_offset.h > > +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_offset.h > > @@ -5190,6 +5190,20 @@ > > #define mmSPI_WCL_PIPE_PERCENT_CS6_BASE_IDX > 0 > > #define mmSPI_WCL_PIPE_PERCENT_CS7 > 0x1f70 > > #define mmSPI_WCL_PIPE_PERCENT_CS7_BASE_IDX > 0 > > +#define mmSPI_GDBG_WAVE_CNTL > 0x1f71 > > +#define mmSPI_GDBG_WAVE_CNTL_BASE_IDX > 0 > > +#define mmSPI_GDBG_TRAP_CONFIG > 0x1f72 > > +#define mmSPI_GDBG_TRAP_CONFIG_BASE_IDX > 0 > > +#define mmSPI_GDBG_TRAP_MASK > 0x1f73 > > +#define mmSPI_GDBG_TRAP_MASK_BASE_IDX > 0 > > +#define mmSPI_GDBG_WAVE_CNTL2 > 0x1f74 > > +#define mmSPI_GDBG_WAVE_CNTL2_BASE_IDX > 0 > > +#define mmSPI_GDBG_WAVE_CNTL3 > 0x1f75 > > +#define mmSPI_GDBG_WAVE_CNTL3_BASE_IDX > 0 > > +#define mmSPI_GDBG_TRAP_DATA0 > 0x1f78 > > +#define mmSPI_GDBG_TRAP_DATA0_BASE_IDX > 0 > > +#define mmSPI_GDBG_TRAP_DATA1 > 0x1f79 > > +#define mmSPI_GDBG_TRAP_DATA1_BASE_IDX > 0 > > #define mmSPI_COMPUTE_QUEUE_RESET > 0x1f7b > > #define mmSPI_COMPUTE_QUEUE_RESET_BASE_IDX > 0 > > #define mmSPI_RESOURCE_RESERVE_CU_0 > 0x1f7c > > diff --git > a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_sh_mask.h > b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_sh_mask.h > > index 4127896ffcdf..08772ba845b0 100644 > > --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_sh_mask.h > > +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_sh_mask.h > > @@ -19646,6 +19646,75 @@ > > //SPI_WCL_PIPE_PERCENT_CS7 > > #define SPI_WCL_PIPE_PERCENT_CS7__VALUE__SHIFT > 0x0 > > #define SPI_WCL_PIPE_PERCENT_CS7__VALUE_MASK > 0x7FL 
> > +//SPI_GDBG_WAVE_CNTL > > +#define SPI_GDBG_WAVE_CNTL__STALL_RA__SHIFT > 0x0 > > +#define SPI_GDBG_WAVE_CNTL__STALL_VMID__SHIFT > 0x1 > > +#define SPI_GDBG_WAVE_CNTL__STALL_RA_MASK > 0x00000001L > > +#define SPI_GDBG_WAVE_CNTL__STALL_VMID_MASK > 0x0001FFFEL > > +//SPI_GDBG_TRAP_CONFIG > > +#define SPI_GDBG_TRAP_CONFIG__ME_SEL__SHIFT > 0x0 > > +#define SPI_GDBG_TRAP_CONFIG__PIPE_SEL__SHIFT > 0x2 > > +#define SPI_GDBG_TRAP_CONFIG__QUEUE_SEL__SHIFT > 0x4 > > +#define SPI_GDBG_TRAP_CONFIG__ME_MATCH__SHIFT > 0x7 > > +#define SPI_GDBG_TRAP_CONFIG__PIPE_MATCH__SHIFT > 0x8 > > +#define SPI_GDBG_TRAP_CONFIG__QUEUE_MATCH__SHIFT > 0x9 > > +#define SPI_GDBG_TRAP_CONFIG__TRAP_EN__SHIFT > 0xf > > +#define SPI_GDBG_TRAP_CONFIG__VMID_SEL__SHIFT > 0x10 > > +#define SPI_GDBG_TRAP_CONFIG__ME_SEL_MASK > 0x00000003L > > +#define SPI_GDBG_TRAP_CONFIG__PIPE_SEL_MASK > 0x0000000CL > > +#define SPI_GDBG_TRAP_CONFIG__QUEUE_SEL_MASK > 0x00000070L > > +#define SPI_GDBG_TRAP_CONFIG__ME_MATCH_MASK > 0x00000080L > > +#define SPI_GDBG_TRAP_CONFIG__PIPE_MATCH_MASK > 0x00000100L > > +#define SPI_GDBG_TRAP_CONFIG__QUEUE_MATCH_MASK > 0x00000200L > > +#define SPI_GDBG_TRAP_CONFIG__TRAP_EN_MASK > 0x00008000L > > +#define SPI_GDBG_TRAP_CONFIG__VMID_SEL_MASK > 0xFFFF0000L > > +//SPI_GDBG_TRAP_MASK > > +#define SPI_GDBG_TRAP_MASK__EXCP_EN__SHIFT > 0x0 > > +#define SPI_GDBG_TRAP_MASK__REPLACE__SHIFT > 0x9 > > +#define SPI_GDBG_TRAP_MASK__EXCP_EN_MASK > 0x01FFL > > +#define SPI_GDBG_TRAP_MASK__REPLACE_MASK > 0x0200L > > +//SPI_GDBG_WAVE_CNTL2 > > +#define SPI_GDBG_WAVE_CNTL2__VMID_MASK__SHIFT > 0x0 > > +#define SPI_GDBG_WAVE_CNTL2__MODE__SHIFT > 0x10 > > +#define SPI_GDBG_WAVE_CNTL2__VMID_MASK_MASK > 0x0000FFFFL > > +#define SPI_GDBG_WAVE_CNTL2__MODE_MASK > 0x00030000L > > +//SPI_GDBG_WAVE_CNTL3 > > +#define SPI_GDBG_WAVE_CNTL3__STALL_PS__SHIFT > 0x0 > > +#define SPI_GDBG_WAVE_CNTL3__STALL_VS__SHIFT > 0x1 > > +#define SPI_GDBG_WAVE_CNTL3__STALL_GS__SHIFT > 0x2 > > +#define 
SPI_GDBG_WAVE_CNTL3__STALL_HS__SHIFT > 0x3 > > +#define SPI_GDBG_WAVE_CNTL3__STALL_CSG__SHIFT > 0x4 > > +#define SPI_GDBG_WAVE_CNTL3__STALL_CS0__SHIFT > 0x5 > > +#define SPI_GDBG_WAVE_CNTL3__STALL_CS1__SHIFT > 0x6 > > +#define SPI_GDBG_WAVE_CNTL3__STALL_CS2__SHIFT > 0x7 > > +#define SPI_GDBG_WAVE_CNTL3__STALL_CS3__SHIFT > 0x8 > > +#define SPI_GDBG_WAVE_CNTL3__STALL_CS4__SHIFT > 0x9 > > +#define SPI_GDBG_WAVE_CNTL3__STALL_CS5__SHIFT > 0xa > > +#define SPI_GDBG_WAVE_CNTL3__STALL_CS6__SHIFT > 0xb > > +#define SPI_GDBG_WAVE_CNTL3__STALL_CS7__SHIFT > 0xc > > +#define SPI_GDBG_WAVE_CNTL3__STALL_DURATION__SHIFT > 0xd > > +#define SPI_GDBG_WAVE_CNTL3__STALL_MULT__SHIFT > 0x1c > > +#define SPI_GDBG_WAVE_CNTL3__STALL_PS_MASK > 0x00000001L > > +#define SPI_GDBG_WAVE_CNTL3__STALL_VS_MASK > 0x00000002L > > +#define SPI_GDBG_WAVE_CNTL3__STALL_GS_MASK > 0x00000004L > > +#define SPI_GDBG_WAVE_CNTL3__STALL_HS_MASK > 0x00000008L > > +#define SPI_GDBG_WAVE_CNTL3__STALL_CSG_MASK > 0x00000010L > > +#define SPI_GDBG_WAVE_CNTL3__STALL_CS0_MASK > 0x00000020L > > +#define SPI_GDBG_WAVE_CNTL3__STALL_CS1_MASK > 0x00000040L > > +#define SPI_GDBG_WAVE_CNTL3__STALL_CS2_MASK > 0x00000080L > > +#define SPI_GDBG_WAVE_CNTL3__STALL_CS3_MASK > 0x00000100L > > +#define SPI_GDBG_WAVE_CNTL3__STALL_CS4_MASK > 0x00000200L > > +#define SPI_GDBG_WAVE_CNTL3__STALL_CS5_MASK > 0x00000400L > > +#define SPI_GDBG_WAVE_CNTL3__STALL_CS6_MASK > 0x00000800L > > +#define SPI_GDBG_WAVE_CNTL3__STALL_CS7_MASK > 0x00001000L > > +#define SPI_GDBG_WAVE_CNTL3__STALL_DURATION_MASK > 0x0FFFE000L > > +#define SPI_GDBG_WAVE_CNTL3__STALL_MULT_MASK > 0x10000000L > > +//SPI_GDBG_TRAP_DATA0 > > +#define SPI_GDBG_TRAP_DATA0__DATA__SHIFT > 0x0 > > +#define SPI_GDBG_TRAP_DATA0__DATA_MASK > 0xFFFFFFFFL > > +//SPI_GDBG_TRAP_DATA1 > > +#define SPI_GDBG_TRAP_DATA1__DATA__SHIFT > 0x0 > > +#define SPI_GDBG_TRAP_DATA1__DATA_MASK > 0xFFFFFFFFL > > //SPI_COMPUTE_QUEUE_RESET > > #define SPI_COMPUTE_QUEUE_RESET__RESET__SHIFT > 0x0 > > #define 
SPI_COMPUTE_QUEUE_RESET__RESET_MASK > 0x01L > > diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h > b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h > > index 3973110f149c..d09f1a06f4bf 100644 > > --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h > > +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h > > @@ -26,6 +26,8 @@ > > #define mmSQ_DEBUG_STS_GLOBAL_BASE_IDX > 0 > > #define mmSQ_DEBUG_STS_GLOBAL2 > 0x10B0 > > #define mmSQ_DEBUG_STS_GLOBAL2_BASE_IDX > 0 > > +#define mmSQ_DEBUG 0x10B1 > > +#define mmSQ_DEBUG_BASE_IDX 0 > > > > // addressBlock: gc_sdma0_sdma0dec > > // base address: 0x4980 > > @@ -4849,10 +4851,18 @@ > > #define mmSPI_WCL_PIPE_PERCENT_CS3_BASE_IDX > 0 > > #define mmSPI_GDBG_WAVE_CNTL > 0x1f71 > > #define mmSPI_GDBG_WAVE_CNTL_BASE_IDX > 0 > > +#define mmSPI_GDBG_TRAP_CONFIG > 0x1f72 > > +#define mmSPI_GDBG_TRAP_CONFIG_BASE_IDX > 0 > > #define mmSPI_GDBG_TRAP_MASK > 0x1f73 > > #define mmSPI_GDBG_TRAP_MASK_BASE_IDX > 0 > > #define mmSPI_GDBG_WAVE_CNTL2 > 0x1f74 > > #define mmSPI_GDBG_WAVE_CNTL2_BASE_IDX > 0 > > +#define mmSPI_GDBG_WAVE_CNTL3 > 0x1f75 > > +#define mmSPI_GDBG_WAVE_CNTL3_BASE_IDX > 0 > > +#define mmSPI_GDBG_TRAP_DATA0 > 0x1f78 > > +#define mmSPI_GDBG_TRAP_DATA0_BASE_IDX > 0 > > +#define mmSPI_GDBG_TRAP_DATA1 > 0x1f79 > > +#define mmSPI_GDBG_TRAP_DATA1_BASE_IDX > 0 > > #define mmSPI_COMPUTE_QUEUE_RESET > 0x1f7b > > #define mmSPI_COMPUTE_QUEUE_RESET_BASE_IDX > 0 > > #define mmSPI_RESOURCE_RESERVE_CU_0 > 0x1f7c > > diff --git > a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_sh_mask.h > b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_sh_mask.h > > index d4e8ff22ecb8..fc85aee010fe 100644 > > --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_sh_mask.h > > +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_sh_mask.h > > @@ -47853,6 +47853,10 @@ > > > > > > // addressBlock: sqind > > +//SQ_DEBUG > > +#define SQ_DEBUG__SINGLE_MEMOP_MASK 0x00000001L > > +#define 
SQ_DEBUG__SINGLE_MEMOP__SHIFT 0x00000000 > > + > > //SQ_DEBUG_STS_GLOBAL > > #define SQ_DEBUG_STS_GLOBAL2__FIFO_LEVEL_GFX0_MASK > 0x000000ffL > > #define SQ_DEBUG_STS_GLOBAL2__FIFO_LEVEL_GFX0__SHIFT > 0x00000000
<<attachment: winmail.dat>>