> -----Original Message----- > From: amd-gfx [mailto:amd-gfx-bounces at lists.freedesktop.org] On Behalf > Of Rex Zhu > Sent: Monday, July 03, 2017 6:13 AM > To: amd-gfx at lists.freedesktop.org > Cc: Zhu, Rex > Subject: [PATCH] drm/amdgpu: fix vulkan test performance drop and hang > on VI > > caused by not programming dynamic_cu_mask_addr in the KIQ MQD. > > v2: create struct vi_mqd_allocation in FB which will contain > 1. PM4 MQD structure. > 2. Write Pointer Poll Memory. > 3. Read Pointer Report Memory. > 4. Dynamic CU Mask. > 5. Dynamic RB Mask. > > Change-Id: I22c840f1bf8d365f7df33a27d6b11e1aea8f2958 > Signed-off-by: Rex Zhu <Rex.Zhu at amd.com> Reviewed-by: Alex Deucher <alexander.deucher at amd.com> > --- > drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 27 ++-- > drivers/gpu/drm/amd/include/vi_structs.h | 268 > +++++++++++++++++++++++++++++++ > 2 files changed, 285 insertions(+), 10 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > index 1a75ab1..452cc5b 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > @@ -40,7 +40,6 @@ > > #include "bif/bif_5_0_d.h" > #include "bif/bif_5_0_sh_mask.h" > - > #include "gca/gfx_8_0_d.h" > #include "gca/gfx_8_0_enum.h" > #include "gca/gfx_8_0_sh_mask.h" > @@ -2100,7 +2099,7 @@ static int gfx_v8_0_sw_init(void *handle) > return r; > > /* create MQD for all compute queues as well as KIQ for SRIOV case > */ > - r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct > vi_mqd)); > + r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct > vi_mqd_allocation)); > if (r) > return r; > > @@ -4715,9 +4714,6 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring > *ring) > uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; > uint32_t tmp; > > - /* init the mqd struct */ > - memset(mqd, 0, sizeof(struct vi_mqd)); > - > mqd->header = 0xC0310800; > mqd->compute_pipelinestat_enable = 0x00000001; > mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 
> @@ -4725,7 +4721,12 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring > *ring) > mqd->compute_static_thread_mgmt_se2 = 0xffffffff; > mqd->compute_static_thread_mgmt_se3 = 0xffffffff; > mqd->compute_misc_reserved = 0x00000003; > - > + if (!(adev->flags & AMD_IS_APU)) { > + mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring- > >mqd_gpu_addr > + + offsetof(struct > vi_mqd_allocation, dyamic_cu_mask)); > + mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring- > >mqd_gpu_addr > + + offsetof(struct > vi_mqd_allocation, dyamic_cu_mask)); > + } > eop_base_addr = ring->eop_gpu_addr >> 8; > mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; > mqd->cp_hqd_eop_base_addr_hi = > upper_32_bits(eop_base_addr); > @@ -4900,7 +4901,7 @@ static int gfx_v8_0_kiq_init_queue(struct > amdgpu_ring *ring) > if (adev->gfx.in_reset) { /* for GPU_RESET case */ > /* reset MQD to a clean status */ > if (adev->gfx.mec.mqd_backup[mqd_idx]) > - memcpy(mqd, adev- > >gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); > + memcpy(mqd, adev- > >gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); > > /* reset ring buffer */ > ring->wptr = 0; > @@ -4916,6 +4917,9 @@ static int gfx_v8_0_kiq_init_queue(struct > amdgpu_ring *ring) > vi_srbm_select(adev, 0, 0, 0, 0); > mutex_unlock(&adev->srbm_mutex); > } else { > + memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); > + ((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = > 0xFFFFFFFF; > + ((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = > 0xFFFFFFFF; > mutex_lock(&adev->srbm_mutex); > vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); > gfx_v8_0_mqd_init(ring); > @@ -4929,7 +4933,7 @@ static int gfx_v8_0_kiq_init_queue(struct > amdgpu_ring *ring) > mutex_unlock(&adev->srbm_mutex); > > if (adev->gfx.mec.mqd_backup[mqd_idx]) > - memcpy(adev->gfx.mec.mqd_backup[mqd_idx], > mqd, sizeof(*mqd)); > + memcpy(adev->gfx.mec.mqd_backup[mqd_idx], > mqd, sizeof(struct vi_mqd_allocation)); > } > > return r; > @@ -4947,6 +4951,9 @@ static int 
gfx_v8_0_kcq_init_queue(struct > amdgpu_ring *ring) > int mqd_idx = ring - &adev->gfx.compute_ring[0]; > > if (!adev->gfx.in_reset && !adev->gfx.in_suspend) { > + memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); > + ((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = > 0xFFFFFFFF; > + ((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = > 0xFFFFFFFF; > mutex_lock(&adev->srbm_mutex); > vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); > gfx_v8_0_mqd_init(ring); > @@ -4954,11 +4961,11 @@ static int gfx_v8_0_kcq_init_queue(struct > amdgpu_ring *ring) > mutex_unlock(&adev->srbm_mutex); > > if (adev->gfx.mec.mqd_backup[mqd_idx]) > - memcpy(adev->gfx.mec.mqd_backup[mqd_idx], > mqd, sizeof(*mqd)); > + memcpy(adev->gfx.mec.mqd_backup[mqd_idx], > mqd, sizeof(struct vi_mqd_allocation)); > } else if (adev->gfx.in_reset) { /* for GPU_RESET case */ > /* reset MQD to a clean status */ > if (adev->gfx.mec.mqd_backup[mqd_idx]) > - memcpy(mqd, adev- > >gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); > + memcpy(mqd, adev- > >gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); > /* reset ring buffer */ > ring->wptr = 0; > amdgpu_ring_clear_ring(ring); > diff --git a/drivers/gpu/drm/amd/include/vi_structs.h > b/drivers/gpu/drm/amd/include/vi_structs.h > index b68f8ef..ca93b51 100644 > --- a/drivers/gpu/drm/amd/include/vi_structs.h > +++ b/drivers/gpu/drm/amd/include/vi_structs.h > @@ -195,6 +195,274 @@ struct vi_mqd { > uint32_t compute_wave_restore_addr_lo; > uint32_t compute_wave_restore_addr_hi; > uint32_t compute_wave_restore_control; > + uint32_t reserved9; > + uint32_t reserved10; > + uint32_t reserved11; > + uint32_t reserved12; > + uint32_t reserved13; > + uint32_t reserved14; > + uint32_t reserved15; > + uint32_t reserved16; > + uint32_t reserved17; > + uint32_t reserved18; > + uint32_t reserved19; > + uint32_t reserved20; > + uint32_t reserved21; > + uint32_t reserved22; > + uint32_t reserved23; > + uint32_t reserved24; > + uint32_t reserved25; > + 
uint32_t reserved26; > + uint32_t reserved27; > + uint32_t reserved28; > + uint32_t reserved29; > + uint32_t reserved30; > + uint32_t reserved31; > + uint32_t reserved32; > + uint32_t reserved33; > + uint32_t reserved34; > + uint32_t compute_user_data_0; > + uint32_t compute_user_data_1; > + uint32_t compute_user_data_2; > + uint32_t compute_user_data_3; > + uint32_t compute_user_data_4; > + uint32_t compute_user_data_5; > + uint32_t compute_user_data_6; > + uint32_t compute_user_data_7; > + uint32_t compute_user_data_8; > + uint32_t compute_user_data_9; > + uint32_t compute_user_data_10; > + uint32_t compute_user_data_11; > + uint32_t compute_user_data_12; > + uint32_t compute_user_data_13; > + uint32_t compute_user_data_14; > + uint32_t compute_user_data_15; > + uint32_t cp_compute_csinvoc_count_lo; > + uint32_t cp_compute_csinvoc_count_hi; > + uint32_t reserved35; > + uint32_t reserved36; > + uint32_t reserved37; > + uint32_t cp_mqd_query_time_lo; > + uint32_t cp_mqd_query_time_hi; > + uint32_t cp_mqd_connect_start_time_lo; > + uint32_t cp_mqd_connect_start_time_hi; > + uint32_t cp_mqd_connect_end_time_lo; > + uint32_t cp_mqd_connect_end_time_hi; > + uint32_t cp_mqd_connect_end_wf_count; > + uint32_t cp_mqd_connect_end_pq_rptr; > + uint32_t cp_mqd_connect_endvi_sdma_mqd_pq_wptr; > + uint32_t cp_mqd_connect_end_ib_rptr; > + uint32_t reserved38; > + uint32_t reserved39; > + uint32_t cp_mqd_save_start_time_lo; > + uint32_t cp_mqd_save_start_time_hi; > + uint32_t cp_mqd_save_end_time_lo; > + uint32_t cp_mqd_save_end_time_hi; > + uint32_t cp_mqd_restore_start_time_lo; > + uint32_t cp_mqd_restore_start_time_hi; > + uint32_t cp_mqd_restore_end_time_lo; > + uint32_t cp_mqd_restore_end_time_hi; > + uint32_t disable_queue; > + uint32_t reserved41; > + uint32_t gds_cs_ctxsw_cnt0; > + uint32_t gds_cs_ctxsw_cnt1; > + uint32_t gds_cs_ctxsw_cnt2; > + uint32_t gds_cs_ctxsw_cnt3; > + uint32_t reserved42; > + uint32_t reserved43; > + uint32_t cp_pq_exe_status_lo; > + uint32_t 
cp_pq_exe_status_hi; > + uint32_t cp_packet_id_lo; > + uint32_t cp_packet_id_hi; > + uint32_t cp_packet_exe_status_lo; > + uint32_t cp_packet_exe_status_hi; > + uint32_t gds_save_base_addr_lo; > + uint32_t gds_save_base_addr_hi; > + uint32_t gds_save_mask_lo; > + uint32_t gds_save_mask_hi; > + uint32_t ctx_save_base_addr_lo; > + uint32_t ctx_save_base_addr_hi; > + uint32_t dynamic_cu_mask_addr_lo; > + uint32_t dynamic_cu_mask_addr_hi; > + uint32_t cp_mqd_base_addr_lo; > + uint32_t cp_mqd_base_addr_hi; > + uint32_t cp_hqd_active; > + uint32_t cp_hqd_vmid; > + uint32_t cp_hqd_persistent_state; > + uint32_t cp_hqd_pipe_priority; > + uint32_t cp_hqd_queue_priority; > + uint32_t cp_hqd_quantum; > + uint32_t cp_hqd_pq_base_lo; > + uint32_t cp_hqd_pq_base_hi; > + uint32_t cp_hqd_pq_rptr; > + uint32_t cp_hqd_pq_rptr_report_addr_lo; > + uint32_t cp_hqd_pq_rptr_report_addr_hi; > + uint32_t cp_hqd_pq_wptr_poll_addr_lo; > + uint32_t cp_hqd_pq_wptr_poll_addr_hi; > + uint32_t cp_hqd_pq_doorbell_control; > + uint32_t cp_hqd_pq_wptr; > + uint32_t cp_hqd_pq_control; > + uint32_t cp_hqd_ib_base_addr_lo; > + uint32_t cp_hqd_ib_base_addr_hi; > + uint32_t cp_hqd_ib_rptr; > + uint32_t cp_hqd_ib_control; > + uint32_t cp_hqd_iq_timer; > + uint32_t cp_hqd_iq_rptr; > + uint32_t cp_hqd_dequeue_request; > + uint32_t cp_hqd_dma_offload; > + uint32_t cp_hqd_sema_cmd; > + uint32_t cp_hqd_msg_type; > + uint32_t cp_hqd_atomic0_preop_lo; > + uint32_t cp_hqd_atomic0_preop_hi; > + uint32_t cp_hqd_atomic1_preop_lo; > + uint32_t cp_hqd_atomic1_preop_hi; > + uint32_t cp_hqd_hq_status0; > + uint32_t cp_hqd_hq_control0; > + uint32_t cp_mqd_control; > + uint32_t cp_hqd_hq_status1; > + uint32_t cp_hqd_hq_control1; > + uint32_t cp_hqd_eop_base_addr_lo; > + uint32_t cp_hqd_eop_base_addr_hi; > + uint32_t cp_hqd_eop_control; > + uint32_t cp_hqd_eop_rptr; > + uint32_t cp_hqd_eop_wptr; > + uint32_t cp_hqd_eop_done_events; > + uint32_t cp_hqd_ctx_save_base_addr_lo; > + uint32_t cp_hqd_ctx_save_base_addr_hi; > + 
uint32_t cp_hqd_ctx_save_control; > + uint32_t cp_hqd_cntl_stack_offset; > + uint32_t cp_hqd_cntl_stack_size; > + uint32_t cp_hqd_wg_state_offset; > + uint32_t cp_hqd_ctx_save_size; > + uint32_t cp_hqd_gds_resource_state; > + uint32_t cp_hqd_error; > + uint32_t cp_hqd_eop_wptr_mem; > + uint32_t cp_hqd_eop_dones; > + uint32_t reserved46; > + uint32_t reserved47; > + uint32_t reserved48; > + uint32_t reserved49; > + uint32_t reserved50; > + uint32_t reserved51; > + uint32_t reserved52; > + uint32_t reserved53; > + uint32_t reserved54; > + uint32_t reserved55; > + uint32_t iqtimer_pkt_header; > + uint32_t iqtimer_pkt_dw0; > + uint32_t iqtimer_pkt_dw1; > + uint32_t iqtimer_pkt_dw2; > + uint32_t iqtimer_pkt_dw3; > + uint32_t iqtimer_pkt_dw4; > + uint32_t iqtimer_pkt_dw5; > + uint32_t iqtimer_pkt_dw6; > + uint32_t iqtimer_pkt_dw7; > + uint32_t iqtimer_pkt_dw8; > + uint32_t iqtimer_pkt_dw9; > + uint32_t iqtimer_pkt_dw10; > + uint32_t iqtimer_pkt_dw11; > + uint32_t iqtimer_pkt_dw12; > + uint32_t iqtimer_pkt_dw13; > + uint32_t iqtimer_pkt_dw14; > + uint32_t iqtimer_pkt_dw15; > + uint32_t iqtimer_pkt_dw16; > + uint32_t iqtimer_pkt_dw17; > + uint32_t iqtimer_pkt_dw18; > + uint32_t iqtimer_pkt_dw19; > + uint32_t iqtimer_pkt_dw20; > + uint32_t iqtimer_pkt_dw21; > + uint32_t iqtimer_pkt_dw22; > + uint32_t iqtimer_pkt_dw23; > + uint32_t iqtimer_pkt_dw24; > + uint32_t iqtimer_pkt_dw25; > + uint32_t iqtimer_pkt_dw26; > + uint32_t iqtimer_pkt_dw27; > + uint32_t iqtimer_pkt_dw28; > + uint32_t iqtimer_pkt_dw29; > + uint32_t iqtimer_pkt_dw30; > + uint32_t iqtimer_pkt_dw31; > + uint32_t reserved56; > + uint32_t reserved57; > + uint32_t reserved58; > + uint32_t set_resources_header; > + uint32_t set_resources_dw1; > + uint32_t set_resources_dw2; > + uint32_t set_resources_dw3; > + uint32_t set_resources_dw4; > + uint32_t set_resources_dw5; > + uint32_t set_resources_dw6; > + uint32_t set_resources_dw7; > + uint32_t reserved59; > + uint32_t reserved60; > + uint32_t reserved61; > + 
uint32_t reserved62; > + uint32_t reserved63; > + uint32_t reserved64; > + uint32_t reserved65; > + uint32_t reserved66; > + uint32_t reserved67; > + uint32_t reserved68; > + uint32_t reserved69; > + uint32_t reserved70; > + uint32_t reserved71; > + uint32_t reserved72; > + uint32_t reserved73; > + uint32_t reserved74; > + uint32_t reserved75; > + uint32_t reserved76; > + uint32_t reserved77; > + uint32_t reserved78; > + uint32_t reserved_t[256]; > +}; > + > +struct vi_mqd_allocation { > + struct vi_mqd mqd; > + uint32_t wptr_poll_mem; > + uint32_t rptr_report_mem; > + uint32_t dyamic_cu_mask; > + uint32_t dyamic_rb_mask; > +}; > + > +struct cz_mqd { > + uint32_t header; > + uint32_t compute_dispatch_initiator; > + uint32_t compute_dim_x; > + uint32_t compute_dim_y; > + uint32_t compute_dim_z; > + uint32_t compute_start_x; > + uint32_t compute_start_y; > + uint32_t compute_start_z; > + uint32_t compute_num_thread_x; > + uint32_t compute_num_thread_y; > + uint32_t compute_num_thread_z; > + uint32_t compute_pipelinestat_enable; > + uint32_t compute_perfcount_enable; > + uint32_t compute_pgm_lo; > + uint32_t compute_pgm_hi; > + uint32_t compute_tba_lo; > + uint32_t compute_tba_hi; > + uint32_t compute_tma_lo; > + uint32_t compute_tma_hi; > + uint32_t compute_pgm_rsrc1; > + uint32_t compute_pgm_rsrc2; > + uint32_t compute_vmid; > + uint32_t compute_resource_limits; > + uint32_t compute_static_thread_mgmt_se0; > + uint32_t compute_static_thread_mgmt_se1; > + uint32_t compute_tmpring_size; > + uint32_t compute_static_thread_mgmt_se2; > + uint32_t compute_static_thread_mgmt_se3; > + uint32_t compute_restart_x; > + uint32_t compute_restart_y; > + uint32_t compute_restart_z; > + uint32_t compute_thread_trace_enable; > + uint32_t compute_misc_reserved; > + uint32_t compute_dispatch_id; > + uint32_t compute_threadgroup_id; > + uint32_t compute_relaunch; > + uint32_t compute_wave_restore_addr_lo; > + uint32_t compute_wave_restore_addr_hi; > + uint32_t 
compute_wave_restore_control; > uint32_t reserved_39; > uint32_t reserved_40; > uint32_t reserved_41; > -- > 1.9.1 > > _______________________________________________ > amd-gfx mailing list > amd-gfx at lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx