Am 05.11.19 um 11:21 schrieb Zhu, Changfeng: > Hi Chris, > > Maybe it's better to use amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0); to replace > amdgpu_ring_emit_reg_wait(ring, reg1, 0, 0); ? Good point. I've mixed up the request and acknowledge registers. The important thing is that you use 0 as both mask and value; otherwise we could potentially wait forever. Regards, Christian. > > http://ontrack-internal.amd.com/browse/SWDEV-192660 > Jira ticket recommends to read VM_INVALIDATE_ENG*_REQ. > > BR, > Changfeng. > > -----Original Message----- > From: Koenig, Christian <Christian.Koenig@xxxxxxx> > Sent: Tuesday, November 5, 2019 5:13 PM > To: Zhu, Changfeng <Changfeng.Zhu@xxxxxxx>; amd-gfx@xxxxxxxxxxxxxxxxxxxxx; Tuikov, Luben <Luben.Tuikov@xxxxxxx>; Huang, Ray <Ray.Huang@xxxxxxx>; Huang, Shimmer <Xinmei.Huang@xxxxxxx> > Subject: Re: [PATCH] drm/amdgpu: add dummy read by engines for some GCVM status registers > > Am 05.11.19 um 07:32 schrieb Zhu, Changfeng: >> From: changzhu <Changfeng.Zhu@xxxxxxx> >> >> The GRBM register interface is now capable of bursting 1 cycle per >> register wr->wr, wr->rd much faster than previous multicycle per >> transaction done interface. This has caused a problem where status >> registers requiring HW to update have a 1 cycle delay, due to the >> register update having to go through GRBM. >> >> For cp ucode, it has realized dummy read in cp firmware. It covers the >> use of WAIT_REG_MEM operation 1 case only. So it needs to call >> gfx_v10_0_wait_reg_mem in gfx10. Besides it also needs to add warning >> to update firmware in case firmware is too old to have function to >> realize dummy read in cp firmware. >> >> For sdma ucode, it hasn't realized dummy read in sdma firmware. sdma >> is moved to gfxhub in gfx10. So it needs to add dummy read in driver >> between amdgpu_ring_emit_wreg and amdgpu_ring_emit_reg_wait for sdma_v5_0. > First of all, thanks for getting your environment set up properly; we are finally making progress with that issue. 
> > A bunch of nice-to-have comments below, and two major bugs/typos which really need to be fixed. > >> Change-Id: Ie028f37eb789966d4593984bd661b248ebeb1ac3 >> Signed-off-by: changzhu <Changfeng.Zhu@xxxxxxx> >> --- >> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 1 + >> drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 50 +++++++++++++++++++++++++ >> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 7 ++++ >> drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 8 ++-- >> drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 13 ++++++- >> 5 files changed, 73 insertions(+), 6 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h >> index 459aa9059542..a74ecd449775 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h >> @@ -267,6 +267,7 @@ struct amdgpu_gfx { >> uint32_t mec2_feature_version; >> bool mec_fw_write_wait; >> bool me_fw_write_wait; >> + bool cp_fw_write_wait; >> struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS]; >> unsigned num_gfx_rings; >> struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS]; >> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c >> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c >> index 17a5cbfd0024..814764723c26 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c >> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c >> @@ -561,6 +561,32 @@ static void gfx_v10_0_free_microcode(struct amdgpu_device *adev) >> kfree(adev->gfx.rlc.register_list_format); >> } >> >> +static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev) >> +{ >> + adev->gfx.cp_fw_write_wait = false; >> + >> + switch (adev->asic_type) { >> + case CHIP_NAVI10: >> + case CHIP_NAVI12: >> + case CHIP_NAVI14: >> + if ((adev->gfx.me_fw_version >= 0x00000046) && >> + (adev->gfx.me_feature_version >= 27) && >> + (adev->gfx.pfp_fw_version >= 0x00000068) && >> + (adev->gfx.pfp_feature_version >= 27) && >> + (adev->gfx.mec_fw_version >= 0x0000005b) && >> + (adev->gfx.mec_feature_version >= 27)) >> + 
adev->gfx.cp_fw_write_wait = true; >> + break; >> + default: >> + break; >> + } >> + >> + if (adev->gfx.cp_fw_write_wait == false) >> + DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \ >> + GRBM requires 1-cycle delay in cp firmware\n"); } >> + >> + >> static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev) >> { >> const struct rlc_firmware_header_v2_1 *rlc_hdr; @@ -4768,6 +4794,28 >> @@ static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, >> gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); >> } >> >> +static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, >> + uint32_t reg0, uint32_t reg1, >> + uint32_t ref, uint32_t mask) >> +{ >> + int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); >> + struct amdgpu_device *adev = ring->adev; >> + bool fw_version_ok = false; >> + >> + gfx_v10_0_check_fw_write_wait(adev); >> + >> + if (ring->funcs->type == AMDGPU_RING_TYPE_GFX || >> + ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) > That check is probably superfluous. A few lines below you are using the function in the gfx_v10_0_ring_funcs_gfx and gfx_v10_0_ring_funcs_compute, so the ring->funcs->type is always constant. 
> >> + fw_version_ok = adev->gfx.cp_fw_write_wait; >> + >> + if (fw_version_ok) >> + gfx_v10_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, >> + ref, mask, 0x20); >> + else >> + amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1, >> + ref, mask); >> +} >> + >> static void >> gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, >> uint32_t me, uint32_t pipe, @@ -5158,6 +5206,7 @@ static >> const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { >> .emit_tmz = gfx_v10_0_ring_emit_tmz, >> .emit_wreg = gfx_v10_0_ring_emit_wreg, >> .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, >> + .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, >> }; >> >> static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = >> { @@ -5191,6 +5240,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { >> .pad_ib = amdgpu_ring_generic_pad_ib, >> .emit_wreg = gfx_v10_0_ring_emit_wreg, >> .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, >> + .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, >> }; >> >> static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = { >> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c >> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c >> index 9d5f900e3e1c..f52fcb895d51 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c >> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c >> @@ -982,6 +982,13 @@ static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev) >> adev->gfx.me_fw_write_wait = false; >> adev->gfx.mec_fw_write_wait = false; >> >> + if ((adev->gfx.mec_fw_version < 0x000001a5) || >> + (adev->gfx.mec_feature_version < 46) || >> + (adev->gfx.pfp_fw_version < 0x000000b7) || >> + (adev->gfx.pfp_feature_version < 46)) >> + DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \ >> + GRBM requires 1-cycle delay in cp firmware\n"); >> + > Not a hard requirement, but it would be nice to have this in a separate patch. 
So that gfx9 and gfx10 changes are clearly separated. > >> switch (adev->asic_type) { >> case CHIP_VEGA10: >> if ((adev->gfx.me_fw_version >= 0x0000009c) && diff --git >> a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c >> b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c >> index 3b00bce14cfb..9ff3ec1531ed 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c >> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c >> @@ -344,11 +344,9 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, >> amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid), >> upper_32_bits(pd_addr)); >> >> - amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req); >> - >> - /* wait for the invalidate to complete */ >> - amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng, >> - 1 << vmid, 1 << vmid); >> + amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng, >> + hub->vm_inv_eng0_ack, > That register offset is wrong! This needs to be "hub->vm_inv_eng0_ack + eng". > >> + req, 1 << vmid); >> >> return pd_addr; >> } >> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c >> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c >> index 3460c00f3eaa..4cf8e3d23c60 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c >> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c >> @@ -1170,6 +1170,16 @@ static void sdma_v5_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, >> SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); >> } >> >> +static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, >> + uint32_t reg0, uint32_t reg1, >> + uint32_t ref, uint32_t mask) { >> + amdgpu_ring_emit_wreg(ring, reg0, ref); >> + /* wait for a cycle to reset vm_inv_eng*_ack */ >> + amdgpu_ring_emit_reg_wait(ring, reg0, mask, mask); > Well, that's exactly what won't work. Please use the following instead: > > amdgpu_ring_emit_reg_wait(ring, reg1, 0, 0); > > Regards, > Christian. 
> >> + amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask); } >> + >> static int sdma_v5_0_early_init(void *handle) >> { >> struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ >> -1585,7 +1595,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = { >> 6 + /* sdma_v5_0_ring_emit_pipeline_sync */ >> /* sdma_v5_0_ring_emit_vm_flush */ >> SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + >> - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + >> + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 + >> 10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */ >> .emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */ >> .emit_ib = sdma_v5_0_ring_emit_ib, >> @@ -1599,6 +1609,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = { >> .pad_ib = sdma_v5_0_ring_pad_ib, >> .emit_wreg = sdma_v5_0_ring_emit_wreg, >> .emit_reg_wait = sdma_v5_0_ring_emit_reg_wait, >> + .emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait, >> .init_cond_exec = sdma_v5_0_ring_init_cond_exec, >> .patch_cond_exec = sdma_v5_0_ring_patch_cond_exec, >> .preempt_ib = sdma_v5_0_ring_preempt_ib, _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx