Am 05.11.19 um 12:42 schrieb Zhu, Changfeng: > From: changzhu <Changfeng.Zhu@xxxxxxx> > > The GRBM register interface is now capable of bursting 1 cycle per > register wr->wr, wr->rd much faster than previous multicycle per > transaction done interface. This has caused a problem where > status registers requiring HW to update have a 1 cycle delay, due > to the register update having to go through GRBM. > > For cp ucode, it has realized dummy read in cp firmware. It covers > the use of WAIT_REG_MEM operation 1 case only. So it needs to call > gfx_v10_0_wait_reg_mem in gfx10. Besides it also needs to add warning to > update firmware in case firmware is too old to have function to realize > dummy read in cp firmware. > > For sdma ucode, it hasn't realized dummy read in sdma firmware. sdma is > moved to gfxhub in gfx10. So it needs to add dummy read in driver > between amdgpu_ring_emit_wreg and amdgpu_ring_emit_reg_wait for sdma_v5_0. > > Change-Id: Ie028f37eb789966d4593984bd661b248ebeb1ac3 > Signed-off-by: changzhu <Changfeng.Zhu@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 1 + > drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 47 +++++++++++++++++++++++++ > drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 8 ++--- > drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 13 ++++++- > 4 files changed, 63 insertions(+), 6 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > index 459aa9059542..a74ecd449775 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > @@ -267,6 +267,7 @@ struct amdgpu_gfx { > uint32_t mec2_feature_version; > bool mec_fw_write_wait; > bool me_fw_write_wait; > + bool cp_fw_write_wait; > struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS]; > unsigned num_gfx_rings; > struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS]; > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > index 17a5cbfd0024..e82b6d796b69 100644 
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > @@ -561,6 +561,32 @@ static void gfx_v10_0_free_microcode(struct amdgpu_device *adev) > kfree(adev->gfx.rlc.register_list_format); > } > > +static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev) > +{ > + adev->gfx.cp_fw_write_wait = false; > + > + switch (adev->asic_type) { > + case CHIP_NAVI10: > + case CHIP_NAVI12: > + case CHIP_NAVI14: > + if ((adev->gfx.me_fw_version >= 0x00000046) && > + (adev->gfx.me_feature_version >= 27) && > + (adev->gfx.pfp_fw_version >= 0x00000068) && > + (adev->gfx.pfp_feature_version >= 27) && > + (adev->gfx.mec_fw_version >= 0x0000005b) && > + (adev->gfx.mec_feature_version >= 27)) > + adev->gfx.cp_fw_write_wait = true; > + break; > + default: > + break; > + } > + > + if (adev->gfx.cp_fw_write_wait == false) > + DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \ > + GRBM requires 1-cycle delay in cp firmware\n"); > +} > + > + > static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev) > { > const struct rlc_firmware_header_v2_1 *rlc_hdr; > @@ -4768,6 +4794,25 @@ static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, > gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); > } > > +static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, > + uint32_t reg0, uint32_t reg1, > + uint32_t ref, uint32_t mask) > +{ > + int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); > + struct amdgpu_device *adev = ring->adev; > + bool fw_version_ok = false; > + > + gfx_v10_0_check_fw_write_wait(adev); > + fw_version_ok = adev->gfx.cp_fw_write_wait; > + > + if (fw_version_ok) > + gfx_v10_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, > + ref, mask, 0x20); > + else > + amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1, > + ref, mask); > +} > + > static void > gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, > uint32_t me, 
uint32_t pipe, > @@ -5158,6 +5203,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { > .emit_tmz = gfx_v10_0_ring_emit_tmz, > .emit_wreg = gfx_v10_0_ring_emit_wreg, > .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, > + .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, > }; > > static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { > @@ -5191,6 +5237,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { > .pad_ib = amdgpu_ring_generic_pad_ib, > .emit_wreg = gfx_v10_0_ring_emit_wreg, > .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, > + .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, > }; > > static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = { > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c > index 3b00bce14cfb..22c807309a22 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c > @@ -344,11 +344,9 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, > amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid), > upper_32_bits(pd_addr)); > > - amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req); > - > - /* wait for the invalidate to complete */ > - amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng, > - 1 << vmid, 1 << vmid); > + amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng, > + hub->vm_inv_eng0_ack + eng, > + req, 1 << vmid); You need to double check that all engines on gfx10 implement the emit_reg_write_reg_wait callback (probably as dummy with .emit_reg_write_reg_wait=amdgpu_ring_emit_reg_write_reg_wait_helper). Otherwise that could quickly result in a NULL pointer deref here for the multimedia engines. Apart from that the patch looks good to me now. Thanks, Christian. 
> > return pd_addr; > } > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c > index 3460c00f3eaa..7b15ddc739e5 100644 > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c > @@ -1170,6 +1170,16 @@ static void sdma_v5_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, > SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); > } > > +static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, > + uint32_t reg0, uint32_t reg1, > + uint32_t ref, uint32_t mask) > +{ > + amdgpu_ring_emit_wreg(ring, reg0, ref); > + /* wait for a cycle to reset vm_inv_eng*_ack */ > + amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0); > + amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask); > +} > + > static int sdma_v5_0_early_init(void *handle) > { > struct amdgpu_device *adev = (struct amdgpu_device *)handle; > @@ -1585,7 +1595,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = { > 6 + /* sdma_v5_0_ring_emit_pipeline_sync */ > /* sdma_v5_0_ring_emit_vm_flush */ > SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + > - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + > + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 + > 10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */ > .emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */ > .emit_ib = sdma_v5_0_ring_emit_ib, > @@ -1599,6 +1609,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = { > .pad_ib = sdma_v5_0_ring_pad_ib, > .emit_wreg = sdma_v5_0_ring_emit_wreg, > .emit_reg_wait = sdma_v5_0_ring_emit_reg_wait, > + .emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait, > .init_cond_exec = sdma_v5_0_ring_init_cond_exec, > .patch_cond_exec = sdma_v5_0_ring_patch_cond_exec, > .preempt_ib = sdma_v5_0_ring_preempt_ib, _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx