For VCN/JPEG 4.0.3, use only the local addressing scheme. - Mask bit higher than AID0 range - Remove gmc v9 mmhub vmid replacement, since the bit will be masked later in register write/wait Signed-off-by: Jane Jian <Jane.Jian@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 5 --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 19 ++++++++-- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 46 ++++++++++++++++++++++-- 3 files changed, 60 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index b73136d390cc..2c7b4002ed72 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -844,11 +844,6 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, req = hub->vm_inv_eng0_req + hub->eng_distance * eng; ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; - if (vmhub >= AMDGPU_MMHUB0(0)) - inst = 0; - else - inst = vmhub; - /* This is necessary for SRIOV as well as for GFXOFF to function * properly under bare metal */ diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index 30a143ab592d..ad524ddc9760 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -32,6 +32,9 @@ #include "vcn/vcn_4_0_3_sh_mask.h" #include "ivsrcid/vcn/irqsrcs_vcn_4_0.h" +#define NORMALIZE_JPEG_REG_OFFSET(offset) \ + (offset & 0x1FFFF) + enum jpeg_engin_status { UVD_PGFSM_STATUS__UVDJ_PWR_ON = 0, UVD_PGFSM_STATUS__UVDJ_PWR_OFF = 2, @@ -824,7 +827,13 @@ void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, uint32_t val, uint32_t mask) { - uint32_t reg_offset = (reg << 2); + uint32_t reg_offset; + + /* For VF, only local offsets should be used */ + if (amdgpu_sriov_vf(ring->adev)) + reg = NORMALIZE_JPEG_REG_OFFSET(reg); + + reg_offset = (reg << 2); amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); @@ -865,7 +874,13 @@ void jpeg_v4_0_3_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val) { - uint32_t reg_offset = (reg << 2); + uint32_t reg_offset; + + /* For VF, only local offsets should be used */ + if (amdgpu_sriov_vf(ring->adev)) + reg = NORMALIZE_JPEG_REG_OFFSET(reg); + + reg_offset = (reg << 2); amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 101b120f6fbd..9bae95538b62 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -45,6 +45,9 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48300 +#define NORMALIZE_VCN_REG_OFFSET(offset) \ + (offset & 0x1FFFF) + static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev); static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev); static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev); @@ -1375,6 +1378,43 @@ static uint64_t vcn_v4_0_3_unified_ring_get_wptr(struct amdgpu_ring *ring) regUVD_RB_WPTR); } +static void vcn_v4_0_3_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) +{ + /* For VF, only local offsets should be used */ + if (amdgpu_sriov_vf(ring->adev)) + reg = NORMALIZE_VCN_REG_OFFSET(reg); + + amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT); + amdgpu_ring_write(ring, reg << 2); + amdgpu_ring_write(ring, mask); + amdgpu_ring_write(ring, val); +} + +static void vcn_v4_0_3_enc_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val) +{ + /* For VF, only local offsets should be used */ + if (amdgpu_sriov_vf(ring->adev)) + reg = NORMALIZE_VCN_REG_OFFSET(reg); + + amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE); + amdgpu_ring_write(ring, reg << 2); + amdgpu_ring_write(ring, val); +} + +static void vcn_v4_0_3_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned int vmid, uint64_t pd_addr) +{ + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; + + pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + + /* wait for reg writes */ + vcn_v4_0_3_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + + vmid * hub->ctx_addr_distance, + lower_32_bits(pd_addr), 0xffffffff); +} + static void vcn_v4_0_3_ring_emit_hdp_flush(struct amdgpu_ring *ring) { /* VCN engine access for HDP flush doesn't work when RRMT is enabled. @@ -1421,7 +1461,7 @@ static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = { .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */ .emit_ib = vcn_v2_0_enc_ring_emit_ib, .emit_fence = vcn_v2_0_enc_ring_emit_fence, - .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush, + .emit_vm_flush = vcn_v4_0_3_enc_ring_emit_vm_flush, .emit_hdp_flush = vcn_v4_0_3_ring_emit_hdp_flush, .test_ring = amdgpu_vcn_enc_ring_test_ring, .test_ib = amdgpu_vcn_unified_ring_test_ib, @@ -1430,8 +1470,8 @@ static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = { .pad_ib = amdgpu_ring_generic_pad_ib, .begin_use = amdgpu_vcn_ring_begin_use, .end_use = amdgpu_vcn_ring_end_use, - .emit_wreg = vcn_v2_0_enc_ring_emit_wreg, - .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait, + .emit_wreg = vcn_v4_0_3_enc_ring_emit_wreg, + .emit_reg_wait = vcn_v4_0_3_enc_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, }; -- 2.34.1