On 7/16/2024 2:17 PM, Jane Jian wrote:
> For VCN/JPEG 4.0.3, use only the local addressing scheme.
>
> - Mask bits higher than the AID0 range
>
> v2:
> - keep using the master XCC for the mmhub case
>
> Signed-off-by: Jane Jian <Jane.Jian@xxxxxxx>

This patch is

Reviewed-by: Lijo Lazar <lijo.lazar@xxxxxxx>

Thanks,
Lijo

> ---
>  drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 19 ++++++++--
>  drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c  | 46 ++++++++++++++++++++++--
>  2 files changed, 60 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
> index 30a143ab592d..ad524ddc9760 100644
> --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
> @@ -32,6 +32,9 @@
>  #include "vcn/vcn_4_0_3_sh_mask.h"
>  #include "ivsrcid/vcn/irqsrcs_vcn_4_0.h"
>
> +#define NORMALIZE_JPEG_REG_OFFSET(offset) \
> +		(offset & 0x1FFFF)
> +
>  enum jpeg_engin_status {
>  	UVD_PGFSM_STATUS__UVDJ_PWR_ON = 0,
>  	UVD_PGFSM_STATUS__UVDJ_PWR_OFF = 2,
> @@ -824,7 +827,13 @@ void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring,
>  void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
>  					uint32_t val, uint32_t mask)
>  {
> -	uint32_t reg_offset = (reg << 2);
> +	uint32_t reg_offset;
> +
> +	/* For VF, only local offsets should be used */
> +	if (amdgpu_sriov_vf(ring->adev))
> +		reg = NORMALIZE_JPEG_REG_OFFSET(reg);
> +
> +	reg_offset = (reg << 2);
>
>  	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET,
>  		0, 0, PACKETJ_TYPE0));
> @@ -865,7 +874,13 @@ void jpeg_v4_0_3_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
>
>  void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val)
>  {
> -	uint32_t reg_offset = (reg << 2);
> +	uint32_t reg_offset;
> +
> +	/* For VF, only local offsets should be used */
> +	if (amdgpu_sriov_vf(ring->adev))
> +		reg = NORMALIZE_JPEG_REG_OFFSET(reg);
> +
> +	reg_offset = (reg << 2);
>
>  	amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
>  		0, 0, PACKETJ_TYPE0));
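For reference, the normalization is easy to sanity-check in isolation:
masking with 0x1FFFF keeps the low 17 bits of the register offset, i.e.
the local offset within the AID0 range, and clears the higher aperture
bits. A minimal stand-alone sketch (the sample offset 0x48300 is
hypothetical here, just an offset with a bit set above the local range;
the program is illustrative, not part of the patch):

    #include <stdint.h>
    #include <stdio.h>

    /* Same mask as NORMALIZE_JPEG_REG_OFFSET/NORMALIZE_VCN_REG_OFFSET
     * in the patch: keep bits 0..16, clear everything above them. */
    #define NORMALIZE_REG_OFFSET(offset) ((offset) & 0x1FFFF)

    int main(void)
    {
        uint32_t reg = 0x48300; /* bit 18 set, above the AID0 range */

        /* prints "0x48300 -> 0x08300" */
        printf("0x%05X -> 0x%05X\n", reg, NORMALIZE_REG_OFFSET(reg));
        return 0;
    }
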
> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
> index 101b120f6fbd..9bae95538b62 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
> @@ -45,6 +45,9 @@
>  #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00
>  #define VCN1_VID_SOC_ADDRESS_3_0 0x48300
>
> +#define NORMALIZE_VCN_REG_OFFSET(offset) \
> +		(offset & 0x1FFFF)
> +
>  static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev);
>  static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev);
>  static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev);
> @@ -1375,6 +1378,43 @@ static uint64_t vcn_v4_0_3_unified_ring_get_wptr(struct amdgpu_ring *ring)
>  			regUVD_RB_WPTR);
>  }
>
> +static void vcn_v4_0_3_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
> +					      uint32_t val, uint32_t mask)
> +{
> +	/* For VF, only local offsets should be used */
> +	if (amdgpu_sriov_vf(ring->adev))
> +		reg = NORMALIZE_VCN_REG_OFFSET(reg);
> +
> +	amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT);
> +	amdgpu_ring_write(ring, reg << 2);
> +	amdgpu_ring_write(ring, mask);
> +	amdgpu_ring_write(ring, val);
> +}
> +
> +static void vcn_v4_0_3_enc_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val)
> +{
> +	/* For VF, only local offsets should be used */
> +	if (amdgpu_sriov_vf(ring->adev))
> +		reg = NORMALIZE_VCN_REG_OFFSET(reg);
> +
> +	amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE);
> +	amdgpu_ring_write(ring, reg << 2);
> +	amdgpu_ring_write(ring, val);
> +}
> +
> +static void vcn_v4_0_3_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
> +					      unsigned int vmid, uint64_t pd_addr)
> +{
> +	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
> +
> +	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
> +
> +	/* wait for reg writes */
> +	vcn_v4_0_3_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 +
> +					  vmid * hub->ctx_addr_distance,
> +					  lower_32_bits(pd_addr), 0xffffffff);
> +}
> +
>  static void vcn_v4_0_3_ring_emit_hdp_flush(struct amdgpu_ring *ring)
>  {
>  	/* VCN engine access for HDP flush doesn't work when RRMT is enabled.
> @@ -1421,7 +1461,7 @@ static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = {
>  	.emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
>  	.emit_ib = vcn_v2_0_enc_ring_emit_ib,
>  	.emit_fence = vcn_v2_0_enc_ring_emit_fence,
> -	.emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush,
> +	.emit_vm_flush = vcn_v4_0_3_enc_ring_emit_vm_flush,
>  	.emit_hdp_flush = vcn_v4_0_3_ring_emit_hdp_flush,
>  	.test_ring = amdgpu_vcn_enc_ring_test_ring,
>  	.test_ib = amdgpu_vcn_unified_ring_test_ib,
> @@ -1430,8 +1470,8 @@ static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = {
>  	.pad_ib = amdgpu_ring_generic_pad_ib,
>  	.begin_use = amdgpu_vcn_ring_begin_use,
>  	.end_use = amdgpu_vcn_ring_end_use,
> -	.emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
> -	.emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
> +	.emit_wreg = vcn_v4_0_3_enc_ring_emit_wreg,
> +	.emit_reg_wait = vcn_v4_0_3_enc_ring_emit_reg_wait,
>  	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
>  };
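The other key piece is switching .emit_vm_flush, .emit_wreg and
.emit_reg_wait from the v2_0 helpers to the new v4_0_3 variants, so the
reg_wait emitted by the VM flush path also goes through the
normalization on a VF. A toy user-space mock of that emission path
(stubbed ring and hypothetical register/value arguments; the
VCN_ENC_CMD_REG_WAIT value is assumed from amdgpu_vcn.h, and nothing
below is the actual kernel API):

    #include <stdint.h>
    #include <stdio.h>

    #define VCN_ENC_CMD_REG_WAIT 0x0000000c /* assumed from amdgpu_vcn.h */
    #define NORMALIZE_VCN_REG_OFFSET(offset) ((offset) & 0x1FFFF)

    static int sriov_vf = 1; /* pretend we are running as a VF */

    /* Stub for amdgpu_ring_write(): just log each emitted dword. */
    static void ring_write(uint32_t dw)
    {
        printf("ring <- 0x%08X\n", dw);
    }

    /* Mirrors the shape of vcn_v4_0_3_enc_ring_emit_reg_wait():
     * normalize the dword offset first, then emit the command, the
     * byte offset (reg << 2), the mask and the expected value. */
    static void emit_reg_wait(uint32_t reg, uint32_t val, uint32_t mask)
    {
        if (sriov_vf)
            reg = NORMALIZE_VCN_REG_OFFSET(reg);

        ring_write(VCN_ENC_CMD_REG_WAIT);
        ring_write(reg << 2);
        ring_write(mask);
        ring_write(val);
    }

    int main(void)
    {
        /* hypothetical ctx0 PTB offset above the AID0 range */
        emit_reg_wait(0x48300, 0x12345678, 0xffffffff);
        return 0;
    }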