On 3/5/2024 2:44 PM, Christian König wrote: > Am 05.03.24 um 10:01 schrieb Lazar, Lijo: >> On 3/5/2024 2:22 PM, Christian König wrote: >>> Am 05.03.24 um 07:40 schrieb Lijo Lazar: >>>> VCN 4.0.3 cannot trigger HDP flush with RRMT enabled. Instead, trigger >>>> HDP flush from host side before ringing doorbell. >>> Well that won't work like that. >>> >>> The HDP flush is supposed to be emitted into the ring buffer of the >>> engine. If you just "emulate" it by a register write than that write >>> comes at the wrong time. >>> >>> This code here is seriously broken. >>> >> This approach was in fact suggested by VCN IP and FW team since RRMT is >> not working from VCN side to do a remote flush of HDP (HDP is active >> only in one of the AIDs). They mentioned that VCN doesn't need to flush >> HDP at the time of processing. > > In this case you don't need to implement anything here. > > See amdgpu_device_flush_hdp(), we already have the CPU fallback for the > engines which don't have access to the HDP: > > #ifdef CONFIG_X86_64 > if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) > return; > #endif > if (adev->gmc.xgmi.connected_to_cpu) > return; > > if (ring && ring->funcs->emit_hdp_flush) > amdgpu_ring_emit_hdp_flush(ring); > else > amdgpu_asic_flush_hdp(adev, ring); > The recommendation was to do a HDP flush at the time of packet submission before write pointer update. Hence we added a dummy emit_hdp_flush() to the ring which does nothing at this point and does a flush only before it updates write pointer. Thanks, Lijo > Regards, > Christian. > >> >> Thanks, >> Lijo >> >>> Regards, >>> Christian. >>> >>>> Signed-off-by: Lijo Lazar <lijo.lazar@xxxxxxx> >>>> --- >>>> drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 25 >>>> +++++++++++++++++++++++++ >>>> 1 file changed, 25 insertions(+) >>>> >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c >>>> b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c >>>> index 810bbfccd6f2..7b5ad13b618e 100644 >>>> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c >>>> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c >>>> @@ -36,6 +36,7 @@ >>>> #include "vcn/vcn_4_0_3_offset.h" >>>> #include "vcn/vcn_4_0_3_sh_mask.h" >>>> #include "ivsrcid/vcn/irqsrcs_vcn_4_0.h" >>>> +#include <uapi/linux/kfd_ioctl.h> >>>> #define mmUVD_DPG_LMA_CTL regUVD_DPG_LMA_CTL >>>> #define mmUVD_DPG_LMA_CTL_BASE_IDX regUVD_DPG_LMA_CTL_BASE_IDX >>>> @@ -1380,6 +1381,26 @@ static uint64_t >>>> vcn_v4_0_3_unified_ring_get_wptr(struct amdgpu_ring *ring) >>>> regUVD_RB_WPTR); >>>> } >>>> +static void vcn_v4_0_3_ring_emit_hdp_flush(struct amdgpu_ring >>>> *ring) >>>> +{ >>>> + /* VCN engine access for HDP flush doesn't work when RRMT is >>>> enabled. >>>> + * This is a workaround to avoid any HDP flush through VCN ring. >>>> Instead >>>> + * HDP flush will be done by driver while submitting doorbell. >>>> + */ >>>> +} >>>> + >>>> +static void vcn_v4_0_3_flush_hdp(struct amdgpu_ring *ring) >>>> +{ >>>> + struct amdgpu_device *adev = ring->adev; >>>> + >>>> +#ifdef CONFIG_X86_64 >>>> + if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) >>>> + return; >>>> +#endif >>>> + if (ring->wptr) >>>> + WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + >>>> KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); >>>> +} >>>> + >>>> /** >>>> * vcn_v4_0_3_unified_ring_set_wptr - set enc write pointer >>>> * >>>> @@ -1394,6 +1415,9 @@ static void >>>> vcn_v4_0_3_unified_ring_set_wptr(struct amdgpu_ring *ring) >>>> if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) >>>> DRM_ERROR("wrong ring id is identified in %s", __func__); >>>> + /* Flush HDP before ringing doorbell */ >>>> + vcn_v4_0_3_flush_hdp(ring); >>>> + >>>> if (ring->use_doorbell) { >>>> *ring->wptr_cpu_addr = lower_32_bits(ring->wptr); >>>> WDOORBELL32(ring->doorbell_index, >>>> lower_32_bits(ring->wptr)); >>>> @@ -1420,6 +1444,7 @@ static const struct amdgpu_ring_funcs >>>> vcn_v4_0_3_unified_ring_vm_funcs = { >>>> .emit_ib = vcn_v2_0_enc_ring_emit_ib, >>>> .emit_fence = vcn_v2_0_enc_ring_emit_fence, >>>> .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush, >>>> + .emit_hdp_flush = vcn_v4_0_3_ring_emit_hdp_flush, >>>> .test_ring = amdgpu_vcn_enc_ring_test_ring, >>>> .test_ib = amdgpu_vcn_unified_ring_test_ib, >>>> .insert_nop = amdgpu_ring_insert_nop, >