On Tue, Sep 5, 2023 at 3:00 AM Christian König <ckoenig.leichtzumerken@xxxxxxxxx> wrote: > > For the PASID flushing we already handled that at a higher layer, apply > those workarounds to the standard flush as well. > > Signed-off-by: Christian König <christian.koenig@xxxxxxx> Reviewed-by: Alex Deucher <alexander.deucher@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 19 +++++++ > drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 74 ++++++++----------------- > 2 files changed, 42 insertions(+), 51 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c > index c24252304d48..8a5381ca7713 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c > @@ -597,6 +597,14 @@ void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, > if(!down_read_trylock(&adev->reset_domain->sem)) > return; > > + if (adev->gmc.flush_tlb_needs_extra_type_2) > + adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid, > + vmhub, 2); > + > + if (adev->gmc.flush_tlb_needs_extra_type_0 && flush_type == 2) > + adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid, > + vmhub, 0); > + > adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid, vmhub, > flush_type); > up_read(&adev->reset_domain->sem); > @@ -647,6 +655,17 @@ int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid, > > if (!adev->gmc.flush_pasid_uses_kiq || !ring->sched.ready || > !down_read_trylock(&adev->reset_domain->sem)) { > + > + if (adev->gmc.flush_tlb_needs_extra_type_2) > + adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid, > + 2, all_hub, > + inst); > + > + if (adev->gmc.flush_tlb_needs_extra_type_0 && flush_type == 2) > + adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid, > + 0, all_hub, > + inst); > + > adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid, > flush_type, all_hub, > inst); > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > index c5df8f052f3f..a1a6f4b63208 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > @@ -812,37 +812,18 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, > uint32_t vmhub, uint32_t flush_type) > { > bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub); > - u32 j, inv_req, inv_req2, tmp, sem, req, ack; > + u32 j, inv_req, tmp, sem, req, ack; > const unsigned int eng = 17; > struct amdgpu_vmhub *hub; > > BUG_ON(vmhub >= AMDGPU_MAX_VMHUBS); > > hub = &adev->vmhub[vmhub]; > + inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type); > sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng; > req = hub->vm_inv_eng0_req + hub->eng_distance * eng; > ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; > > - if (adev->gmc.xgmi.num_physical_nodes && > - adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0)) { > - /* Vega20+XGMI caches PTEs in TC and TLB. Add a > - * heavy-weight TLB flush (type 2), which flushes > - * both. Due to a race condition with concurrent > - * memory accesses using the same TLB cache line, we > - * still need a second TLB flush after this. > - */ > - inv_req = gmc_v9_0_get_invalidate_req(vmid, 2); > - inv_req2 = gmc_v9_0_get_invalidate_req(vmid, flush_type); > - } else if (flush_type == 2 && > - adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) && > - adev->rev_id == 0) { > - inv_req = gmc_v9_0_get_invalidate_req(vmid, 0); > - inv_req2 = gmc_v9_0_get_invalidate_req(vmid, flush_type); > - } else { > - inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type); > - inv_req2 = 0; > - } > - > /* This is necessary for a HW workaround under SRIOV as well > * as GFXOFF under bare metal > */ > @@ -853,10 +834,6 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, > > amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req, > 1 << vmid); > - if (inv_req2) > - amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, > - inv_req2, 1 << vmid); > - > return; > } > > @@ -886,34 +863,29 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, > DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n"); > } > > - do { > - if (vmhub >= AMDGPU_MMHUB0(0)) > - WREG32_SOC15_IP_NO_KIQ(MMHUB, req, inv_req); > - else > - WREG32_SOC15_IP_NO_KIQ(GC, req, inv_req); > - > - /* > - * Issue a dummy read to wait for the ACK register to > - * be cleared to avoid a false ACK due to the new fast > - * GRBM interface. > - */ > - if ((vmhub == AMDGPU_GFXHUB(0)) && > - (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2))) > - RREG32_NO_KIQ(req); > + if (vmhub >= AMDGPU_MMHUB0(0)) > + WREG32_SOC15_IP_NO_KIQ(MMHUB, req, inv_req); > + else > + WREG32_SOC15_IP_NO_KIQ(GC, req, inv_req); > > - for (j = 0; j < adev->usec_timeout; j++) { > - if (vmhub >= AMDGPU_MMHUB0(0)) > - tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, ack); > - else > - tmp = RREG32_SOC15_IP_NO_KIQ(GC, ack); > - if (tmp & (1 << vmid)) > - break; > - udelay(1); > - } > + /* > + * Issue a dummy read to wait for the ACK register to > + * be cleared to avoid a false ACK due to the new fast > + * GRBM interface. > + */ > + if ((vmhub == AMDGPU_GFXHUB(0)) && > + (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2))) > + RREG32_NO_KIQ(req); > > - inv_req = inv_req2; > - inv_req2 = 0; > - } while (inv_req); > + for (j = 0; j < adev->usec_timeout; j++) { > + if (vmhub >= AMDGPU_MMHUB0(0)) > + tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, ack); > + else > + tmp = RREG32_SOC15_IP_NO_KIQ(GC, ack); > + if (tmp & (1 << vmid)) > + break; > + udelay(1); > + } > > /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ > if (use_semaphore) { > -- > 2.34.1 >