On Tue, Sep 5, 2023 at 2:20 AM Christian König <ckoenig.leichtzumerken@xxxxxxxxx> wrote: > > Move the SDMA workaround necessary for Navi 1x into a higher layer. You could split out the register offsets code cleanup into a separate patch. Either way, the patch is: Reviewed-by: Alex Deucher <alexander.deucher@xxxxxxx> > > Signed-off-by: Christian König <christian.koenig@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 48 +++++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 5 +- > drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c | 3 + > drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 159 ++++++----------------- > 4 files changed, 97 insertions(+), 118 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c > index d78bd9732543..857051093900 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c > @@ -575,6 +575,54 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev) > return 0; > } > > +void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, > + uint32_t vmhub, uint32_t flush_type) > +{ > + struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; > + struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; > + struct dma_fence *fence; > + struct amdgpu_job *job; > + int r; > + > + if (!hub->sdma_invalidation_workaround || vmid || > + !adev->mman.buffer_funcs_enabled || > + !adev->ib_pool_ready || amdgpu_in_reset(adev) || > + !ring->sched.ready) { > + adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid, vmhub, > + flush_type); > + return; > + } > + > + /* The SDMA on Navi 1x has a bug which can theoretically result in memory > + * corruption if an invalidation happens at the same time as an VA > + * translation. Avoid this by doing the invalidation from the SDMA > + * itself at least for GART. > + */ > + mutex_lock(&adev->mman.gtt_window_lock); > + r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.high_pr, > + AMDGPU_FENCE_OWNER_UNDEFINED, > + 16 * 4, AMDGPU_IB_POOL_IMMEDIATE, > + &job); > + if (r) > + goto error_alloc; > + > + job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo); > + job->vm_needs_flush = true; > + job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop; > + amdgpu_ring_pad_ib(ring, &job->ibs[0]); > + fence = amdgpu_job_submit(job); > + mutex_unlock(&adev->mman.gtt_window_lock); > + > + dma_fence_wait(fence, false); > + dma_fence_put(fence); > + > + return; > + > +error_alloc: > + mutex_unlock(&adev->mman.gtt_window_lock); > + DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r); > +} > + > /** > * amdgpu_gmc_tmz_set -- check and set if a device supports TMZ > * @adev: amdgpu_device pointer > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h > index fdc25cd559b6..9e7df2f69123 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h > @@ -117,6 +117,8 @@ struct amdgpu_vmhub { > > uint32_t vm_contexts_disable; > > + bool sdma_invalidation_workaround; > + > const struct amdgpu_vmhub_funcs *vmhub_funcs; > }; > > @@ -335,7 +337,6 @@ struct amdgpu_gmc { > u64 noretry_flags; > }; > > -#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type))) > #define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub, inst) \ > ((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \ > ((adev), (pasid), (type), (allhub), (inst))) > @@ -401,6 +402,8 @@ int amdgpu_gmc_ras_sw_init(struct amdgpu_device *adev); > int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev); > void amdgpu_gmc_ras_fini(struct amdgpu_device *adev); > int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev); > +void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, > + uint32_t vmhub, uint32_t flush_type); > > extern void amdgpu_gmc_tmz_set(struct amdgpu_device *adev); > extern void amdgpu_gmc_noretry_set(struct amdgpu_device *adev); > diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c > index a041c6c970e1..8521c45e8f38 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c > @@ -471,6 +471,9 @@ static void gfxhub_v2_0_init(struct amdgpu_device *adev) > GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | > GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK; > > + /* TODO: This is only needed on some Navi 1x revisions */ > + hub->sdma_invalidation_workaround = true; > + > hub->vmhub_funcs = &gfxhub_v2_0_vmhub_funcs; > } > > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c > index fa87a85e1017..1f70c57bcd69 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c > @@ -230,20 +230,49 @@ static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info( > * by the amdgpu vm/hsa code. > */ > > -static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, > - unsigned int vmhub, uint32_t flush_type) > +/** > + * gmc_v10_0_flush_gpu_tlb - gart tlb flush callback > + * > + * @adev: amdgpu_device pointer > + * @vmid: vm instance to flush > + * @vmhub: vmhub type > + * @flush_type: the flush type > + * > + * Flush the TLB for the requested page table. > + */ > +static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, > + uint32_t vmhub, uint32_t flush_type) > { > bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, vmhub); > struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; > u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type); > - u32 tmp; > /* Use register 17 for GART */ > const unsigned int eng = 17; > - unsigned int i; > unsigned char hub_ip = 0; > + u32 sem, req, ack; > + unsigned int i; > + u32 tmp; > > - hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ? > - GC_HWIP : MMHUB_HWIP; > + sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng; > + req = hub->vm_inv_eng0_req + hub->eng_distance * eng; > + ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; > + > + /* flush hdp cache */ > + adev->hdp.funcs->flush_hdp(adev, NULL); > + > + /* For SRIOV run time, driver shouldn't access the register through MMIO > + * Directly use kiq to do the vm invalidation instead > + */ > + if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes && > + (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) && > + down_read_trylock(&adev->reset_domain->sem)) { > + amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req, > + 1 << vmid); > + up_read(&adev->reset_domain->sem); > + return; > + } > + > + hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ? GC_HWIP : MMHUB_HWIP; > > spin_lock(&adev->gmc.invalidate_lock); > /* > @@ -257,9 +286,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, > if (use_semaphore) { > for (i = 0; i < adev->usec_timeout; i++) { > /* a read return value of 1 means semaphore acuqire */ > - tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem + > - hub->eng_distance * eng, hub_ip); > - > + tmp = RREG32_RLC_NO_KIQ(sem, hub_ip); > if (tmp & 0x1) > break; > udelay(1); > @@ -269,9 +296,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, > DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n"); > } > > - WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req + > - hub->eng_distance * eng, > - inv_req, hub_ip); > + WREG32_RLC_NO_KIQ(req, inv_req, hub_ip); > > /* > * Issue a dummy read to wait for the ACK register to be cleared > @@ -279,14 +304,11 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, > */ > if ((vmhub == AMDGPU_GFXHUB(0)) && > (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 3, 0))) > - RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req + > - hub->eng_distance * eng, hub_ip); > + RREG32_RLC_NO_KIQ(req, hub_ip); > > /* Wait for ACK with a delay.*/ > for (i = 0; i < adev->usec_timeout; i++) { > - tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_ack + > - hub->eng_distance * eng, hub_ip); > - > + tmp = RREG32_RLC_NO_KIQ(ack, hub_ip); > tmp &= 1 << vmid; > if (tmp) > break; > @@ -296,109 +318,12 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, > > /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ > if (use_semaphore) > - /* > - * add semaphore release after invalidation, > - * write with 0 means semaphore release > - */ > - WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem + > - hub->eng_distance * eng, 0, hub_ip); > + WREG32_RLC_NO_KIQ(sem, 0, hub_ip); > > spin_unlock(&adev->gmc.invalidate_lock); > > - if (i < adev->usec_timeout) > - return; > - > - DRM_ERROR("Timeout waiting for VM flush hub: %d!\n", vmhub); > -} > - > -/** > - * gmc_v10_0_flush_gpu_tlb - gart tlb flush callback > - * > - * @adev: amdgpu_device pointer > - * @vmid: vm instance to flush > - * @vmhub: vmhub type > - * @flush_type: the flush type > - * > - * Flush the TLB for the requested page table. > - */ > -static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, > - uint32_t vmhub, uint32_t flush_type) > -{ > - struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; > - struct dma_fence *fence; > - struct amdgpu_job *job; > - > - int r; > - > - /* flush hdp cache */ > - adev->hdp.funcs->flush_hdp(adev, NULL); > - > - /* For SRIOV run time, driver shouldn't access the register through MMIO > - * Directly use kiq to do the vm invalidation instead > - */ > - if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes && > - (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) && > - down_read_trylock(&adev->reset_domain->sem)) { > - struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; > - const unsigned int eng = 17; > - u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type); > - u32 req = hub->vm_inv_eng0_req + hub->eng_distance * eng; > - u32 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; > - > - amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req, > - 1 << vmid); > - > - up_read(&adev->reset_domain->sem); > - return; > - } > - > - mutex_lock(&adev->mman.gtt_window_lock); > - > - if (vmhub == AMDGPU_MMHUB0(0)) { > - gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB0(0), 0); > - mutex_unlock(&adev->mman.gtt_window_lock); > - return; > - } > - > - BUG_ON(vmhub != AMDGPU_GFXHUB(0)); > - > - if (!adev->mman.buffer_funcs_enabled || > - !adev->ib_pool_ready || > - amdgpu_in_reset(adev) || > - ring->sched.ready == false) { > - gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB(0), 0); > - mutex_unlock(&adev->mman.gtt_window_lock); > - return; > - } > - > - /* The SDMA on Navi has a bug which can theoretically result in memory > - * corruption if an invalidation happens at the same time as an VA > - * translation. Avoid this by doing the invalidation from the SDMA > - * itself. > - */ > - r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.high_pr, > - AMDGPU_FENCE_OWNER_UNDEFINED, > - 16 * 4, AMDGPU_IB_POOL_IMMEDIATE, > - &job); > - if (r) > - goto error_alloc; > - > - job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo); > - job->vm_needs_flush = true; > - job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop; > - amdgpu_ring_pad_ib(ring, &job->ibs[0]); > - fence = amdgpu_job_submit(job); > - > - mutex_unlock(&adev->mman.gtt_window_lock); > - > - dma_fence_wait(fence, false); > - dma_fence_put(fence); > - > - return; > - > -error_alloc: > - mutex_unlock(&adev->mman.gtt_window_lock); > - DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r); > + if (i >= adev->usec_timeout) > + DRM_ERROR("Timeout waiting for VM flush hub: %d!\n", vmhub); > } > > /** > -- > 2.34.1 >