This patch moves the code that flushes the GPU TLB using the SDMA ring,
and splits it into two parts:
- a general purpose function to flush the GPU TLB using any valid GPU ring.
- a wrapper which consumes this function using the SDMA ring.

The idea is that KFD/KGD code should be able to flush the GPU TLB
using the SDMA ring if they want to.

v2: Addressed review comments on RFC
- Make the TLB flush function generic, and add an SDMA wrapper to it
  (Christian).

Cc: Christian Koenig <christian.koenig@xxxxxxx>
Cc: Philip Yang <philip.yang@xxxxxxx>
Signed-off-by: Shashank Sharma <shashank.sharma@xxxxxxx>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 22 +++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c  | 36 ++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h  |  1 +
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c   | 34 +++++-----------------
 5 files changed, 67 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index 231ca06bc9c7..05ffeb704dc4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -30,6 +30,28 @@
 /* SDMA CSA reside in the 3rd page of CSA */
 #define AMDGPU_CSA_SDMA_OFFSET (4096 * 2)
 
+/**
+ * amdgpu_sdma_flush_gpu_tlb - flush GPU TLB using the SDMA ring
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Return: 0 on success, -1 on failure.
+ */
+int amdgpu_sdma_flush_gpu_tlb(struct amdgpu_device *adev)
+{
+	struct dma_fence *fence;
+	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+
+	fence = amdgpu_ttm_flush_tlb(ring);
+	if (fence) {
+		dma_fence_wait(fence, false);
+		dma_fence_put(fence);
+		return 0;
+	}
+
+	return -1;
+}
+
 /*
  * GPU SDMA IP block helpers function.
  */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index fc8528812598..739077862a7d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -130,5 +130,6 @@ void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device *adev,
 		bool duplicate);
 void amdgpu_sdma_unset_buffer_funcs_helper(struct amdgpu_device *adev);
 int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev);
+int amdgpu_sdma_flush_gpu_tlb(struct amdgpu_device *adev);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index c5ef7f7bdc15..1248d1dd5abc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -81,6 +81,42 @@ static int amdgpu_ttm_init_on_chip(struct amdgpu_device *adev,
 				  false, size_in_page);
 }
 
+/**
+ * amdgpu_ttm_flush_tlb - flush GPU TLB using a GPU ring
+ *
+ * @ring: ring to submit the flushing job to
+ *
+ * Return: dma fence which must be put by the caller, or NULL on failure.
+ */
+struct dma_fence *amdgpu_ttm_flush_tlb(struct amdgpu_ring *ring)
+{
+	struct amdgpu_job *job;
+	struct dma_fence *fence;
+	struct amdgpu_device *adev = ring->adev;
+	int r;
+
+	mutex_lock(&adev->mman.gtt_window_lock);
+	r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity,
+				     AMDGPU_FENCE_OWNER_UNDEFINED,
+				     16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
+				     &job);
+	if (r)
+		goto error_alloc;
+
+	job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
+	job->vm_needs_flush = true;
+	job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
+	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+	fence = amdgpu_job_submit(job);
+
+	mutex_unlock(&adev->mman.gtt_window_lock);
+	return fence;
+
+error_alloc:
+	mutex_unlock(&adev->mman.gtt_window_lock);
+	return NULL;
+}
+
 /**
  * amdgpu_evict_flags - Compute placement flags
  *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index e2cd5894afc9..86ba70d2eb53 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -200,5 +200,6 @@ uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
 int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int mem_type);
 void amdgpu_ttm_debugfs_init(struct amdgpu_device *adev);
+struct dma_fence *amdgpu_ttm_flush_tlb(struct amdgpu_ring *ring);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index ab2556ca984e..9892b155d1ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -52,6 +52,7 @@
 #include "athub_v2_1.h"
 
 #include "amdgpu_reset.h"
+#include "amdgpu_sdma.h"
 
 #if 0
 static const struct soc15_reg_golden golden_settings_navi10_hdp[] =
@@ -332,10 +333,6 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 					uint32_t vmhub, uint32_t flush_type)
 {
 	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
-	struct dma_fence *fence;
-	struct amdgpu_job *job;
-
-	int r;
 
 	/* flush hdp cache */
 	adev->hdp.funcs->flush_hdp(adev, NULL);
@@ -378,34 +375,17 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 		return;
 	}
 
-	/* The SDMA on Navi has a bug which can theoretically result in memory
+	mutex_unlock(&adev->mman.gtt_window_lock);
+
+	/*
+	 * The SDMA on Navi has a bug which can theoretically result in memory
 	 * corruption if an invalidation happens at the same time as an VA
 	 * translation. Avoid this by doing the invalidation from the SDMA
 	 * itself.
 	 */
-	r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.entity,
-				     AMDGPU_FENCE_OWNER_UNDEFINED,
-				     16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
-				     &job);
-	if (r)
-		goto error_alloc;
-	job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
-	job->vm_needs_flush = true;
-	job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
-	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
-	fence = amdgpu_job_submit(job);
-
-	mutex_unlock(&adev->mman.gtt_window_lock);
-
-	dma_fence_wait(fence, false);
-	dma_fence_put(fence);
-
-	return;
-
-error_alloc:
-	mutex_unlock(&adev->mman.gtt_window_lock);
-	DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
+	if (amdgpu_sdma_flush_gpu_tlb(adev))
+		DRM_ERROR("Error flushing GPU TLB using the SDMA!\n");
 }
 
 /**
-- 
2.40.1
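
As a usage sketch (not part of the patch): a caller that does not want to
block right away could use the generic function directly on any suitable ring
and defer the wait until later. The helper name below is hypothetical,
made up purely for illustration:

	/* Hypothetical example: submit a TLB flush and wait for it later. */
	static int example_flush_tlb_async(struct amdgpu_device *adev,
					   struct amdgpu_ring *ring)
	{
		struct dma_fence *fence;

		/* Submit the flush job; amdgpu_ttm_flush_tlb() returns NULL on failure. */
		fence = amdgpu_ttm_flush_tlb(ring);
		if (!fence)
			return -1;

		/* ... overlap other work while the flush is in flight ... */

		/* Wait for the flush to complete and drop the fence reference. */
		dma_fence_wait(fence, false);
		dma_fence_put(fence);
		return 0;
	}

Synchronous callers (e.g. KFD/KGD code) would instead simply call
amdgpu_sdma_flush_gpu_tlb(adev), which wraps the same path on the SDMA ring
and waits internally.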