On Fri, Jun 9, 2023 at 11:05 AM Shashank Sharma <shashank.sharma@xxxxxxx> wrote:
>
> This patch moves the code to flush GPU TLB using SDMA ring and
> splits it into two parts:
> - a general purpose function to flush GPU TLB using any valid GPU ring.
> - a wrapper which consumes this function using SDMA ring.
>
> The idea is that KFD/KGD code should be able to flush GPU TLB
> using SDMA ring if they want to.
>
> v2: Addressed review comments on RFC
>     - Make the TLB flush function generic, and add a SDMA wrapper
>       to it (Christian).
>
> Cc: Christian Koenig <christian.koenig@xxxxxxx>
> Cc: Philip Yang <philip.yang@xxxxxxx>
> Signed-off-by: Shashank Sharma <shashank.sharma@xxxxxxx>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 22 +++++++++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h |  1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c  | 36 ++++++++++++++++++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h  |  1 +
>  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c   | 34 +++++-----------------
>  5 files changed, 67 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
> index 231ca06bc9c7..05ffeb704dc4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
> @@ -30,6 +30,28 @@
>  /* SDMA CSA reside in the 3rd page of CSA */
>  #define AMDGPU_CSA_SDMA_OFFSET (4096 * 2)
>
> +/**
> + * amdgpu_sdma_flush_tlb - flush gpu TLB using SDMA ring
> + *
> + * @adev: amdgpu device pointer
> + *
> + * return: returns 0 on success.
> + */
> +int amdgpu_sdma_flush_gpu_tlb(struct amdgpu_device *adev)
> +{
> +        struct dma_fence *fence;
> +        struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;

I think it would be better to put this function in amdgpu_ttm.c or
maybe amdgpu_vm.c.  It doesn't really have anything to do with SDMA
per se.  buffer_funcs_ring could point to any engine that can handle
buffer ops; it just happens to be SDMA on most chips.

> +
> +        fence = amdgpu_ttm_flush_tlb(ring);
> +        if (fence) {
> +                dma_fence_wait(fence, false);
> +                dma_fence_put(fence);
> +                return 0;
> +        }
> +
> +        return -1;

Please use an appropriate error code here rather than -1.

Alex
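For illustration, a version of the wrapper that returns a real errno could
look roughly like this (untested sketch; treating a NULL fence as -ENOMEM
and passing dma_fence_wait()'s return value straight through are
assumptions on my part, and the same body would work unchanged if the
function moves to amdgpu_ttm.c as suggested above):

int amdgpu_sdma_flush_gpu_tlb(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
        struct dma_fence *fence;
        long r;

        fence = amdgpu_ttm_flush_tlb(ring);
        if (!fence)
                return -ENOMEM; /* assumed mapping: job alloc/submit failed */

        /* dma_fence_wait() returns 0 or a negative error such as -ERESTARTSYS */
        r = dma_fence_wait(fence, false);
        dma_fence_put(fence);

        return r;
}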
> +}
> +
>  /*
>   * GPU SDMA IP block helpers function.
>   */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
> index fc8528812598..739077862a7d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
> @@ -130,5 +130,6 @@ void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device *adev,
>                                    bool duplicate);
>  void amdgpu_sdma_unset_buffer_funcs_helper(struct amdgpu_device *adev);
>  int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev);
> +int amdgpu_sdma_flush_gpu_tlb(struct amdgpu_device *adev);
>
>  #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index c5ef7f7bdc15..1248d1dd5abc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -81,6 +81,42 @@ static int amdgpu_ttm_init_on_chip(struct amdgpu_device *adev,
>                                      false, size_in_page);
>  }
>
> +/**
> + * amdgpu_ttm_flush_tlb - flush gpu TLB using a GPU ring
> + *
> + * @ring: ring to submit the flushing job to
> + *
> + * return: returns dma fence which must be put by caller
> + */
> +struct dma_fence *amdgpu_ttm_flush_tlb(struct amdgpu_ring *ring)
> +{
> +        struct amdgpu_job *job;
> +        struct dma_fence *fence;
> +        struct amdgpu_device *adev = ring->adev;
> +        int r;
> +
> +        mutex_lock(&adev->mman.gtt_window_lock);
> +        r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity,
> +                                     AMDGPU_FENCE_OWNER_UNDEFINED,
> +                                     16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
> +                                     &job);
> +        if (r)
> +                goto error_alloc;
> +
> +        job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
> +        job->vm_needs_flush = true;
> +        job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
> +        amdgpu_ring_pad_ib(ring, &job->ibs[0]);
> +        fence = amdgpu_job_submit(job);
> +
> +        mutex_unlock(&adev->mman.gtt_window_lock);
> +        return fence;
> +
> +error_alloc:
> +        mutex_unlock(&adev->mman.gtt_window_lock);
> +        return NULL;
> +}
> +
>  /**
>   * amdgpu_evict_flags - Compute placement flags
>   *
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index e2cd5894afc9..86ba70d2eb53 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -200,5 +200,6 @@ uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
>  int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int mem_type);
>
>  void amdgpu_ttm_debugfs_init(struct amdgpu_device *adev);
> +struct dma_fence *amdgpu_ttm_flush_tlb(struct amdgpu_ring *ring);
>
>  #endif
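For reference, since amdgpu_ttm_flush_tlb() returns the fence and the
kernel-doc above says the caller must put it, a consumer outside the SDMA
wrapper would presumably look something like this (hypothetical example,
not part of the patch; the -ENOMEM mapping for a NULL fence is an
assumption):

/* hypothetical caller, for illustration only */
static int example_flush_tlb_and_wait(struct amdgpu_device *adev)
{
        struct dma_fence *fence;
        long r;

        fence = amdgpu_ttm_flush_tlb(adev->mman.buffer_funcs_ring);
        if (!fence)
                return -ENOMEM;

        /* other CPU-side setup work could overlap with the flush here */

        r = dma_fence_wait(fence, false);
        dma_fence_put(fence);   /* per the kernel-doc, the caller must put the fence */

        return r;
}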
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index ab2556ca984e..9892b155d1ef 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -52,6 +52,7 @@
>  #include "athub_v2_1.h"
>
>  #include "amdgpu_reset.h"
> +#include "amdgpu_sdma.h"
>
>  #if 0
>  static const struct soc15_reg_golden golden_settings_navi10_hdp[] =
> @@ -332,10 +333,6 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>                                      uint32_t vmhub, uint32_t flush_type)
>  {
>          struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
> -        struct dma_fence *fence;
> -        struct amdgpu_job *job;
> -
> -        int r;
>
>          /* flush hdp cache */
>          adev->hdp.funcs->flush_hdp(adev, NULL);
> @@ -378,34 +375,17 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>                  return;
>          }
>
> -        /* The SDMA on Navi has a bug which can theoretically result in memory
> +        mutex_unlock(&adev->mman.gtt_window_lock);
> +
> +        /*
> +         * The SDMA on Navi has a bug which can theoretically result in memory
>           * corruption if an invalidation happens at the same time as an VA
>           * translation. Avoid this by doing the invalidation from the SDMA
>           * itself.
>           */
> -        r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.entity,
> -                                     AMDGPU_FENCE_OWNER_UNDEFINED,
> -                                     16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
> -                                     &job);
> -        if (r)
> -                goto error_alloc;
>
> -        job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
> -        job->vm_needs_flush = true;
> -        job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
> -        amdgpu_ring_pad_ib(ring, &job->ibs[0]);
> -        fence = amdgpu_job_submit(job);
> -
> -        mutex_unlock(&adev->mman.gtt_window_lock);
> -
> -        dma_fence_wait(fence, false);
> -        dma_fence_put(fence);
> -
> -        return;
> -
> -error_alloc:
> -        mutex_unlock(&adev->mman.gtt_window_lock);
> -        DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
> +        if (amdgpu_sdma_flush_gpu_tlb(adev))
> +                DRM_ERROR("Error flushing GPU TLB using the SDMA !\n");
>  }
>
>  /**
> --
> 2.40.1
>