On Fri, Jun 9, 2023 at 11:05 AM Shashank Sharma <shashank.sharma@xxxxxxx> wrote:
>
> This patch moves the code to flush GPU TLB using SDMA ring and
> splits it into two parts:
> - a general purpose function to flush GPU TLB using any valid GPU ring.
> - a wrapper which consumes this function using SDMA ring.
>
> The idea is that KFD/KGD code should be able to flush GPU TLB
> using SDMA ring if they want to.
>
> v2: Addressed review comments on RFC
>     - Make the TLB flush function generic, and add a SDMA wrapper
>       to it (Christian).
>
> Cc: Christian Koenig <christian.koenig@xxxxxxx>
> Cc: Philip Yang <philip.yang@xxxxxxx>
> Signed-off-by: Shashank Sharma <shashank.sharma@xxxxxxx>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 22 +++++++++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h |  1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c  | 36 ++++++++++++++++++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h  |  1 +
>  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c   | 34 +++++-----------------
>  5 files changed, 67 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
> index 231ca06bc9c7..05ffeb704dc4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
> @@ -30,6 +30,28 @@
>  /* SDMA CSA reside in the 3rd page of CSA */
>  #define AMDGPU_CSA_SDMA_OFFSET (4096 * 2)
>
> +/**
> + * amdgpu_sdma_flush_tlb - flush gpu TLB using SDMA ring
> + *
> + * @adev: amdgpu device pointer
> + *
> + * return: returns 0 on success.
> + */
> +int amdgpu_sdma_flush_gpu_tlb(struct amdgpu_device *adev)
> +{
> +        struct dma_fence *fence;
> +        struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;

I think it would be better to put this function in amdgpu_ttm.c or
maybe amdgpu_vm.c.  It doesn't really have anything to do with SDMA
per se.  buffer_funcs_ring could point to any engine that can handle
buffer ops; it just happens to be SDMA on most chips.

> +
> +        fence = amdgpu_ttm_flush_tlb(ring);
> +        if (fence) {
> +                dma_fence_wait(fence, false);
> +                dma_fence_put(fence);
> +                return 0;
> +        }
> +
> +        return -1;

Please use an appropriate error code here rather than -1.

Alex
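For illustration, a version of the wrapper that returns a real errno could
look roughly like this (untested sketch; treating a NULL fence as -ENOMEM
and passing dma_fence_wait()'s return value straight through are
assumptions on my part, and the same body would work unchanged if the
function moves to amdgpu_ttm.c as suggested above):

int amdgpu_sdma_flush_gpu_tlb(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
        struct dma_fence *fence;
        long r;

        fence = amdgpu_ttm_flush_tlb(ring);
        if (!fence)
                return -ENOMEM; /* assumed mapping: job alloc/submit failed */

        /* dma_fence_wait() returns 0 or a negative error such as -ERESTARTSYS */
        r = dma_fence_wait(fence, false);
        dma_fence_put(fence);

        return r;
}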
> +}
> +
>  /*
>   * GPU SDMA IP block helpers function.
>   */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
> index fc8528812598..739077862a7d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
> @@ -130,5 +130,6 @@ void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device *adev,
>                                    bool duplicate);
>  void amdgpu_sdma_unset_buffer_funcs_helper(struct amdgpu_device *adev);
>  int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev);
> +int amdgpu_sdma_flush_gpu_tlb(struct amdgpu_device *adev);
>
>  #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index c5ef7f7bdc15..1248d1dd5abc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -81,6 +81,42 @@ static int amdgpu_ttm_init_on_chip(struct amdgpu_device *adev,
>                                      false, size_in_page);
>  }
>
> +/**
> + * amdgpu_ttm_flush_tlb - flush gpu TLB using a GPU ring
> + *
> + * @ring: ring to submit the flushing job to
> + *
> + * return: returns dma fence which must be put by caller
> + */
> +struct dma_fence *amdgpu_ttm_flush_tlb(struct amdgpu_ring *ring)
> +{
> +        struct amdgpu_job *job;
> +        struct dma_fence *fence;
> +        struct amdgpu_device *adev = ring->adev;
> +        int r;
> +
> +        mutex_lock(&adev->mman.gtt_window_lock);
> +        r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity,
> +                                     AMDGPU_FENCE_OWNER_UNDEFINED,
> +                                     16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
> +                                     &job);
> +        if (r)
> +                goto error_alloc;
> +
> +        job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
> +        job->vm_needs_flush = true;
> +        job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
> +        amdgpu_ring_pad_ib(ring, &job->ibs[0]);
> +        fence = amdgpu_job_submit(job);
> +
> +        mutex_unlock(&adev->mman.gtt_window_lock);
> +        return fence;
> +
> +error_alloc:
> +        mutex_unlock(&adev->mman.gtt_window_lock);
> +        return NULL;
> +}
> +
>  /**
>   * amdgpu_evict_flags - Compute placement flags
>   *
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index e2cd5894afc9..86ba70d2eb53 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -200,5 +200,6 @@ uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
>  int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int mem_type);
>
>  void amdgpu_ttm_debugfs_init(struct amdgpu_device *adev);
> +struct dma_fence *amdgpu_ttm_flush_tlb(struct amdgpu_ring *ring);
>
>  #endif
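For reference, since amdgpu_ttm_flush_tlb() returns the fence and the
kernel-doc above says the caller must put it, a consumer outside the SDMA
wrapper would presumably look something like this (hypothetical example,
not part of the patch; the -ENOMEM mapping for a NULL fence is an
assumption):

/* hypothetical caller, for illustration only */
static int example_flush_tlb_and_wait(struct amdgpu_device *adev)
{
        struct dma_fence *fence;
        long r;

        fence = amdgpu_ttm_flush_tlb(adev->mman.buffer_funcs_ring);
        if (!fence)
                return -ENOMEM;

        /* other CPU-side setup work could overlap with the flush here */

        r = dma_fence_wait(fence, false);
        dma_fence_put(fence);   /* per the kernel-doc, the caller must put the fence */

        return r;
}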
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index ab2556ca984e..9892b155d1ef 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -52,6 +52,7 @@
>  #include "athub_v2_1.h"
>
>  #include "amdgpu_reset.h"
> +#include "amdgpu_sdma.h"
>
>  #if 0
>  static const struct soc15_reg_golden golden_settings_navi10_hdp[] =
> @@ -332,10 +333,6 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>                                      uint32_t vmhub, uint32_t flush_type)
>  {
>          struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
> -        struct dma_fence *fence;
> -        struct amdgpu_job *job;
> -
> -        int r;
>
>          /* flush hdp cache */
>          adev->hdp.funcs->flush_hdp(adev, NULL);
> @@ -378,34 +375,17 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>                  return;
>          }
>
> -        /* The SDMA on Navi has a bug which can theoretically result in memory
> +        mutex_unlock(&adev->mman.gtt_window_lock);
> +
> +        /*
> +         * The SDMA on Navi has a bug which can theoretically result in memory
>           * corruption if an invalidation happens at the same time as an VA
>           * translation. Avoid this by doing the invalidation from the SDMA
>           * itself.
>           */
> -        r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.entity,
> -                                     AMDGPU_FENCE_OWNER_UNDEFINED,
> -                                     16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
> -                                     &job);
> -        if (r)
> -                goto error_alloc;
>
> -        job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
> -        job->vm_needs_flush = true;
> -        job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
> -        amdgpu_ring_pad_ib(ring, &job->ibs[0]);
> -        fence = amdgpu_job_submit(job);
> -
> -        mutex_unlock(&adev->mman.gtt_window_lock);
> -
> -        dma_fence_wait(fence, false);
> -        dma_fence_put(fence);
> -
> -        return;
> -
> -error_alloc:
> -        mutex_unlock(&adev->mman.gtt_window_lock);
> -        DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
> +        if (amdgpu_sdma_flush_gpu_tlb(adev))
> +                DRM_ERROR("Error flushing GPU TLB using the SDMA !\n");
>  }
>
>  /**
> --
> 2.40.1
>