Re: [PATCH 02/11] drm/amdgpu: rework gmc_v10_0_flush_gpu_tlb

Alex Deucher <alexdeucher@xxxxxxxxx> · Tue, 5 Sep 2023 16:52:21 -0400

On Tue, Sep 5, 2023 at 2:20 AM Christian König
<ckoenig.leichtzumerken@xxxxxxxxx> wrote:
>
> Move the SDMA workaround necessary for Navi 1x into a higher layer.

You could split out the register offsets code cleanup into a separate
patch.  Either way, the patch is:
Reviewed-by: Alex Deucher <alexander.deucher@xxxxxxx>

>
> Signed-off-by: Christian König <christian.koenig@xxxxxxx>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c  |  48 +++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h  |   5 +-
>  drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c |   3 +
>  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c   | 159 ++++++-----------------
>  4 files changed, 97 insertions(+), 118 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> index d78bd9732543..857051093900 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> @@ -575,6 +575,54 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
>         return 0;
>  }
>
> +void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
> +                             uint32_t vmhub, uint32_t flush_type)
> +{
> +       struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
> +       struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
> +       struct dma_fence *fence;
> +       struct amdgpu_job *job;
> +       int r;
> +
> +       if (!hub->sdma_invalidation_workaround || vmid ||
> +           !adev->mman.buffer_funcs_enabled ||
> +           !adev->ib_pool_ready || amdgpu_in_reset(adev) ||
> +           !ring->sched.ready) {
> +               adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid, vmhub,
> +                                                  flush_type);
> +               return;
> +       }
> +
> +       /* The SDMA on Navi 1x has a bug which can theoretically result in memory
> +        * corruption if an invalidation happens at the same time as an VA
> +        * translation. Avoid this by doing the invalidation from the SDMA
> +        * itself at least for GART.
> +        */
> +       mutex_lock(&adev->mman.gtt_window_lock);
> +       r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.high_pr,
> +                                    AMDGPU_FENCE_OWNER_UNDEFINED,
> +                                    16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
> +                                    &job);
> +       if (r)
> +               goto error_alloc;
> +
> +       job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
> +       job->vm_needs_flush = true;
> +       job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
> +       amdgpu_ring_pad_ib(ring, &job->ibs[0]);
> +       fence = amdgpu_job_submit(job);
> +       mutex_unlock(&adev->mman.gtt_window_lock);
> +
> +       dma_fence_wait(fence, false);
> +       dma_fence_put(fence);
> +
> +       return;
> +
> +error_alloc:
> +       mutex_unlock(&adev->mman.gtt_window_lock);
> +       DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
> +}
> +
>  /**
>   * amdgpu_gmc_tmz_set -- check and set if a device supports TMZ
>   * @adev: amdgpu_device pointer
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> index fdc25cd559b6..9e7df2f69123 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> @@ -117,6 +117,8 @@ struct amdgpu_vmhub {
>
>         uint32_t        vm_contexts_disable;
>
> +       bool            sdma_invalidation_workaround;
> +
>         const struct amdgpu_vmhub_funcs *vmhub_funcs;
>  };
>
> @@ -335,7 +337,6 @@ struct amdgpu_gmc {
>         u64 noretry_flags;
>  };
>
> -#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))
>  #define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub, inst) \
>         ((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \
>         ((adev), (pasid), (type), (allhub), (inst)))
> @@ -401,6 +402,8 @@ int amdgpu_gmc_ras_sw_init(struct amdgpu_device *adev);
>  int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev);
>  void amdgpu_gmc_ras_fini(struct amdgpu_device *adev);
>  int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev);
> +void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
> +                             uint32_t vmhub, uint32_t flush_type);
>
>  extern void amdgpu_gmc_tmz_set(struct amdgpu_device *adev);
>  extern void amdgpu_gmc_noretry_set(struct amdgpu_device *adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
> index a041c6c970e1..8521c45e8f38 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
> @@ -471,6 +471,9 @@ static void gfxhub_v2_0_init(struct amdgpu_device *adev)
>                 GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
>                 GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
>
> +       /* TODO: This is only needed on some Navi 1x revisions */
> +       hub->sdma_invalidation_workaround = true;
> +
>         hub->vmhub_funcs = &gfxhub_v2_0_vmhub_funcs;
>  }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index fa87a85e1017..1f70c57bcd69 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -230,20 +230,49 @@ static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info(
>   * by the amdgpu vm/hsa code.
>   */
>
> -static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
> -                                  unsigned int vmhub, uint32_t flush_type)
> +/**
> + * gmc_v10_0_flush_gpu_tlb - gart tlb flush callback
> + *
> + * @adev: amdgpu_device pointer
> + * @vmid: vm instance to flush
> + * @vmhub: vmhub type
> + * @flush_type: the flush type
> + *
> + * Flush the TLB for the requested page table.
> + */
> +static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
> +                                       uint32_t vmhub, uint32_t flush_type)
>  {
>         bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, vmhub);
>         struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
>         u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
> -       u32 tmp;
>         /* Use register 17 for GART */
>         const unsigned int eng = 17;
> -       unsigned int i;
>         unsigned char hub_ip = 0;
> +       u32 sem, req, ack;
> +       unsigned int i;
> +       u32 tmp;
>
> -       hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ?
> -                  GC_HWIP : MMHUB_HWIP;
> +       sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng;
> +       req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
> +       ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
> +
> +       /* flush hdp cache */
> +       adev->hdp.funcs->flush_hdp(adev, NULL);
> +
> +       /* For SRIOV run time, driver shouldn't access the register through MMIO
> +        * Directly use kiq to do the vm invalidation instead
> +        */
> +       if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes &&
> +           (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
> +           down_read_trylock(&adev->reset_domain->sem)) {
> +               amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
> +                               1 << vmid);
> +               up_read(&adev->reset_domain->sem);
> +               return;
> +       }
> +
> +       hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ? GC_HWIP : MMHUB_HWIP;
>
>         spin_lock(&adev->gmc.invalidate_lock);
>         /*
> @@ -257,9 +286,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
>         if (use_semaphore) {
>                 for (i = 0; i < adev->usec_timeout; i++) {
>                         /* a read return value of 1 means semaphore acuqire */
> -                       tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
> -                                        hub->eng_distance * eng, hub_ip);
> -
> +                       tmp = RREG32_RLC_NO_KIQ(sem, hub_ip);
>                         if (tmp & 0x1)
>                                 break;
>                         udelay(1);
> @@ -269,9 +296,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
>                         DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
>         }
>
> -       WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req +
> -                         hub->eng_distance * eng,
> -                         inv_req, hub_ip);
> +       WREG32_RLC_NO_KIQ(req, inv_req, hub_ip);
>
>         /*
>          * Issue a dummy read to wait for the ACK register to be cleared
> @@ -279,14 +304,11 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
>          */
>         if ((vmhub == AMDGPU_GFXHUB(0)) &&
>             (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 3, 0)))
> -               RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req +
> -                                 hub->eng_distance * eng, hub_ip);
> +               RREG32_RLC_NO_KIQ(req, hub_ip);
>
>         /* Wait for ACK with a delay.*/
>         for (i = 0; i < adev->usec_timeout; i++) {
> -               tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_ack +
> -                                 hub->eng_distance * eng, hub_ip);
> -
> +               tmp = RREG32_RLC_NO_KIQ(ack, hub_ip);
>                 tmp &= 1 << vmid;
>                 if (tmp)
>                         break;
> @@ -296,109 +318,12 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
>
>         /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
>         if (use_semaphore)
> -               /*
> -                * add semaphore release after invalidation,
> -                * write with 0 means semaphore release
> -                */
> -               WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
> -                                 hub->eng_distance * eng, 0, hub_ip);
> +               WREG32_RLC_NO_KIQ(sem, 0, hub_ip);
>
>         spin_unlock(&adev->gmc.invalidate_lock);
>
> -       if (i < adev->usec_timeout)
> -               return;
> -
> -       DRM_ERROR("Timeout waiting for VM flush hub: %d!\n", vmhub);
> -}
> -
> -/**
> - * gmc_v10_0_flush_gpu_tlb - gart tlb flush callback
> - *
> - * @adev: amdgpu_device pointer
> - * @vmid: vm instance to flush
> - * @vmhub: vmhub type
> - * @flush_type: the flush type
> - *
> - * Flush the TLB for the requested page table.
> - */
> -static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
> -                                       uint32_t vmhub, uint32_t flush_type)
> -{
> -       struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
> -       struct dma_fence *fence;
> -       struct amdgpu_job *job;
> -
> -       int r;
> -
> -       /* flush hdp cache */
> -       adev->hdp.funcs->flush_hdp(adev, NULL);
> -
> -       /* For SRIOV run time, driver shouldn't access the register through MMIO
> -        * Directly use kiq to do the vm invalidation instead
> -        */
> -       if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes &&
> -           (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
> -           down_read_trylock(&adev->reset_domain->sem)) {
> -               struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
> -               const unsigned int eng = 17;
> -               u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
> -               u32 req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
> -               u32 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
> -
> -               amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
> -                               1 << vmid);
> -
> -               up_read(&adev->reset_domain->sem);
> -               return;
> -       }
> -
> -       mutex_lock(&adev->mman.gtt_window_lock);
> -
> -       if (vmhub == AMDGPU_MMHUB0(0)) {
> -               gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB0(0), 0);
> -               mutex_unlock(&adev->mman.gtt_window_lock);
> -               return;
> -       }
> -
> -       BUG_ON(vmhub != AMDGPU_GFXHUB(0));
> -
> -       if (!adev->mman.buffer_funcs_enabled ||
> -           !adev->ib_pool_ready ||
> -           amdgpu_in_reset(adev) ||
> -           ring->sched.ready == false) {
> -               gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB(0), 0);
> -               mutex_unlock(&adev->mman.gtt_window_lock);
> -               return;
> -       }
> -
> -       /* The SDMA on Navi has a bug which can theoretically result in memory
> -        * corruption if an invalidation happens at the same time as an VA
> -        * translation. Avoid this by doing the invalidation from the SDMA
> -        * itself.
> -        */
> -       r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.high_pr,
> -                                    AMDGPU_FENCE_OWNER_UNDEFINED,
> -                                    16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
> -                                    &job);
> -       if (r)
> -               goto error_alloc;
> -
> -       job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
> -       job->vm_needs_flush = true;
> -       job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
> -       amdgpu_ring_pad_ib(ring, &job->ibs[0]);
> -       fence = amdgpu_job_submit(job);
> -
> -       mutex_unlock(&adev->mman.gtt_window_lock);
> -
> -       dma_fence_wait(fence, false);
> -       dma_fence_put(fence);
> -
> -       return;
> -
> -error_alloc:
> -       mutex_unlock(&adev->mman.gtt_window_lock);
> -       DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
> +       if (i >= adev->usec_timeout)
> +               DRM_ERROR("Timeout waiting for VM flush hub: %d!\n", vmhub);
>  }
>
>  /**
> --
> 2.34.1
>