Re: [PATCH v2] drm/amdgpu: flush GPU TLB using SDMA ring

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hey Alex,

On 09/06/2023 19:08, Alex Deucher wrote:
On Fri, Jun 9, 2023 at 11:05 AM Shashank Sharma <shashank.sharma@xxxxxxx> wrote:
This patch moves the code to flush GPU TLB using SDMA ring and
splits it into two parts:
- a general purpose function to flush GPU TLB using any valid GPU ring.
- a wrapper which consumes this function using SDMA ring.

The idea is that KFD/KGD code should be able to flush GPU TLB
using SDMA ring if they want to.

v2: Addressed review comments on RFC
     - Make the TLB flush function generic, and add a SDMA wrapper
       to it (Christian).

Cc: Christian Koenig <christian.koenig@xxxxxxx>
Cc: Philip Yang <philip.yang@xxxxxxx>
Signed-off-by: Shashank Sharma <shashank.sharma@xxxxxxx>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 22 +++++++++++++++
  drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h |  1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c  | 36 ++++++++++++++++++++++++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h  |  1 +
  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c   | 34 +++++-----------------
  5 files changed, 67 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index 231ca06bc9c7..05ffeb704dc4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -30,6 +30,28 @@
  /* SDMA CSA reside in the 3rd page of CSA */
  #define AMDGPU_CSA_SDMA_OFFSET (4096 * 2)

+/**
+ * amdgpu_sdma_flush_gpu_tlb - flush gpu TLB using SDMA ring
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Return: returns 0 on success.
+ */
+int amdgpu_sdma_flush_gpu_tlb(struct amdgpu_device *adev)
+{
+       struct dma_fence *fence;
+       struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
I think it would be better to put this function in amdgpu_ttm.c or
some amdgpu_vm.c.  It doesn't really have anything to do with SDMA per
se.  buffer_funcs_ring could point to any engine that could handle
buffer ops.  It just happens to be SDMA on most chips.

Makes sense, I will probably remove the SDMA layer itself and just keep it a generic function.

- Shashank

+
+       fence = amdgpu_ttm_flush_tlb(ring);
+       if (fence) {
+               dma_fence_wait(fence, false);
+               dma_fence_put(fence);
+               return 0;
+       }
+
+       return -1;
Please use an appropriate error code here rather than -1.

Noted,

- Shashank

Alex

+}
+
  /*
   * GPU SDMA IP block helpers function.
   */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index fc8528812598..739077862a7d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -130,5 +130,6 @@ void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device *adev,
          bool duplicate);
  void amdgpu_sdma_unset_buffer_funcs_helper(struct amdgpu_device *adev);
  int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev);
+int amdgpu_sdma_flush_gpu_tlb(struct amdgpu_device *adev);

  #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index c5ef7f7bdc15..1248d1dd5abc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -81,6 +81,42 @@ static int amdgpu_ttm_init_on_chip(struct amdgpu_device *adev,
                                   false, size_in_page);
  }

+/**
+ * amdgpu_ttm_flush_tlb - flush gpu TLB using a GPU ring
+ *
+ * @ring: ring to submit the flushing job to
+ *
+ * Return: dma fence which must be put by the caller, or NULL on
+ * allocation failure.
+ */
+struct dma_fence *amdgpu_ttm_flush_tlb(struct amdgpu_ring *ring)
+{
+       struct amdgpu_job *job;
+       struct dma_fence *fence;
+       struct amdgpu_device *adev = ring->adev;
+       int r;
+
+       mutex_lock(&adev->mman.gtt_window_lock);
+       r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity,
+                                    AMDGPU_FENCE_OWNER_UNDEFINED,
+                                    16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
+                                    &job);
+       if (r)
+               goto error_alloc;
+
+       job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
+       job->vm_needs_flush = true;
+       job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
+       amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+       fence = amdgpu_job_submit(job);
+
+       mutex_unlock(&adev->mman.gtt_window_lock);
+       return fence;
+
+error_alloc:
+       mutex_unlock(&adev->mman.gtt_window_lock);
+       return NULL;
+}
+
  /**
   * amdgpu_evict_flags - Compute placement flags
   *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index e2cd5894afc9..86ba70d2eb53 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -200,5 +200,6 @@ uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
  int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int mem_type);

  void amdgpu_ttm_debugfs_init(struct amdgpu_device *adev);
+struct dma_fence *amdgpu_ttm_flush_tlb(struct amdgpu_ring *ring);

  #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index ab2556ca984e..9892b155d1ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -52,6 +52,7 @@
  #include "athub_v2_1.h"

  #include "amdgpu_reset.h"
+#include "amdgpu_sdma.h"

  #if 0
  static const struct soc15_reg_golden golden_settings_navi10_hdp[] =
@@ -332,10 +333,6 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
                                         uint32_t vmhub, uint32_t flush_type)
  {
         struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
-       struct dma_fence *fence;
-       struct amdgpu_job *job;
-
-       int r;

         /* flush hdp cache */
         adev->hdp.funcs->flush_hdp(adev, NULL);
@@ -378,34 +375,17 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
                 return;
         }

-       /* The SDMA on Navi has a bug which can theoretically result in memory
+       mutex_unlock(&adev->mman.gtt_window_lock);
+
+       /*
+        * The SDMA on Navi has a bug which can theoretically result in memory
          * corruption if an invalidation happens at the same time as an VA
          * translation. Avoid this by doing the invalidation from the SDMA
          * itself.
          */
-       r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.entity,
-                                    AMDGPU_FENCE_OWNER_UNDEFINED,
-                                    16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
-                                    &job);
-       if (r)
-               goto error_alloc;

-       job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
-       job->vm_needs_flush = true;
-       job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
-       amdgpu_ring_pad_ib(ring, &job->ibs[0]);
-       fence = amdgpu_job_submit(job);
-
-       mutex_unlock(&adev->mman.gtt_window_lock);
-
-       dma_fence_wait(fence, false);
-       dma_fence_put(fence);
-
-       return;
-
-error_alloc:
-       mutex_unlock(&adev->mman.gtt_window_lock);
-       DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
+       if (amdgpu_sdma_flush_gpu_tlb(adev))
+               DRM_ERROR("Error flushing GPU TLB using the SDMA!\n");
  }

  /**
--
2.40.1




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux