Re: [RFC PATCH 1/2] drm/amdkfd: Move TLB flushing logic into amdgpu

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 





Am 13.03.23 um 14:21 schrieb Felix Kuehling:

Am 2023-03-13 um 03:36 schrieb Christian König:
Am 10.03.23 um 23:16 schrieb Felix Kuehling:
This will make it possible for amdgpu GEM ioctls to flush TLBs on compute
VMs.

This removes VMID-based TLB flushing and always uses PASID-based
flushing. This still works because it scans the VMID-PASID mapping
registers to find the right VMID. It's only slightly less efficient. This
is not a production use case.

On the one hand it looks nice that we can now flush based on the pasid without having the NO_HWS dependency (I hope that this was intentional).

The intention was to be able to trigger PASID TLB flushes from AMDGPU. I need it for flushing compute VMs when their mappings are modified by the GEM VA ioctl. Removing the NO_HWS dependency was a useful simplification to get there.



On the other hand I really don't like to have any variables in the amdgpu_vm structure which are not worked with by the VM code itself.

So what's the solution? Move that code into amdgpu_vm.c? Or find a different data structure to store the sequence number? I guess I could put it in struct amdgpu_fpriv, which amdgpu_vm is part of.

Yeah, moving more of that into the VM code would be one possibility.

What we should especially do is implementing this TLB flush fence idea, I've already pinged Shashank for the MES side and will probably assign Amar to implement this for older kernels.



That already backfired with the pd_phys_addr before.

Can you clarify this concern? I'm not aware of any issues with pd_phys_addr.

Shashank confused this with the addr he should use for the MES and it took us a moment to realize that this value isn't updated in the VM code.

Regards,
Christian.


Thanks,
  Felix



Regards,
Christian.


Signed-off-by: Felix Kuehling <Felix.Kuehling@xxxxxxx>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 30 +++++++++-------------
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  6 ++---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h     |  1 +
  drivers/gpu/drm/amd/amdkfd/kfd_priv.h      |  9 +++++--
  drivers/gpu/drm/amd/amdkfd/kfd_process.c   | 28 --------------------
  5 files changed, 22 insertions(+), 52 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 8816853e50c0..dcbd28e99b5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -726,31 +726,25 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
      return false;
  }
  -int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev,
-                     uint16_t vmid)
-{
-    if (adev->family == AMDGPU_FAMILY_AI) {
-        int i;
-
-        for (i = 0; i < adev->num_vmhubs; i++)
-            amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
-    } else {
-        amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
-    }
-
-    return 0;
-}
-
-int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
-                      uint16_t pasid, enum TLB_FLUSH_TYPE flush_type)
+int amdgpu_amdkfd_flush_tlb(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+                enum TLB_FLUSH_TYPE type)
  {
+    uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm);
      bool all_hub = false;
  +    /*
+     * It can be that we race and lose here, but that is extremely unlikely +     * and the worst thing which could happen is that we flush the changes
+     * into the TLB once more which is harmless.
+     */
+    if (atomic64_xchg(&vm->kfd_last_flushed_seq, tlb_seq) == tlb_seq)
+        return 0;
+
      if (adev->family == AMDGPU_FAMILY_AI ||
          adev->family == AMDGPU_FAMILY_RV)
          all_hub = true;
  -    return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub); +    return amdgpu_gmc_flush_gpu_tlb_pasid(adev, vm->pasid, type, all_hub);
  }
    bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 01ba3589b60a..dcaf69fd833c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -157,10 +157,8 @@ int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev,
                  uint32_t *ib_cmd, uint32_t ib_len);
  void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle);
  bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev);
-int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev,
-                uint16_t vmid);
-int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
-                uint16_t pasid, enum TLB_FLUSH_TYPE flush_type);
+int amdgpu_amdkfd_flush_tlb(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+                enum TLB_FLUSH_TYPE type);
    bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 03a3314e5b43..171de7da2959 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -290,6 +290,7 @@ struct amdgpu_vm {
      /* Last finished delayed update */
      atomic64_t        tlb_seq;
      struct dma_fence    *last_tlb_flush;
+    atomic64_t        kfd_last_flushed_seq;
        /* Last unlocked submission to the scheduler entities */
      struct dma_fence    *last_unlocked;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index bfa30d12406b..e029129308e7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -715,7 +715,6 @@ struct kfd_process_device {
      /* VM context for GPUVM allocations */
      struct file *drm_file;
      void *drm_priv;
-    atomic64_t tlb_seq;
        /* GPUVM allocations storage */
      struct idr alloc_idr;
@@ -1344,7 +1343,13 @@ void kfd_signal_reset_event(struct kfd_dev *dev);     void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 pasid);   -void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type); +static inline void kfd_flush_tlb(struct kfd_process_device *pdd,                             enum TLB_FLUSH_TYPE type)
+{
+    struct amdgpu_device *adev = pdd->dev->adev;
+    struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
+
+    amdgpu_amdkfd_flush_tlb(adev, vm, type);
+}
    static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev)
  {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index ebabe92f7edb..48d7d30eeb24 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1591,7 +1591,6 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
          return ret;
      }
      pdd->drm_priv = drm_file->private_data;
-    atomic64_set(&pdd->tlb_seq, 0);
        ret = kfd_process_device_reserve_ib_mem(pdd);
      if (ret)
@@ -1994,33 +1993,6 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
                     KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
  }
  -void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type)
-{
-    struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
-    uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm);
-    struct kfd_dev *dev = pdd->dev;
-
-    /*
-     * It can be that we race and lose here, but that is extremely unlikely -     * and the worst thing which could happen is that we flush the changes
-     * into the TLB once more which is harmless.
-     */
-    if (atomic64_xchg(&pdd->tlb_seq, tlb_seq) == tlb_seq)
-        return;
-
-    if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
-        /* Nothing to flush until a VMID is assigned, which
-         * only happens when the first queue is created.
-         */
-        if (pdd->qpd.vmid)
-            amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->adev,
-                            pdd->qpd.vmid);
-    } else {
-        amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->adev,
-                    pdd->process->pasid, type);
-    }
-}
-
  struct kfd_process_device *kfd_process_device_data_by_id(struct kfd_process *p, uint32_t gpu_id)
  {
      int i;





[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux