Re: [PATCH v3 1/2] drm/amdgpu: Acquire ttm locks for dmaunmap

Christian König <ckoenig.leichtzumerken@xxxxxxxxx> · Thu, 26 Oct 2023 08:06:49 +0200

Am 25.10.23 um 20:45 schrieb Felix Kuehling:
On 2023-10-25 02:12, Christian König wrote:
Am 24.10.23 um 21:20 schrieb David Francis:
dmaunmap can call ttm_bo_validate, which expects the
ttm dma_resv to be held.

Well first of all the dma_resv object isn't related to TTM.


Acquire the locks in amdgpu_amdkfd_gpuvm_dmaunmap_mem.

Because the dmaunmap step can now fail, two new numbers
need to be tracked. n_dmaunmap_success tracks the number
of devices that have completed dmaunmap. If a device fails
to dmaunmap due to a signal interrupt, n_dmaunmap_bos tracks
the number of bos on that device that were successfully
dmaunmapped.

Track those values in struct kgd_mem.

This failure can also cause the sync_memory step of the ioctl
to be repeated; it is idempotent, so this should not cause any issues.

Signed-off-by: David Francis <David.Francis@xxxxxxx>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  6 ++++-
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 23 
+++++++++++++++----
  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      | 19 ++++++++++-----
  3 files changed, 37 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 3ad8dc523b42..c60564ec4312 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -86,6 +86,10 @@ struct kgd_mem {
        bool aql_queue;
      bool is_imported;
+
+    /* Used to track successful dmaunmap across retries in unmap 
ioctl */
+    uint32_t n_dmaunmap_success;
+    uint32_t n_dmaunmap_bos;
  };
    /* KFD Memory Eviction */
@@ -302,7 +306,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(struct 
amdgpu_device *adev,
                        struct kgd_mem *mem, void *drm_priv);
  int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
          struct amdgpu_device *adev, struct kgd_mem *mem, void 
*drm_priv);
-void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void 
*drm_priv);
+int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void 
*drm_priv);
  int amdgpu_amdkfd_gpuvm_sync_memory(
          struct amdgpu_device *adev, struct kgd_mem *mem, bool intr);
  int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 54f31a420229..c431132d7cc1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -2102,21 +2102,36 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
      return ret;
  }
  -void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void 
*drm_priv)
+int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void 
*drm_priv)
  {
      struct kfd_mem_attachment *entry;
      struct amdgpu_vm *vm;
+    int ret;
+    int i = 0;
        vm = drm_priv_to_vm(drm_priv);
        mutex_lock(&mem->lock);
        list_for_each_entry(entry, &mem->attachments, list) {
-        if (entry->bo_va->base.vm == vm)
-            kfd_mem_dmaunmap_attachment(mem, entry);
-    }
+        if (i >= mem->n_dmaunmap_bos) {
+            ret = amdgpu_bo_reserve(entry->bo_va->base.bo, false);
+            if (ret) {
+                mem->n_dmaunmap_bos = i;
+                goto out;
+            }
+
+            if (entry->bo_va->base.vm == vm)
+                kfd_mem_dmaunmap_attachment(mem, entry);
  + amdgpu_bo_unreserve(entry->bo_va->base.bo);
+        }
+        i++;
+    }

WOW, big NAK to that!

You are essentially re-inventing the locking multiple BOs at the same 
time, please don't do that. Instead use dma_exec or ttm_exec_buf for 
that.

I don't think that's the intention here. We're not locking multiple 
BOs at the same time. The purpose of all this counting is to safely 
handle ioctl restart for signal handling without dmaunmapping the same 
thing twice.

Well, it's not mandatory to lock multiple BOs at the same time but it 
solves the problem that this shouldn't be interrupted.


That said, I'm not a fan of this counting approach either and 
suggested a simpler way of doing this by adding a flag in the 
kfd_mem_attachment structure.

When we look strictly at the rules for IOCTLs and restarting them then 
that counting approach is not really allowed either.

Kernel operations should either completely fail, fully complete or 
explicitly return how much they completed (e.g. how many bytes 
transferred for example). That we only partially complete and track that 
state inside the kernel is usually a no-go.

That said is it possible that userspace tries to do this operation 
twice? If yes what happens?

For this local problem the alternative to using drm_exec or ttm_exec_* 
would be to use non-interruptible waits#.

Regards,
Christian.


Regards,
  Felix



This also avoids all the fail handling.

Regards,
Christian.

+    mem->n_dmaunmap_bos = 0;
+out:
      mutex_unlock(&mem->lock);
+    return ret;
  }
    int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 06988cf1db51..66dee67ad859 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1366,7 +1366,7 @@ static int 
kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
  {
      struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;
      struct kfd_process_device *pdd, *peer_pdd;
-    void *mem;
+    struct kgd_mem *mem;
      long err = 0;
      uint32_t *devices_arr = NULL, i;
      bool flush_tlb;
@@ -1400,7 +1400,7 @@ static int 
kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
          goto bind_process_to_device_failed;
      }
  -    mem = kfd_process_device_translate_handle(pdd,
+    mem = (struct kgd_mem *)kfd_process_device_translate_handle(pdd,
                          GET_IDR_HANDLE(args->handle));
      if (!mem) {
          err = -ENOMEM;
@@ -1414,7 +1414,7 @@ static int 
kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
              goto get_mem_obj_from_handle_failed;
          }
          err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
-            peer_pdd->dev->adev, (struct kgd_mem *)mem, 
peer_pdd->drm_priv);
+            peer_pdd->dev->adev, mem, peer_pdd->drm_priv);
          if (err) {
              pr_err("Failed to unmap from gpu %d/%d\n",
                     i, args->n_devices);
@@ -1426,7 +1426,7 @@ static int 
kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
      flush_tlb = kfd_flush_tlb_after_unmap(pdd->dev->kfd);
      if (flush_tlb) {
          err = amdgpu_amdkfd_gpuvm_sync_memory(pdd->dev->adev,
-                (struct kgd_mem *) mem, true);
+                mem, true);
          if (err) {
              pr_debug("Sync memory failed, wait interrupted by user 
signal\n");
              goto sync_memory_failed;
@@ -1434,7 +1434,7 @@ static int 
kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
      }
        /* Flush TLBs after waiting for the page table updates to 
complete */
-    for (i = 0; i < args->n_devices; i++) {
+    for (i = mem->n_dmaunmap_success; i < args->n_devices; i++) {
          peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
          if (WARN_ON_ONCE(!peer_pdd))
              continue;
@@ -1442,8 +1442,14 @@ static int 
kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
              kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);
            /* Remove dma mapping after tlb flush to avoid 
IO_PAGE_FAULT */
-        amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd->drm_priv);
+        err = amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, 
peer_pdd->drm_priv);
+        if (err) {
+            pr_debug("DMA unmapping failed, acquire interrupted by 
user signal\n");
+            goto dmaunmap_failed;
+        }
+        mem->n_dmaunmap_success = i+1;
      }
+    mem->n_dmaunmap_success = 0;
        mutex_unlock(&p->mutex);
  @@ -1455,6 +1461,7 @@ static int 
kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
  get_mem_obj_from_handle_failed:
  unmap_memory_from_gpu_failed:
  sync_memory_failed:
+dmaunmap_failed:
      mutex_unlock(&p->mutex);
  copy_from_user_failed:
      kfree(devices_arr);