Re: [PATCH 2/2] drm/amdgpu: use xarray for storing pasid in vm

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 6/23/2021 3:40 PM, Christian König wrote:


Am 23.06.21 um 15:11 schrieb Das, Nirmoy:

On 6/23/2021 2:50 PM, Christian König wrote:


Am 23.06.21 um 14:25 schrieb Nirmoy Das:
Replace idr with xarray as we actually need hash functionality.
Cleanup code related to vm pasid by adding helper function.

Signed-off-by: Nirmoy Das <nirmoy.das@xxxxxxx>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 134 +++++++++++--------------
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |   3 +-
  2 files changed, 60 insertions(+), 77 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index be841d62a1d4..e047e56a4be2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -87,6 +87,39 @@ struct amdgpu_prt_cb {
      struct dma_fence_cb cb;
  };
  +static int amdgpu_vm_set_pasid(struct amdgpu_device *adev,
+                   struct amdgpu_vm *vm,
+                   unsigned long pasid)

Some kerneldoc please describing why we have that function.


Alright.



+{
+    unsigned long flags;
+    int r;
+
+    if (pasid) {

You should probably reorder the code so that the old pasid is first removed and then eventually the new one added.

+ xa_lock_irqsave(&adev->vm_manager.pasids, flags);
+        r = xa_err(__xa_store(&adev->vm_manager.pasids, pasid, vm,
+                      GFP_ATOMIC));
+ xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);

As far as I can see this can just use xa_store() which also drops the need for GFP_ATOMIC here.


Do we need to have this irqsave/restore to keep pasids safe for amdgpu_vm_handle_fault() ?

No, we need the VM safe not the pasid.


Would spin_lock_irq be enough to keep the VM safe then I can use xa_store_irq() and remove some extra locking code?



xa_store() takes the spinlock without irq flags so I wanted to keep old behavior.

Yeah, that's indeed problematic. You need to keep that straight or lockdep will complain.

IIRC there is also a function to reserve an entry before you take the lock. Maybe use that one.


xa_reserve() also takes a spin lock so I think this won't work either with the GFP_KERNEL flag.






+        if (r < 0)
+            return r;
+    } else {
+        unsigned long index;
+        struct amdgpu_vm *res;
+
+        xa_lock_irqsave(&adev->vm_manager.pasids, flags);
+        xa_for_each(&adev->vm_manager.pasids, index, res) {
+            if (res == vm) {
+                __xa_erase(&adev->vm_manager.pasids, index);
+                break;
+            }
+        }
+ xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);

That is really ugly, why is that necessary?

Do you mean the lock around xa_for_each() ? I think I can just use a lock around __xa_erase(), or just xa_erase() if a simple spinlock without flags is enough.

I mean why you use xa_for_each() here?


amdgpu_vm_set_pasid() removes the pasid:vm entry when pasid 0 is passed. I need xa_for_each() to find the index of that vm pointer

so that I can pass that to __xa_erase(). I couldn't find an API which removes an entry by value.


Regards,

Nirmoy


Just __xa_erase should be enough.

Christian.



Regards,

Nirmoy



Christian

+    }
+
+    vm->pasid = pasid;
+
+    return 0;
+}
+
  /*
   * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS    * happens while holding this lock anywhere to prevent deadlocks when @@ -2940,18 +2973,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, u32 pasid)
        amdgpu_bo_unreserve(vm->root.bo);
  -    if (pasid) {
-        unsigned long flags;
-
- spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
-        r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, pasid + 1,
-                  GFP_ATOMIC);
- spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
-        if (r < 0)
-            goto error_free_root;
-
-        vm->pasid = pasid;
-    }
+    r = amdgpu_vm_set_pasid(adev, vm, pasid);
+    if (r)
+        goto error_free_root;
        INIT_KFIFO(vm->faults);
  @@ -3039,18 +3063,11 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm,
      if (r)
          goto unreserve_bo;
  -    if (pasid) {
-        unsigned long flags;
-
- spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
-        r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, pasid + 1,
-                  GFP_ATOMIC);
- spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
-
-        if (r == -ENOSPC)
-            goto unreserve_bo;
-        r = 0;
-    }
+    /* remove previous {pasid:vm} entry first */
+    r = amdgpu_vm_set_pasid(adev, vm, 0);
+    r = amdgpu_vm_set_pasid(adev, vm, pasid);
+    if (r)
+        goto unreserve_bo;
        /* Check if PD needs to be reinitialized and do it before
       * changing any other state, in case it fails.
@@ -3061,7 +3078,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm,
                         to_amdgpu_bo_vm(vm->root.bo),
                         false);
          if (r)
-            goto free_idr;
+            goto free_pasid_entry;
      }
        /* Update VM state */
@@ -3078,7 +3095,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm,
          r = amdgpu_bo_sync_wait(vm->root.bo,
                      AMDGPU_FENCE_OWNER_UNDEFINED, true);
          if (r)
-            goto free_idr;
+            goto free_pasid_entry;
            vm->update_funcs = &amdgpu_vm_cpu_funcs;
      } else {
@@ -3088,31 +3105,14 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm,
      vm->last_update = NULL;
      vm->is_compute_context = true;
  -    if (vm->pasid) {
-        unsigned long flags;
-
- spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
-        idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
- spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
-        vm->pasid = 0;
-    }
-
      /* Free the shadow bo for compute VM */
amdgpu_bo_unref(&to_amdgpu_bo_vm(vm->root.bo)->shadow);
  -    if (pasid)
-        vm->pasid = pasid;
-
      goto unreserve_bo;
  -free_idr:
-    if (pasid) {
-        unsigned long flags;
+free_pasid_entry:
+    amdgpu_vm_set_pasid(adev, vm, 0);
  - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
-        idr_remove(&adev->vm_manager.pasid_idr, pasid);
- spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
-    }
  unreserve_bo:
      amdgpu_bo_unreserve(vm->root.bo);
      return r;
@@ -3128,14 +3128,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm,
   */
  void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
  {
-    if (vm->pasid) {
-        unsigned long flags;
-
- spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
-        idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
- spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
-    }
-    vm->pasid = 0;
+    amdgpu_vm_set_pasid(adev, vm, 0);
      vm->is_compute_context = false;
  }
  @@ -3159,15 +3152,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
        root = amdgpu_bo_ref(vm->root.bo);
      amdgpu_bo_reserve(root, true);
-    if (vm->pasid) {
-        unsigned long flags;
-
- spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
-        idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
- spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
-        vm->pasid = 0;
-    }
-
+    amdgpu_vm_set_pasid(adev, vm, 0);
      dma_fence_wait(vm->last_unlocked, false);
      dma_fence_put(vm->last_unlocked);
  @@ -3249,8 +3234,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
      adev->vm_manager.vm_update_mode = 0;
  #endif
  -    idr_init(&adev->vm_manager.pasid_idr);
-    spin_lock_init(&adev->vm_manager.pasid_lock);
+    xa_init_flags(&adev->vm_manager.pasids, XA_FLAGS_LOCK_IRQ);
  }
    /**
@@ -3262,8 +3246,8 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
   */
  void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
  {
- WARN_ON(!idr_is_empty(&adev->vm_manager.pasid_idr));
-    idr_destroy(&adev->vm_manager.pasid_idr);
+    WARN_ON(!xa_empty(&adev->vm_manager.pasids));
+    xa_destroy(&adev->vm_manager.pasids);
        amdgpu_vmid_mgr_fini(adev);
  }
@@ -3332,13 +3316,13 @@ void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid,
      struct amdgpu_vm *vm;
      unsigned long flags;
  - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
+    xa_lock_irqsave(&adev->vm_manager.pasids, flags);
  -    vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
+    vm = xa_load(&adev->vm_manager.pasids, pasid);
      if (vm)
          *task_info = vm->task_info;
  - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
+    xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
  }
    /**
@@ -3380,15 +3364,15 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
      struct amdgpu_vm *vm;
      int r;
  - spin_lock_irqsave(&adev->vm_manager.pasid_lock, irqflags);
-    vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
+    xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
+    vm = xa_load(&adev->vm_manager.pasids, pasid);
      if (vm) {
          root = amdgpu_bo_ref(vm->root.bo);
          is_compute_context = vm->is_compute_context;
      } else {
          root = NULL;
      }
- spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, irqflags);
+    xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
        if (!root)
          return false;
@@ -3406,11 +3390,11 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
          goto error_unref;
        /* Double check that the VM still exists */
-    spin_lock_irqsave(&adev->vm_manager.pasid_lock, irqflags);
-    vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
+    xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
+    vm = xa_load(&adev->vm_manager.pasids, pasid);
      if (vm && vm->root.bo != root)
          vm = NULL;
- spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, irqflags);
+    xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
      if (!vm)
          goto error_unlock;
  diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index ddb85a85cbba..31c467764162 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -359,8 +359,7 @@ struct amdgpu_vm_manager {
      /* PASID to VM mapping, will be used in interrupt context to
       * look up VM of a page fault
       */
-    struct idr                pasid_idr;
-    spinlock_t                pasid_lock;
+    struct xarray                pasids;
  };
    struct amdgpu_bo_va_mapping;


_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux