Re: 回复: [RFC PATCH v2] drm/amdgpu: Remove kfd eviction fence before release bo

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Am 07.02.20 um 14:50 schrieb Pan, Xinhui:
[AMD Official Use Only - Internal Distribution Only]

________________________________________
发件人: Koenig, Christian <Christian.Koenig@xxxxxxx>
发送时间: 2020年2月7日 21:46
收件人: Pan, Xinhui; amd-gfx@xxxxxxxxxxxxxxxxxxxxx
抄送: Kuehling, Felix; Deucher, Alexander
主题: Re: [RFC PATCH v2] drm/amdgpu: Remove kfd eviction fence before release bo

Am 07.02.20 um 14:42 schrieb Pan, Xinhui:
No need to trigger eviction as the memory mapping will not be used anymore.

All pt/pd bos share the same resv, hence the same shared eviction fence. Every time a page table is freed, the fence will be signaled and that causes unexpected kfd evictions.

Signed-off-by: xinhui pan <xinhui.pan@xxxxxxxxxxx>
---
   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  1 +
   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 78 ++++++++++++++++++-
   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    | 10 ++-
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        |  5 +-
   drivers/gpu/drm/ttm/ttm_bo.c                  | 38 +++++----
   5 files changed, 111 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 47b0f2957d1f..265b1ed7264c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -96,6 +96,7 @@ struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
                                                      struct mm_struct *mm);
   bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
   struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
+int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo);

   struct amdkfd_process_info {
       /* List head of all VMs that belong to a KFD process */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index ef721cb65868..11315095c29b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -223,7 +223,7 @@ void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
   static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
                                       struct amdgpu_amdkfd_fence *ef)
   {
-     struct dma_resv *resv = bo->tbo.base.resv;
+     struct dma_resv *resv = &bo->tbo.base._resv;
That won't work either and probably break a bunch of other cases.

[xh] kfd bos which are allocated explicitly use _resv AFAIK.
only pt/pd bos share the root._resv

No that is not correct. Imports potentially don't use _resv here.

Anyway please see the patchset I've pointed out in one of my last mails. With that we can assign bo->tbo.base.resv to bo->tbo._base for deleted BOs.

Christian.


Christian.

       struct dma_resv_list *old, *new;
       unsigned int i, j, k;

@@ -276,6 +276,78 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
       return 0;
   }

+int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo)
+{
+     struct amdgpu_vm_bo_base        *vm_bo = bo->vm_bo;
+     struct amdgpu_vm *vm;
+     struct amdkfd_process_info *info;
+     struct amdgpu_amdkfd_fence *ef;
+     struct amdgpu_bo *parent;
+     int locked;
+     int ret;
+     struct ttm_bo_global *glob = &ttm_bo_glob;
+
+     if (vm_bo == NULL)
+             return 0;
+
+     /* Page table bo has a reference of the parent bo.
+      * BO itself can't guarantee the vm it points to is alive.
+      * for example, VM is going to free page tables, and the pt/pd bo might be
+      * freed by a workqueue. In that case, the vm might be freed already,
+      * leaving the bo->vm_bo points to vm.root.
+      *
+      * so to avoid that, when kfd free its vms,
+      * 1) set vm->process_info to NULL if this is the last vm.
+      * 2) set root_bo->vm_bo to NULL.
+      *
+      * but there are still races, just like
+      * cpu 1                cpu 2
+      *                      !vm_bo
+      * ->info = NULL
+      * free(info)
+      * ->vm_bo = NULL
+      * free (vm)
+      *                      info = vm->info //invalid vm
+      *
+      * So to avoid the race, use ttm_bo_glob lru_lock.
+      * generally speaking, adding a new lock is acceptable.
+      * But reusing this lock is simple.
+      */
+     parent = bo;
+     while (parent->parent)
+             parent = parent->parent;
+
+     spin_lock(&glob->lru_lock);
+     vm_bo = parent->vm_bo;
+     if (!vm_bo) {
+             spin_unlock(&glob->lru_lock);
+             return 0;
+     }
+
+     vm = vm_bo->vm;
+     if (!vm) {
+             spin_unlock(&glob->lru_lock);
+             return 0;
+     }
+
+     info = vm->process_info;
+     if (!info || !info->eviction_fence) {
+             spin_unlock(&glob->lru_lock);
+             return 0;
+     }
+
+     ef = container_of(dma_fence_get(&info->eviction_fence->base),
+                     struct amdgpu_amdkfd_fence, base);
+     spin_unlock(&glob->lru_lock);
+
+     locked = dma_resv_trylock(&bo->tbo.base._resv);
+     ret = amdgpu_amdkfd_remove_eviction_fence(bo, ef);
+     dma_fence_put(&ef->base);
+     if (locked)
+             dma_resv_unlock(&bo->tbo.base._resv);
+     return ret;
+}
+
   static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
                                    bool wait)
   {
@@ -1030,6 +1102,7 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
   {
       struct amdkfd_process_info *process_info = vm->process_info;
       struct amdgpu_bo *pd = vm->root.base.bo;
+     struct ttm_bo_global *glob = &ttm_bo_glob;

       if (!process_info)
               return;
@@ -1051,6 +1124,9 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
               WARN_ON(!list_empty(&process_info->userptr_valid_list));
               WARN_ON(!list_empty(&process_info->userptr_inval_list));

+             spin_lock(&glob->lru_lock);
+             vm->process_info = NULL;
+             spin_unlock(&glob->lru_lock);
               dma_fence_put(&process_info->eviction_fence->base);
               cancel_delayed_work_sync(&process_info->restore_userptr_work);
               put_pid(process_info->pid);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 6f60a581e3ba..6ad12298fa84 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -1307,19 +1307,23 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
       if (abo->kfd_bo)
               amdgpu_amdkfd_unreserve_memory_limit(abo);

+     amdgpu_amdkfd_remove_fence_on_pt_pd_bos(abo);
+     abo->vm_bo = NULL;
+
       if (bo->mem.mem_type != TTM_PL_VRAM || !bo->mem.mm_node ||
           !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE))
               return;

-     dma_resv_lock(bo->base.resv, NULL);
+     /* Only kfd bos wipe vram on release. The resv is &_resv. */
+     dma_resv_lock(&bo->base._resv, NULL);

-     r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence);
+     r = amdgpu_fill_buffer(abo, AMDGPU_POISON, &bo->base._resv, &fence);
       if (!WARN_ON(r)) {
               amdgpu_bo_fence(abo, fence, false);
               dma_fence_put(fence);
       }

-     dma_resv_unlock(bo->base.resv);
+     dma_resv_unlock(&bo->base._resv);
   }

   /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index cc56eaba1911..2b96447e30e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -945,7 +945,6 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
   static void amdgpu_vm_free_table(struct amdgpu_vm_pt *entry)
   {
       if (entry->base.bo) {
-             entry->base.bo->vm_bo = NULL;
               list_del(&entry->base.vm_status);
               amdgpu_bo_unref(&entry->base.bo->shadow);
               amdgpu_bo_unref(&entry->base.bo);
@@ -3074,6 +3073,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
       struct amdgpu_bo_va_mapping *mapping, *tmp;
       bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt;
       struct amdgpu_bo *root;
+     struct ttm_bo_global *glob = &ttm_bo_glob;
       int i;

       amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
@@ -3105,6 +3105,9 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
       }

       amdgpu_vm_free_pts(adev, vm, NULL);
+     spin_lock(&glob->lru_lock);
+     root->vm_bo = NULL;
+     spin_unlock(&glob->lru_lock);
       amdgpu_bo_unreserve(root);
       amdgpu_bo_unref(&root);
       WARN_ON(vm->root.base.bo);
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 1494aebb8128..706cd60eb9e0 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -429,8 +429,8 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
       BUG_ON(!dma_resv_trylock(&bo->base._resv));

       r = dma_resv_copy_fences(&bo->base._resv, bo->base.resv);
-     if (r)
-             dma_resv_unlock(&bo->base._resv);
+
+     dma_resv_unlock(&bo->base._resv);

       return r;
   }
@@ -455,23 +455,19 @@ static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo)
       }
   }

-static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
+static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo, bool enqueue)
   {
       struct ttm_bo_device *bdev = bo->bdev;
       int ret;

-     ret = ttm_bo_individualize_resv(bo);
-     if (ret) {
-             /* Last resort, if we fail to allocate memory for the
-              * fences block for the BO to become idle
-              */
-             dma_resv_wait_timeout_rcu(bo->base.resv, true, false,
-                                                 30 * HZ);
-             spin_lock(&ttm_bo_glob.lru_lock);
-             goto error;
-     }
-
       spin_lock(&ttm_bo_glob.lru_lock);
+
+     if (enqueue)
+             goto queue;
+
+     if (bo->base.resv != &bo->base._resv)
+             BUG_ON(!dma_resv_trylock(&bo->base._resv));
+
       ret = dma_resv_trylock(bo->base.resv) ? 0 : -EBUSY;
       if (!ret) {
               if (dma_resv_test_signaled_rcu(&bo->base._resv, true)) {
@@ -504,7 +500,7 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
               dma_resv_unlock(&bo->base._resv);
       }

-error:
+queue:
       kref_get(&bo->list_kref);
       list_add_tail(&bo->ddestroy, &bdev->ddestroy);
       spin_unlock(&ttm_bo_glob.lru_lock);
@@ -655,6 +651,16 @@ static void ttm_bo_release(struct kref *kref)
           container_of(kref, struct ttm_buffer_object, kref);
       struct ttm_bo_device *bdev = bo->bdev;
       struct ttm_mem_type_manager *man = &bdev->man[bo->mem.mem_type];
+     int ret;
+
+     ret = ttm_bo_individualize_resv(bo);
+     if (ret) {
+             /* Last resort, if we fail to allocate memory for the
+              * fences block for the BO to become idle
+              */
+             dma_resv_wait_timeout_rcu(bo->base.resv, true, false,
+                             30 * HZ);
+     }

       if (bo->bdev->driver->release_notify)
               bo->bdev->driver->release_notify(bo);
@@ -663,7 +669,7 @@ static void ttm_bo_release(struct kref *kref)
       ttm_mem_io_lock(man, false);
       ttm_mem_io_free_vm(bo);
       ttm_mem_io_unlock(man);
-     ttm_bo_cleanup_refs_or_queue(bo);
+     ttm_bo_cleanup_refs_or_queue(bo, !!ret);
       kref_put(&bo->list_kref, ttm_bo_release_list);
   }


_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux