[PATCH v4 2/2] drm/amdgpu: unref pt bo after job submit

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Free page table bo before job submit is insane.
We might touch invalid memory while job is runnig.

we now have individualized bo resv during bo releasing.
So any fences added to root PT bo is actually untested when
a normal PT bo is releasing.

We might hit gmc page fault or memory just got overwrited.

Cc: Christian König <christian.koenig@xxxxxxx>
Cc: Alex Deucher <alexander.deucher@xxxxxxx>
Cc: Felix Kuehling <Felix.Kuehling@xxxxxxx>
Signed-off-by: xinhui pan <xinhui.pan@xxxxxxx>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 24 +++++++++++++++++++++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  3 +++
 2 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 73398831196f..346e2f753474 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -937,6 +937,21 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
 	return r;
 }
 
+static void amdgpu_vm_free_zombie_bo(struct amdgpu_device *adev,
+		struct amdgpu_vm *vm)
+{
+	struct amdgpu_vm_pt *entry;
+
+	while (!list_empty(&vm->zombies)) {
+		entry = list_first_entry(&vm->zombies, struct amdgpu_vm_pt,
+				base.vm_status);
+		list_del(&entry->base.vm_status);
+
+		amdgpu_bo_unref(&entry->base.bo->shadow);
+		amdgpu_bo_unref(&entry->base.bo);
+	}
+}
+
 /**
  * amdgpu_vm_free_table - fre one PD/PT
  *
@@ -945,10 +960,9 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
 static void amdgpu_vm_free_table(struct amdgpu_vm_pt *entry)
 {
 	if (entry->base.bo) {
+		list_move(&entry->base.vm_status,
+				&entry->base.bo->vm_bo->vm->zombies);
 		entry->base.bo->vm_bo = NULL;
-		list_del(&entry->base.vm_status);
-		amdgpu_bo_unref(&entry->base.bo->shadow);
-		amdgpu_bo_unref(&entry->base.bo);
 	}
 	kvfree(entry->entries);
 	entry->entries = NULL;
@@ -1624,6 +1638,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	r = vm->update_funcs->commit(&params, fence);
 
 error_unlock:
+	amdgpu_vm_free_zombie_bo(adev, vm);
 	amdgpu_vm_eviction_unlock(vm);
 	return r;
 }
@@ -2807,6 +2822,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	INIT_LIST_HEAD(&vm->invalidated);
 	spin_lock_init(&vm->invalidated_lock);
 	INIT_LIST_HEAD(&vm->freed);
+	INIT_LIST_HEAD(&vm->zombies);
 
 
 	/* create scheduler entities for page table updates */
@@ -3119,6 +3135,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	}
 
 	amdgpu_vm_free_pts(adev, vm, NULL);
+	amdgpu_vm_free_zombie_bo(adev, vm);
+
 	amdgpu_bo_unreserve(root);
 	amdgpu_bo_unref(&root);
 	WARN_ON(vm->root.base.bo);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index b5705fcfc935..9baf44fa16f0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -269,6 +269,9 @@ struct amdgpu_vm {
 	/* BO mappings freed, but not yet updated in the PT */
 	struct list_head	freed;
 
+	/* BO will be freed soon */
+	struct list_head	zombies;
+
 	/* contains the page directory */
 	struct amdgpu_vm_pt     root;
 	struct dma_fence	*last_update;
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux