This patch:
- Attaches the last TLB flush fence to the PT objects being freed
- Adds a new ptr in the VM to save this last TLB flush fence
- Adds a new lock in the VM to serialize updates of the saved TLB
  flush fence
- Adds a new ptr in the tlb_flush structure to save the VM

The idea is to delay the freeing of page table objects until the
respective TLB entries have been flushed.
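For reference, the flush side is expected to publish its fence under the
new lock so the PT free path can pick it up. A minimal sketch of what
that update could look like (the real call site lives in
amdgpu_vm_tlb_fence.c and is not shown in full here; the function name
amdgpu_vm_save_tlb_fence is illustrative, only vm->tlb_flush_lock and
vm->tlb_fence_last come from this patch):

/* Illustrative sketch only, not part of the patch: shows how the TLB
 * flush path could publish its fence for amdgpu_vm_pt_free() to attach
 * to page tables being freed. Assumes kernel context with
 * <linux/dma-fence.h> and <linux/mutex.h> available.
 */
static void amdgpu_vm_save_tlb_fence(struct amdgpu_vm *vm,
				     struct dma_fence *fence)
{
	mutex_lock(&vm->tlb_flush_lock);
	/* Drop the reference to the previous flush fence and take one on
	 * the new fence; concurrent PT frees see either a valid fence or
	 * the stub, never a stale pointer.
	 */
	dma_fence_put(vm->tlb_fence_last);
	vm->tlb_fence_last = dma_fence_get(fence);
	mutex_unlock(&vm->tlb_flush_lock);
}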
V2: rebase
Cc: Christian König <Christian.Koenig@xxxxxxx>
Cc: Alex Deucher <alexander.deucher@xxxxxxx>
Cc: Felix Kuehling <felix.kuehling@xxxxxxx>
Cc: Rajneesh Bhardwaj <rajneesh.bhardwaj@xxxxxxx>
Signed-off-by: Shashank Sharma <shashank.sharma@xxxxxxx>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 3 +++
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 4 +++
drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 27 +++++++++++++++++++
.../gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c | 13 +++++++--
4 files changed, 45 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 67c690044b97..b0e81c249e3a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2245,6 +2245,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
vm->generation = 0;
mutex_init(&vm->eviction_lock);
+ mutex_init(&vm->tlb_flush_lock);
vm->evicting = false;
vm->tlb_fence_context = dma_fence_context_alloc(1);
@@ -2360,7 +2361,9 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
}
dma_fence_put(vm->last_update);
+ dma_fence_put(vm->tlb_fence_last);
vm->last_update = dma_fence_get_stub();
+ vm->tlb_fence_last = dma_fence_get_stub();
vm->is_compute_context = true;
/* Free the shadow bo for compute VM */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 8e6fd25d07b7..b05bc586237f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -334,6 +334,10 @@ struct amdgpu_vm {
uint64_t *tlb_seq_cpu_addr;
uint64_t tlb_fence_context;
+ /* Last TLB flush fence and the lock serializing updates to it */
+ struct mutex tlb_flush_lock;
+ struct dma_fence *tlb_fence_last;
+
atomic64_t kfd_last_flushed_seq;
/* How many times we had to re-generate the page tables */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index 95dc0afdaffb..f1c4418c4d63 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -631,6 +631,18 @@ static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev,
return r;
}
+static inline
+void amdgpu_vm_attach_tlb_fence(struct amdgpu_bo *bo, struct dma_fence *fence)
+{
+ if (!bo || !fence)
+ return;
+
+ if (!dma_resv_reserve_fences(bo->tbo.base.resv, 1)) {
+ dma_resv_add_fence(bo->tbo.base.resv, fence,
+ DMA_RESV_USAGE_BOOKKEEP);
+ }
+}
+
/**
* amdgpu_vm_pt_free - free one PD/PT
*
@@ -638,6 +650,7 @@ static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev,
*/
static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
{
+ struct amdgpu_vm *vm;
struct amdgpu_bo *shadow;
if (!entry->bo)
@@ -646,9 +659,23 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
entry->bo->vm_bo = NULL;
shadow = amdgpu_bo_shadowed(entry->bo);
if (shadow) {
+ vm = shadow->vm_bo->vm;
+
+ mutex_lock(&vm->tlb_flush_lock);
+ if (vm->tlb_fence_last)
+ amdgpu_vm_attach_tlb_fence(shadow, vm->tlb_fence_last);
+ mutex_unlock(&vm->tlb_flush_lock);
+
ttm_bo_set_bulk_move(&shadow->tbo, NULL);
amdgpu_bo_unref(&shadow);
}
+
+ vm = entry->vm;
+ mutex_lock(&vm->tlb_flush_lock);
+ if (vm->tlb_fence_last)
+ amdgpu_vm_attach_tlb_fence(entry->bo, vm->tlb_fence_last);
+ mutex_unlock(&vm->tlb_flush_lock);
+