On 2024-03-01 06:07, Shashank Sharma wrote:
The idea behind this patch is to delay the freeing of PT entry objects
until the TLB flush is done. This patch:
- Adds a tlb_flush_waitlist which will keep the objects that need to be
  freed after tlb_flush.
- Adds PT entries to this list in amdgpu_vm_pt_free_dfs, instead of
  freeing them immediately.
- Exports function amdgpu_vm_pt_free to be called directly.
- Adds a 'force' input bool to amdgpu_vm_pt_free_dfs to differentiate
  between immediate freeing of the BOs (like from amdgpu_vm_pt_free_root)
  vs delayed freeing.

V2: rebase
V4: (Christian)
- add only locked PTE entries in TLB flush waitlist.
- do not create a separate function for list flush.
- do not create a new lock for TLB flush.
- there is no need to wait on tlb_flush_fence exclusively.

Cc: Christian König <Christian.Koenig@xxxxxxx>
Cc: Alex Deucher <alexander.deucher@xxxxxxx>
Cc: Felix Kuehling <felix.kuehling@xxxxxxx>
Cc: Rajneesh Bhardwaj <rajneesh.bhardwaj@xxxxxxx>
Signed-off-by: Shashank Sharma <shashank.sharma@xxxxxxx>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    | 10 ++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h    |  4 ++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 21 ++++++++++++++------
 3 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 310aae6fb49b..94581a1fe34f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -990,11 +990,20 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,

 	/* Prepare a TLB flush fence to be attached to PTs */
 	if (!unlocked && params.needs_flush && vm->is_compute_context) {
+		struct amdgpu_vm_bo_base *entry, *next;
+
 		amdgpu_vm_tlb_fence_create(adev, vm, fence);

 		/* Makes sure no PD/PT is freed before the flush */
 		dma_resv_add_fence(vm->root.bo->tbo.base.resv, *fence,
 				   DMA_RESV_USAGE_BOOKKEEP);
+
+		if (list_empty(&vm->tlb_flush_waitlist))
+			goto error_unlock;
+
+		/* Now actually free the waitlist */
+		list_for_each_entry_safe(entry, next, &vm->tlb_flush_waitlist, vm_status)
+			amdgpu_vm_pt_free(entry);
 	}

 error_unlock:
@@ -2214,6 +2223,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	INIT_LIST_HEAD(&vm->pt_freed);
 	INIT_WORK(&vm->pt_free_work, amdgpu_vm_pt_free_work);
 	INIT_KFIFO(vm->faults);
+	INIT_LIST_HEAD(&vm->tlb_flush_waitlist);

 	r = amdgpu_seq64_alloc(adev, &vm->tlb_seq_va, &vm->tlb_seq_cpu_addr);
 	if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 298f604b8e5f..ba374c2c61bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -343,6 +343,9 @@ struct amdgpu_vm {
 	uint64_t		*tlb_seq_cpu_addr;
 	uint64_t		tlb_fence_context;

+	/* temporary storage of PT BOs until the TLB flush */
+	struct list_head	tlb_flush_waitlist;
+
 	atomic64_t		kfd_last_flushed_seq;

 	/* How many times we had to re-generate the page tables */
@@ -545,6 +548,7 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
 			  uint64_t start, uint64_t end,
 			  uint64_t dst, uint64_t flags);
 void amdgpu_vm_pt_free_work(struct work_struct *work);
+void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry);

 #if defined(CONFIG_DEBUG_FS)
 void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index 95dc0afdaffb..cb14e5686c0f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -636,7 +636,7 @@ static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev,
  *
  * @entry: PDE to free
  */
-static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
+void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
 {
 	struct amdgpu_bo *shadow;

@@ -685,13 +685,15 @@ void amdgpu_vm_pt_free_work(struct work_struct *work)
  * @vm: amdgpu vm structure
  * @start: optional cursor where to start freeing PDs/PTs
  * @unlocked: vm resv unlock status
+ * @force: force free all PDs/PTs without waiting for TLB flush
  *
  * Free the page directory or page table level and all sub levels.
  */
 static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
 				  struct amdgpu_vm *vm,
 				  struct amdgpu_vm_pt_cursor *start,
-				  bool unlocked)
+				  bool unlocked,
+				  bool force)
 {
 	struct amdgpu_vm_pt_cursor cursor;
 	struct amdgpu_vm_bo_base *entry;
@@ -708,11 +710,15 @@ static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
 		return;
 	}

-	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
-		amdgpu_vm_pt_free(entry);
I feel like if we attach the TLB flush fence before freeing the PT BOs,
then we don't need the tlb_flush_waitlist.
Regards,
Philip
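
To make the suggestion above concrete, here is a rough sketch of that
ordering (a hypothetical rearrangement of amdgpu_vm_update_range(), not
tested code; in the real code path the freeing happens inside
amdgpu_vm_ptes_update(), so this would require creating the fence
earlier). The idea relies on PT BOs sharing the root reservation, so a
BOOKKEEP fence attached to it delays release of their backing memory:

	/*
	 * Hypothetical ordering, not part of the patch: attach the TLB
	 * flush fence to the root reservation *before* any PT BO is
	 * freed. Since page-table BOs share the root reservation, TTM
	 * defers releasing their backing memory until all BOOKKEEP
	 * fences have signaled, so no waitlist would be needed.
	 */
	if (!unlocked && params.needs_flush && vm->is_compute_context) {
		amdgpu_vm_tlb_fence_create(adev, vm, fence);

		/* Fence is in place before any PD/PT is freed */
		dma_resv_add_fence(vm->root.bo->tbo.base.resv, *fence,
				   DMA_RESV_USAGE_BOOKKEEP);
	}

	/* Only afterwards tear down the unused page tables */
	amdgpu_vm_pt_free_dfs(adev, vm, &cursor, unlocked);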
+	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) {
+		if (!force)
+			list_move(&entry->vm_status, &vm->tlb_flush_waitlist);
+		else
+			amdgpu_vm_pt_free(entry);
+	}

 	if (start)
-		amdgpu_vm_pt_free(start->entry);
+		list_move(&start->entry->vm_status, &vm->tlb_flush_waitlist);
 }

 /**
@@ -724,7 +730,7 @@ static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
  */
 void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 {
-	amdgpu_vm_pt_free_dfs(adev, vm, NULL, false);
+	amdgpu_vm_pt_free_dfs(adev, vm, NULL, false, true);
 }

 /**
@@ -1059,7 +1065,8 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
 				params->needs_flush = true;
 			amdgpu_vm_pt_free_dfs(adev, params->vm, &cursor,
-					      params->unlocked);
+					      params->unlocked,
+					      false);
 		}
 		amdgpu_vm_pt_next(adev, &cursor);
 	}
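
For readers skimming the fragmented hunks, the deferred-free pattern the
patch implements reduces to two touch points (a condensed sketch using
the patch's own names, not the literal applied code):

	/* In amdgpu_vm_pt_free_dfs(): park entries instead of freeing */
	if (!force)
		list_move(&entry->vm_status, &vm->tlb_flush_waitlist);
	else
		amdgpu_vm_pt_free(entry);

	/*
	 * In amdgpu_vm_update_range(): once the TLB flush fence is
	 * created and attached to the root resv, drain the waitlist.
	 */
	list_for_each_entry_safe(entry, next, &vm->tlb_flush_waitlist, vm_status)
		amdgpu_vm_pt_free(entry);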