On 01.03.24 21:17, Sharma, Shashank wrote:
> On 01/03/2024 14:29, Christian König wrote:
>> On 01.03.24 12:07, Shashank Sharma wrote:
>>> The idea behind this patch is to delay the freeing of PT entry objects
>>> until the TLB flush is done. This patch:
>>> - Adds a tlb_flush_waitlist which will keep the objects that need to be
>>>   freed after tlb_flush
>>> - Adds PT entries in this list in amdgpu_vm_pt_free_dfs, instead of
>>>   freeing them immediately.
>>> - Exports function amdgpu_vm_pt_free to be called directly.
>>> - Adds a 'force' input bool to amdgpu_vm_pt_free_dfs to differentiate
>>>   between immediate freeing of the BOs (like from amdgpu_vm_pt_free_root)
>>>   vs delayed freeing.
>>>
>>> V2: rebase
>>> V4: (Christian)
>>> - add only locked PTE entries in TLB flush waitlist.
>>> - do not create a separate function for list flush.
>>> - do not create a new lock for TLB flush.
>>> - there is no need to wait on tlb_flush_fence exclusively.
>>>
>>> Cc: Christian König <Christian.Koenig@xxxxxxx>
>>> Cc: Alex Deucher <alexander.deucher@xxxxxxx>
>>> Cc: Felix Kuehling <felix.kuehling@xxxxxxx>
>>> Cc: Rajneesh Bhardwaj <rajneesh.bhardwaj@xxxxxxx>
>>> Signed-off-by: Shashank Sharma <shashank.sharma@xxxxxxx>
>>> ---
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    | 10 ++++++++++
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h    |  4 ++++
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 21 ++++++++++++++-------
>>>  3 files changed, 28 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> index 310aae6fb49b..94581a1fe34f 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> @@ -990,11 +990,20 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>>  	/* Prepare a TLB flush fence to be attached to PTs */
>>>  	if (!unlocked && params.needs_flush && vm->is_compute_context) {
>>> +		struct amdgpu_vm_bo_base *entry, *next;
>>> +
>>>  		amdgpu_vm_tlb_fence_create(adev, vm, fence);
>>>
>>>  		/* Makes sure no PD/PT is freed before the flush */
>>>  		dma_resv_add_fence(vm->root.bo->tbo.base.resv, *fence,
>>>  				   DMA_RESV_USAGE_BOOKKEEP);
>>> +
>>> +		if (list_empty(&vm->tlb_flush_waitlist))
>>> +			goto error_unlock;
>>> +
>>> +		/* Now actually free the waitlist */
>>> +		list_for_each_entry_safe(entry, next, &vm->tlb_flush_waitlist, vm_status)
>>> +			amdgpu_vm_pt_free(entry);
>>>  	}
>>>
>>>  error_unlock:
>>
>> This needs to be outside of the "if". We also need to free the PDs/PTs
>> in non compute VMs.
>
> Noted,
>
>>> @@ -2214,6 +2223,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>>  	INIT_LIST_HEAD(&vm->pt_freed);
>>>  	INIT_WORK(&vm->pt_free_work, amdgpu_vm_pt_free_work);
>>>  	INIT_KFIFO(vm->faults);
>>> +	INIT_LIST_HEAD(&vm->tlb_flush_waitlist);
>>>
>>>  	r = amdgpu_seq64_alloc(adev, &vm->tlb_seq_va, &vm->tlb_seq_cpu_addr);
>>>  	if (r)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> index 298f604b8e5f..ba374c2c61bd 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> @@ -343,6 +343,9 @@ struct amdgpu_vm {
>>>  	uint64_t *tlb_seq_cpu_addr;
>>>  	uint64_t tlb_fence_context;
>>>
>>> +	/* temporary storage of PT BOs until the TLB flush */
>>> +	struct list_head tlb_flush_waitlist;
>>> +
>>>  	atomic64_t kfd_last_flushed_seq;
>>>
>>>  	/* How many times we had to re-generate the page tables */
>>> @@ -545,6 +548,7 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
>>>  			  uint64_t start, uint64_t end,
>>>  			  uint64_t dst, uint64_t flags);
>>>  void amdgpu_vm_pt_free_work(struct work_struct *work);
>>> +void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry);
>>>
>>>  #if defined(CONFIG_DEBUG_FS)
>>>  void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m);
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
>>> index 95dc0afdaffb..cb14e5686c0f 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
>>> @@ -636,7 +636,7 @@ static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev,
>>>   *
>>>   * @entry: PDE to free
>>>   */
>>> -static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
>>> +void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
>>>  {
>>>  	struct amdgpu_bo *shadow;
>>> @@ -685,13 +685,15 @@ void amdgpu_vm_pt_free_work(struct work_struct *work)
>>>   * @vm: amdgpu vm structure
>>>   * @start: optional cursor where to start freeing PDs/PTs
>>>   * @unlocked: vm resv unlock status
>>> + * @force: force free all PDs/PTs without waiting for TLB flush
>>>   *
>>>   * Free the page directory or page table level and all sub levels.
>>>   */
>>>  static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
>>>  				  struct amdgpu_vm *vm,
>>>  				  struct amdgpu_vm_pt_cursor *start,
>>> -				  bool unlocked)
>>> +				  bool unlocked,
>>> +				  bool force)
>>>  {
>>>  	struct amdgpu_vm_pt_cursor cursor;
>>>  	struct amdgpu_vm_bo_base *entry;
>>> @@ -708,11 +710,15 @@ static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
>>>  		return;
>>>  	}
>>>
>>> -	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
>>> -		amdgpu_vm_pt_free(entry);
>>> +	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) {
>>> +		if (!force)
>>> +			list_move(&entry->vm_status, &vm->tlb_flush_waitlist);
>>> +		else
>>> +			amdgpu_vm_pt_free(entry);
>>> +	}
>>>
>>>  	if (start)
>>> -		amdgpu_vm_pt_free(start->entry);
>>> +		list_move(&start->entry->vm_status, &vm->tlb_flush_waitlist);
>>>  }
>>
>> The complexity inside amdgpu_vm_pt_free_dfs() really starts to make no
>> sense any more.
>>
>> First of all, ditch the usage in amdgpu_vm_pt_free_root(). Instead just
>> inline the call to for_each_amdgpu_vm_pt_dfs_safe() to free up all of
>> the page tables immediately.
>
> Noted,
>
>> Then the other use case in amdgpu_vm_ptes_update():
>>
>>     /* Make sure previous mapping is freed */
>>     if (cursor.entry->bo) {
>>         params->table_freed = true;
>>         amdgpu_vm_pt_free_dfs(adev, params->vm, &cursor,
>>                               params->unlocked);
>>     }
>>
>> We should basically change params->table_freed into a list_head and put
>> all to be freed entries on there.
>> Something like that:
>>
>>     spin_lock(&vm->status_lock);
>>     for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
>>         list_move(&entry->vm_status, &params->table_freed);
>>     spin_unlock(&vm->status_lock);
>>
>> And then finally in amdgpu_vm_update_range() we should call
>> amdgpu_vm_pt_free_dfs() to really free up all PDs/PTs (probably rename
>> the function while at it).
>
> In amdgpu_vm_pt_free_dfs() there are two different lists for locked and
> unlocked PT entries:
>
> - unlocked is saved in vm->pt_freed
> - locked is freed immediately, which we now want to add into another
>   list (params->table_freed in your suggestion) and free after the
>   tlb_flush
>
> which means we have to do something like this in amdgpu_vm_ptes_update:
>
>     /* Make sure previous mapping is freed */
>     if (cursor.entry->bo) {
>         for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
>             if (unlocked)
>                 list_move(&entry->vm_status, &vm->pt_freed);
>             else
>                 list_move(&entry->vm_status, &params->table_freed);
No, here just move everything to params->tables_freed as necessary.
>     }
>
> and then we have to change the amdgpu_vm_pt_free_dfs() implementation
> to simply free the BOs, not searching for anything, like:
>
>     if (unlocked) {
And then do a list_splice() here to move everything from tables_freed to pt_free.
>         schedule_work(pt_free_work);
>     } else {
>         list_for_each(params->table_freed)
>             amdgpu_vm_pt_free(entry);
>     }
>
> and then we would be able to call amdgpu_vm_pt_free_dfs() from
> amdgpu_vm_update_range().
>
> Does that sound good to you?
Very close, I just want to avoid any extra unlocked handling in the low level PTE update function.
Essentially we only record which PDs/PTs to free in there because we have a cursor at the right location in that moment.
The state machine itself is something I try to keep out of the PTE update code.
Regards, Christian.
> - Shashank
>
>> Regards,
>> Christian.
>
>>> /**
>>> @@ -724,7 +730,7 @@ static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
>>>   */
>>>  void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>>>  {
>>> -	amdgpu_vm_pt_free_dfs(adev, vm, NULL, false);
>>> +	amdgpu_vm_pt_free_dfs(adev, vm, NULL, false, true);
>>>  }
>>>
>>>  /**
>>> @@ -1059,7 +1065,8 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
>>>  			params->needs_flush = true;
>>>  			amdgpu_vm_pt_free_dfs(adev, params->vm, &cursor,
>>> -					      params->unlocked);
>>> +					      params->unlocked,
>>> +					      false);
>>>  		}
>>>  		amdgpu_vm_pt_next(adev, &cursor);
>>>  	}