Re: [PATCH 06/13] drm/amdgpu: use the new drm_exec object for CS v2

Tatsuyuki Ishi <ishitatsuyuki@xxxxxxxxx> · Tue, 20 Jun 2023 13:07:40 +0900

+Boris and +Matthew in case you want to take over this patch set.

Here are some review / testing comments, including those I posted before 
to ease tracking.

On 5/4/23 20:51, Christian König wrote:
Use the new component here as well and remove the old handling.

v2: drop dupplicate handling

Signed-off-by: Christian König <christian.koenig@xxxxxxx>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h         |   1 -
  drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c |  71 ++-----
  drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h |   5 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c      | 210 +++++++++-----------
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h      |   7 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c      |  22 --
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h      |   3 -
  7 files changed, 115 insertions(+), 204 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 02b827785e39..eba3e4f01ea6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -133,6 +141,8 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
  
  	list->first_userptr = first_userptr;
  	list->num_entries = num_entries;
+	sort(array, last_entry, sizeof(struct amdgpu_bo_list_entry),
+	     amdgpu_bo_list_entry_cmp, NULL);

Previously amdgpu_bo_list_get_list sorted all entries, but this one only 
sorts userptr entries. I think this changes behavior?

@@ -928,18 +874,56 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
  		e->user_invalidated = userpage_invalidated;
  	}
  
-	r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
-				   &duplicates);
-	if (unlikely(r != 0)) {
-		if (r != -ERESTARTSYS)
-			DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
-		goto out_free_user_pages;
+	drm_exec_while_not_all_locked(&p->exec) {
+		r = amdgpu_vm_lock_pd(&fpriv->vm, &p->exec);
+		drm_exec_continue_on_contention(&p->exec);

Duplicate handling is needed for pretty much every call of 
amdgpu_vm_lock_pd, as bo->tbo.base.resv == vm->root.bo->tbo.base.resv 
for AMDGPU_GEM_CREATE_VM_ALWAYS_VALID.

I think Boris's suggestion of having this through a common 
DRM_EXEC_FLAG_ALLOW_DUPLICATES flag fits well.

+		if (unlikely(r))
+			goto out_free_user_pages;
+
+		amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+			r = drm_exec_prepare_obj(&p->exec, &e->bo->tbo.base, 2);

Previously there were comments for how the fence count is calculated, 
now they seem to be removed. I'd prefer if they were properly retained 
as finding out who calls drm_resv_add_fence is not trivial, and wrong 
reservation count can also be really hard to debug.

Likewise for amdgpu_vm_lock_pd (which was added in another commit).

+			drm_exec_break_on_contention(&p->exec);
+			if (unlikely(r))
+				goto out_free_user_pages;
+
+			e->bo_va = amdgpu_vm_bo_find(vm, e->bo);
+			e->range = NULL;
+		}
+		drm_exec_continue_on_contention(&p->exec);
+
+		if (p->uf_bo) {
+			r = drm_exec_prepare_obj(&p->exec, &p->uf_bo->tbo.base,
+						 2);
+			drm_exec_continue_on_contention(&p->exec);
+			if (unlikely(r))
+				goto out_free_user_pages;
+		}
  	}
  
-	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
-		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+		struct mm_struct *usermm;
  
-		e->bo_va = amdgpu_vm_bo_find(vm, bo);
+		usermm = amdgpu_ttm_tt_get_usermm(e->bo->tbo.ttm);
+		if (usermm && usermm != current->mm) {
+			r = -EPERM;
+			goto out_free_user_pages;
+		}
+
+		if (amdgpu_ttm_tt_is_userptr(e->bo->tbo.ttm) &&
+		    e->user_invalidated && e->user_pages) {
+			amdgpu_bo_placement_from_domain(e->bo,
+							AMDGPU_GEM_DOMAIN_CPU);
+			r = ttm_bo_validate(&e->bo->tbo, &e->bo->placement,
+					    &ctx);
+			if (r)
+				goto out_free_user_pages;
+
+			amdgpu_ttm_tt_set_user_pages(e->bo->tbo.ttm,
+						     e->user_pages);
+		}
+
+		kvfree(e->user_pages);
+		e->user_pages = NULL;
  	}
  
  	amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
@@ -1296,9 +1271,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
  	 */
  	r = 0;
  	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
-		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
-
-		r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, e->range);
+		r |= !amdgpu_ttm_tt_get_user_pages_done(e->bo->tbo.ttm,
+							e->range);
  		e->range = NULL;

e->range = NULL; needs to be removed, or it's causing *massive* memory 
leaks.

  	}
  	if (r) {
@@ -1307,20 +1281,22 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
  	}
  
  	p->fence = dma_fence_get(&leader->base.s_fence->finished);
-	list_for_each_entry(e, &p->validated, tv.head) {
+	drm_exec_for_each_locked_object(&p->exec, index, gobj) {
+
+		ttm_bo_move_to_lru_tail_unlocked(&gem_to_amdgpu_bo(gobj)->tbo);
  
  		/* Everybody except for the gang leader uses READ */
  		for (i = 0; i < p->gang_size; ++i) {
  			if (p->jobs[i] == leader)
  				continue;
  
-			dma_resv_add_fence(e->tv.bo->base.resv,
+			dma_resv_add_fence(gobj->resv,
  					   &p->jobs[i]->base.s_fence->finished,
  					   DMA_RESV_USAGE_READ);
  		}
  
-		/* The gang leader is remembered as writer */
-		e->tv.num_shared = 0;
+		/* The gang leader as remembered as writer */
+		dma_resv_add_fence(gobj->resv, p->fence, DMA_RESV_USAGE_WRITE);
  	}

Previously PD used READ accesses, now everything is WRITE. This probably 
isn't right.

Regards,
Tatsuyuki