+Boris and +Matthew in case you want to take over this patch set.
Here are some review / testing comments, including those I posted before
to ease tracking.
On 5/4/23 20:51, Christian König wrote:
Use the new component here as well and remove the old handling.
v2: drop dupplicate handling
Signed-off-by: Christian König <christian.koenig@xxxxxxx>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 -
drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 71 ++-----
drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 5 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 210 +++++++++-----------
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h | 7 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 22 --
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 3 -
7 files changed, 115 insertions(+), 204 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 02b827785e39..eba3e4f01ea6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -133,6 +141,8 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
list->first_userptr = first_userptr;
list->num_entries = num_entries;
+ sort(array, last_entry, sizeof(struct amdgpu_bo_list_entry),
+ amdgpu_bo_list_entry_cmp, NULL);
Previously amdgpu_bo_list_get_list sorted all entries, but this one only
sorts userptr entries. I think this changes behavior?
@@ -928,18 +874,56 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
e->user_invalidated = userpage_invalidated;
}
- r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
- &duplicates);
- if (unlikely(r != 0)) {
- if (r != -ERESTARTSYS)
- DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
- goto out_free_user_pages;
+ drm_exec_while_not_all_locked(&p->exec) {
+ r = amdgpu_vm_lock_pd(&fpriv->vm, &p->exec);
+ drm_exec_continue_on_contention(&p->exec);
Duplicate handling is needed for pretty much every call of
amdgpu_vm_lock_pd, as bo->tbo.base.resv == vm->root.bo->tbo.base.resv
for AMDGPU_GEM_CREATE_VM_ALWAYS_VALID.
I think Boris's suggestion of having this through a common
DRM_EXEC_FLAG_ALLOW_DUPLICATES flag fits well.
+ if (unlikely(r))
+ goto out_free_user_pages;
+
+ amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+ r = drm_exec_prepare_obj(&p->exec, &e->bo->tbo.base, 2);
Previously there were comments for how the fence count is calculated,
now they seem to be removed. I'd prefer if they were properly retained
as finding out who calls drm_resv_add_fence is not trivial, and wrong
reservation count can also be really hard to debug.
Likewise for amdgpu_vm_lock_pd (which was added in another commit).
+ drm_exec_break_on_contention(&p->exec);
+ if (unlikely(r))
+ goto out_free_user_pages;
+
+ e->bo_va = amdgpu_vm_bo_find(vm, e->bo);
+ e->range = NULL;
+ }
+ drm_exec_continue_on_contention(&p->exec);
+
+ if (p->uf_bo) {
+ r = drm_exec_prepare_obj(&p->exec, &p->uf_bo->tbo.base,
+ 2);
+ drm_exec_continue_on_contention(&p->exec);
+ if (unlikely(r))
+ goto out_free_user_pages;
+ }
}
- amdgpu_bo_list_for_each_entry(e, p->bo_list) {
- struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+ amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+ struct mm_struct *usermm;
- e->bo_va = amdgpu_vm_bo_find(vm, bo);
+ usermm = amdgpu_ttm_tt_get_usermm(e->bo->tbo.ttm);
+ if (usermm && usermm != current->mm) {
+ r = -EPERM;
+ goto out_free_user_pages;
+ }
+
+ if (amdgpu_ttm_tt_is_userptr(e->bo->tbo.ttm) &&
+ e->user_invalidated && e->user_pages) {
+ amdgpu_bo_placement_from_domain(e->bo,
+ AMDGPU_GEM_DOMAIN_CPU);
+ r = ttm_bo_validate(&e->bo->tbo, &e->bo->placement,
+ &ctx);
+ if (r)
+ goto out_free_user_pages;
+
+ amdgpu_ttm_tt_set_user_pages(e->bo->tbo.ttm,
+ e->user_pages);
+ }
+
+ kvfree(e->user_pages);
+ e->user_pages = NULL;
}
amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
@@ -1296,9 +1271,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
*/
r = 0;
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
- struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
-
- r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, e->range);
+ r |= !amdgpu_ttm_tt_get_user_pages_done(e->bo->tbo.ttm,
+ e->range);
e->range = NULL;
e->range = NULL; needs to be removed, or it's causing *massive* memory
leaks.
}
if (r) {
@@ -1307,20 +1281,22 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
}
p->fence = dma_fence_get(&leader->base.s_fence->finished);
- list_for_each_entry(e, &p->validated, tv.head) {
+ drm_exec_for_each_locked_object(&p->exec, index, gobj) {
+
+ ttm_bo_move_to_lru_tail_unlocked(&gem_to_amdgpu_bo(gobj)->tbo);
/* Everybody except for the gang leader uses READ */
for (i = 0; i < p->gang_size; ++i) {
if (p->jobs[i] == leader)
continue;
- dma_resv_add_fence(e->tv.bo->base.resv,
+ dma_resv_add_fence(gobj->resv,
&p->jobs[i]->base.s_fence->finished,
DMA_RESV_USAGE_READ);
}
- /* The gang leader is remembered as writer */
- e->tv.num_shared = 0;
+ /* The gang leader as remembered as writer */
+ dma_resv_add_fence(gobj->resv, p->fence, DMA_RESV_USAGE_WRITE);
}
Previously PD used READ accesses, now everything is WRITE. This probably
isn't right.
Regards,
Tatsuyuki