On 2023-07-11 09:31, Christian König wrote:
Avoids quite a bit of logic and kmalloc overhead.
v2: fix multiple problems pointed out by Felix
Signed-off-by: Christian König <christian.koenig@xxxxxxx>
Two nit-picks inline about DRM_EXEC_INTERRUPTIBLE_WAIT. With those
fixed, the patch is
Reviewed-by: Felix Kuehling <Felix.Kuehling@xxxxxxx>
---
drivers/gpu/drm/amd/amdgpu/Kconfig | 1 +
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 5 +-
.../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 299 +++++++-----------
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 18 ++
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 4 +
drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 45 ++-
6 files changed, 162 insertions(+), 210 deletions(-)
[snip]
@@ -2538,50 +2489,41 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
*/
static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
{
- struct amdgpu_bo_list_entry *pd_bo_list_entries;
- struct list_head resv_list, duplicates;
- struct ww_acquire_ctx ticket;
+ struct ttm_operation_ctx ctx = { false, false };
struct amdgpu_sync sync;
+ struct drm_exec exec;
struct amdgpu_vm *peer_vm;
struct kgd_mem *mem, *tmp_mem;
struct amdgpu_bo *bo;
- struct ttm_operation_ctx ctx = { false, false };
- int i, ret;
-
- pd_bo_list_entries = kcalloc(process_info->n_vms,
- sizeof(struct amdgpu_bo_list_entry),
- GFP_KERNEL);
- if (!pd_bo_list_entries) {
- pr_err("%s: Failed to allocate PD BO list entries\n", __func__);
- ret = -ENOMEM;
- goto out_no_mem;
- }
-
- INIT_LIST_HEAD(&resv_list);
- INIT_LIST_HEAD(&duplicates);
+ int ret;
- /* Get all the page directory BOs that need to be reserved */
- i = 0;
- list_for_each_entry(peer_vm, &process_info->vm_list_head,
- vm_list_node)
- amdgpu_vm_get_pd_bo(peer_vm, &resv_list,
- &pd_bo_list_entries[i++]);
- /* Add the userptr_inval_list entries to resv_list */
- list_for_each_entry(mem, &process_info->userptr_inval_list,
- validate_list.head) {
- list_add_tail(&mem->resv_list.head, &resv_list);
- mem->resv_list.bo = mem->validate_list.bo;
- mem->resv_list.num_shared = mem->validate_list.num_shared;
- }
+ amdgpu_sync_create(&sync);
+ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
This runs in a worker thread. So I think it doesn't need to be
interruptible.
/* Reserve all BOs and page tables for validation */
- ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
- WARN(!list_empty(&duplicates), "Duplicates should be empty");
- if (ret)
- goto out_free;
+ drm_exec_until_all_locked(&exec) {
+ /* Reserve all the page directories */
+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
+ vm_list_node) {
+ ret = amdgpu_vm_lock_pd(peer_vm, &exec, 2);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(ret))
+ goto unreserve_out;
+ }
- amdgpu_sync_create(&sync);
+ /* Reserve the userptr_inval_list entries to resv_list */
+ list_for_each_entry(mem, &process_info->userptr_inval_list,
+ validate_list) {
+ struct drm_gem_object *gobj;
+
+ gobj = &mem->bo->tbo.base;
+ ret = drm_exec_prepare_obj(&exec, gobj, 1);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(ret))
+ goto unreserve_out;
+ }
+ }
ret = process_validate_vms(process_info);
if (ret)
[snip]
@@ -1467,25 +1467,24 @@ static int svm_range_reserve_bos(struct
svm_validate_context *ctx)
uint32_t gpuidx;
int r;
- INIT_LIST_HEAD(&ctx->validate_list);
- for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) {
- pdd = kfd_process_device_from_gpuidx(ctx->process, gpuidx);
- if (!pdd) {
- pr_debug("failed to find device idx %d\n", gpuidx);
- return -EINVAL;
- }
- vm = drm_priv_to_vm(pdd->drm_priv);
-
- ctx->tv[gpuidx].bo = &vm->root.bo->tbo;
- ctx->tv[gpuidx].num_shared = 4;
- list_add(&ctx->tv[gpuidx].head, &ctx->validate_list);
- }
+ drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
This function is only called from svm_range_validate_and_map, which has
an "intr" parameter. If you pass that through, you could make
DRM_EXEC_INTERRUPTIBLE_WAIT conditional on that.
Regards,
Felix
+ drm_exec_until_all_locked(&ctx->exec) {
+ for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) {
+ pdd = kfd_process_device_from_gpuidx(ctx->process, gpuidx);
+ if (!pdd) {
+ pr_debug("failed to find device idx %d\n", gpuidx);
+ r = -EINVAL;
+ goto unreserve_out;
+ }
+ vm = drm_priv_to_vm(pdd->drm_priv);
- r = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->validate_list,
- ctx->intr, NULL);
- if (r) {
- pr_debug("failed %d to reserve bo\n", r);
- return r;
+ r = amdgpu_vm_lock_pd(vm, &ctx->exec, 2);
+ drm_exec_retry_on_contention(&ctx->exec);
+ if (unlikely(r)) {
+ pr_debug("failed %d to reserve bo\n", r);
+ goto unreserve_out;
+ }
+ }
}
for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) {
@@ -1508,13 +1507,13 @@ static int svm_range_reserve_bos(struct svm_validate_context *ctx)
return 0;
unreserve_out:
- ttm_eu_backoff_reservation(&ctx->ticket, &ctx->validate_list);
+ drm_exec_fini(&ctx->exec);
return r;
}
static void svm_range_unreserve_bos(struct svm_validate_context *ctx)
{
- ttm_eu_backoff_reservation(&ctx->ticket, &ctx->validate_list);
+ drm_exec_fini(&ctx->exec);
}
static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)