Re: [PATCH] drm/amdgpu: fix error handling in amdgpu_vm_init

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Am 01.11.23 um 23:00 schrieb Felix Kuehling:
On 2023-10-31 11:18, Alex Deucher wrote:
On Tue, Oct 31, 2023 at 11:12 AM Christian König
<ckoenig.leichtzumerken@xxxxxxxxx> wrote:
When clearing the root PD fails, we need to properly release it again.

Signed-off-by: Christian König <christian.koenig@xxxxxxx>
Acked-by: Alex Deucher <alexander.deucher@xxxxxxx>
Has this been submitted? I see some intermittent failures in the PSDB that may be related to this.

Not yet, but I'm going to push it now.

This is just a fix for the error code path. Fixing the underlying problem had higher priority.

Regards,
Christian.


Regards,
  Felix




---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 31 +++++++++++++-------------
  1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index d72daf15662f..5877f6e9b893 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2042,7 +2042,8 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
   * Returns:
   * 0 for success, error for failure.
   */
-int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id)
+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+                  int32_t xcp_id)
  {
         struct amdgpu_bo *root_bo;
         struct amdgpu_bo_vm *root;
@@ -2061,6 +2062,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp
         INIT_LIST_HEAD(&vm->done);
         INIT_LIST_HEAD(&vm->pt_freed);
         INIT_WORK(&vm->pt_free_work, amdgpu_vm_pt_free_work);
+       INIT_KFIFO(vm->faults);

         /* create scheduler entities for page table updates */
         r = drm_sched_entity_init(&vm->immediate, DRM_SCHED_PRIORITY_NORMAL,
@@ -2103,34 +2105,33 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp
                                 false, &root, xcp_id);
         if (r)
                 goto error_free_delayed;
-       root_bo = &root->bo;
+
+       root_bo = amdgpu_bo_ref(&root->bo);
         r = amdgpu_bo_reserve(root_bo, true);
-       if (r)
-               goto error_free_root;
+       if (r) {
+               amdgpu_bo_unref(&root->shadow);
+               amdgpu_bo_unref(&root_bo);
+               goto error_free_delayed;
+       }

+       amdgpu_vm_bo_base_init(&vm->root, vm, root_bo);
         r = dma_resv_reserve_fences(root_bo->tbo.base.resv, 1);
         if (r)
-               goto error_unreserve;
-
-       amdgpu_vm_bo_base_init(&vm->root, vm, root_bo);
+               goto error_free_root;

         r = amdgpu_vm_pt_clear(adev, vm, root, false);
         if (r)
-               goto error_unreserve;
+               goto error_free_root;

         amdgpu_bo_unreserve(vm->root.bo);
-
-       INIT_KFIFO(vm->faults);
+       amdgpu_bo_unref(&root_bo);

         return 0;

-error_unreserve:
-       amdgpu_bo_unreserve(vm->root.bo);
-
  error_free_root:
-       amdgpu_bo_unref(&root->shadow);
+       amdgpu_vm_pt_free_root(adev, vm);
+       amdgpu_bo_unreserve(vm->root.bo);
         amdgpu_bo_unref(&root_bo);
-       vm->root.bo = NULL;

  error_free_delayed:
         dma_fence_put(vm->last_tlb_flush);
--
2.34.1





[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux