Re: [PATCH 4/5] drm/amdgpu: add VM eviction lock v2

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 2019-12-05 8:39 a.m., Christian König wrote:
This allows to invalidate VM entries without taking the reservation lock.

Signed-off-by: Christian König <christian.koenig@xxxxxxx>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 39 +++++++++++++++++++++-----
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  4 +++
  2 files changed, 36 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 0d700e8154c4..839d6df394fc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -656,7 +656,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
  			      void *param)
  {
  	struct amdgpu_vm_bo_base *bo_base, *tmp;
-	int r = 0;
+	int r;
vm->bulk_moveable &= list_empty(&vm->evicted); @@ -665,7 +665,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, r = validate(param, bo);
  		if (r)
-			break;
+			return r;
if (bo->tbo.type != ttm_bo_type_kernel) {
  			amdgpu_vm_bo_moved(bo_base);
@@ -678,7 +678,11 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
  		}
  	}
- return r;
+	mutex_lock(&vm->eviction_lock);
+	vm->evicting = false;
+	mutex_unlock(&vm->eviction_lock);
+
+	return 0;
  }
/**
@@ -1555,15 +1559,25 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
  	if (!(flags & AMDGPU_PTE_VALID))
  		owner = AMDGPU_FENCE_OWNER_KFD;
+ mutex_lock(&vm->eviction_lock);
+	if (vm->evicting) {
+		r = EINPROGRESS;

Not sure about this error code. As far as I can find, this is normally used on non-blocking sockets. I found this explanation: https://stackoverflow.com/questions/8277970/what-are-possible-reason-for-socket-error-einprogress-in-solaris

Quote: "so there is another error for non-blocking connect: EINPROGRESS, which tells you that the operation is in progress and you should check its status later."

This call is neither non-blocking nor is the requested page table update in progress when this error is returned. So I'd think a better error to return here would be EBUSY.

Other than that, this patch is

Reviewed-by: Felix Kuehling <Felix.Kuehling@xxxxxxx>


+		goto error_unlock;
+	}
+
  	r = vm->update_funcs->prepare(&params, owner, exclusive);
  	if (r)
-		return r;
+		goto error_unlock;
r = amdgpu_vm_update_ptes(&params, start, last + 1, addr, flags);
  	if (r)
-		return r;
+		goto error_unlock;
- return vm->update_funcs->commit(&params, fence);
+	r = vm->update_funcs->commit(&params, fence);
+
+error_unlock:
+	mutex_unlock(&vm->eviction_lock);
+	return r;
  }
/**
@@ -2522,11 +2536,19 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
  	if (!dma_resv_test_signaled_rcu(bo->tbo.base.resv, true))
  		return false;
+ /* Try to block ongoing updates */
+	if (!mutex_trylock(&bo_base->vm->eviction_lock))
+		return false;
+
  	/* Don't evict VM page tables while they are updated */
  	if (!dma_fence_is_signaled(bo_base->vm->last_direct) ||
-	    !dma_fence_is_signaled(bo_base->vm->last_delayed))
+	    !dma_fence_is_signaled(bo_base->vm->last_delayed)) {
+		mutex_unlock(&bo_base->vm->eviction_lock);
  		return false;
+	}
+ bo_base->vm->evicting = true;
+	mutex_unlock(&bo_base->vm->eviction_lock);
  	return true;
  }
@@ -2773,6 +2795,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
  	vm->last_direct = dma_fence_get_stub();
  	vm->last_delayed = dma_fence_get_stub();
+ mutex_init(&vm->eviction_lock);
+	vm->evicting = false;
+
  	amdgpu_vm_bo_param(adev, vm, adev->vm_manager.root_level, false, &bp);
  	if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE)
  		bp.flags &= ~AMDGPU_GEM_CREATE_SHADOW;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index d93ea9ad879e..d5613d184e99 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -242,6 +242,10 @@ struct amdgpu_vm {
  	/* tree of virtual addresses mapped */
  	struct rb_root_cached	va;
+ /* Lock to prevent eviction while we are updating page tables */
+	struct mutex		eviction_lock;
+	bool			evicting;
+
  	/* BOs who needs a validation */
  	struct list_head	evicted;
_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux