Re: [Intel-gfx] [PATCH v5 2/6] drm/i915: Add locking to i915_gem_evict_vm()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 1/13/22 12:44, Maarten Lankhorst wrote:
i915_gem_evict_vm will need to be able to evict objects that are
locked by the current ctx. By testing if the current context already
locked the object, we can do this correctly. This allows us to
evict the entire vm even if we already hold some objects' locks.

Previously, this was spread over several commits, but it makes
more sense to commit the changes to i915_gem_evict_vm separately
from the changes to i915_gem_evict_something() and
i915_gem_evict_for_node().

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@xxxxxxxxxxxxxxx>
---
  .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |  2 +-
  drivers/gpu/drm/i915/gem/i915_gem_mman.c      |  2 +-
  drivers/gpu/drm/i915/i915_gem_evict.c         | 42 ++++++++++++++++++-
  drivers/gpu/drm/i915/i915_gem_evict.h         |  4 +-
  drivers/gpu/drm/i915/i915_vma.c               |  7 +++-
  .../gpu/drm/i915/selftests/i915_gem_evict.c   | 10 ++++-
  6 files changed, 59 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index cf283b5f6ffe..4d832d6696b5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -767,7 +767,7 @@ static int eb_reserve(struct i915_execbuffer *eb)
  		case 1:
  			/* Too fragmented, unbind everything and retry */
  			mutex_lock(&eb->context->vm->mutex);
-			err = i915_gem_evict_vm(eb->context->vm);
+			err = i915_gem_evict_vm(eb->context->vm, &eb->ww);
  			mutex_unlock(&eb->context->vm->mutex);
  			if (err)
  				return err;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index fafd158e5313..a69787999d09 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -367,7 +367,7 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
  		if (vma == ERR_PTR(-ENOSPC)) {
  			ret = mutex_lock_interruptible(&ggtt->vm.mutex);
  			if (!ret) {
-				ret = i915_gem_evict_vm(&ggtt->vm);
+				ret = i915_gem_evict_vm(&ggtt->vm, &ww);
  				mutex_unlock(&ggtt->vm.mutex);
  			}
  			if (ret)
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 24eee0c2055f..370eb7238d1c 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -74,6 +74,12 @@ static bool defer_evict(struct i915_vma *vma)
  	return false;
  }
+static int evict_dead(struct i915_vma *vma)
+{
+	atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
+	return __i915_vma_unbind(vma);
+}
+
  /**
   * i915_gem_evict_something - Evict vmas to make room for binding a new one
   * @vm: address space to evict from
@@ -368,7 +374,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
   * To clarify: This is for freeing up virtual address space, not for freeing
   * memory in e.g. the shrinker.
   */
-int i915_gem_evict_vm(struct i915_address_space *vm)
+int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww)
  {
  	int ret = 0;
@@ -389,24 +395,56 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
  	do {
  		struct i915_vma *vma, *vn;
  		LIST_HEAD(eviction_list);
+		LIST_HEAD(locked_eviction_list);
list_for_each_entry(vma, &vm->bound_list, vm_link) {
+			if (!kref_read(&vma->obj->base.refcount)) {
+				ret = evict_dead(vma);
+				if (ret)
+					break;
+			}
+

Could the call to evict_dead corrupt the bound_list?

  			if (i915_vma_is_pinned(vma))
  				continue;
+ /*
+			 * If we already own the lock, trylock fails. In case the resv
+			 * is shared among multiple objects, we still need the object ref.
+			 */
+			if (ww && (dma_resv_locking_ctx(vma->obj->base.resv) == &ww->ctx)) {
+				__i915_vma_pin(vma);
+				list_add(&vma->evict_link, &locked_eviction_list);
+				continue;
+			}
+
+			if (!i915_gem_object_trylock(vma->obj, ww))
+				continue;
+
  			__i915_vma_pin(vma);
  			list_add(&vma->evict_link, &eviction_list);
  		}
-		if (list_empty(&eviction_list))
+		if (list_empty(&eviction_list) && list_empty(&locked_eviction_list))
  			break;
ret = 0;
+		/* Unbind locked objects first, before unlocking the eviction_list */
+		list_for_each_entry_safe(vma, vn, &locked_eviction_list, evict_link) {
+			__i915_vma_unpin(vma);
+
+			if (ret == 0)
+				ret = __i915_vma_unbind(vma);
+			if (ret != -EINTR) /* "Get me out of here!" */
+				ret = 0;
+		}
+
  		list_for_each_entry_safe(vma, vn, &eviction_list, evict_link) {
  			__i915_vma_unpin(vma);
  			if (ret == 0)
  				ret = __i915_vma_unbind(vma);
  			if (ret != -EINTR) /* "Get me out of here!" */
  				ret = 0;
+
+			i915_gem_object_unlock(vma->obj);
  		}
  	} while (ret == 0);
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.h b/drivers/gpu/drm/i915/i915_gem_evict.h
index d4478b6ad11b..f5b7a9100609 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.h
+++ b/drivers/gpu/drm/i915/i915_gem_evict.h
@@ -10,6 +10,7 @@
struct drm_mm_node;
  struct i915_address_space;
+struct i915_gem_ww_ctx;
int __must_check i915_gem_evict_something(struct i915_address_space *vm,
  					  u64 min_size, u64 alignment,
@@ -19,6 +20,7 @@ int __must_check i915_gem_evict_something(struct i915_address_space *vm,
  int __must_check i915_gem_evict_for_node(struct i915_address_space *vm,
  					 struct drm_mm_node *node,
  					 unsigned int flags);
-int i915_gem_evict_vm(struct i915_address_space *vm);
+int i915_gem_evict_vm(struct i915_address_space *vm,
+		      struct i915_gem_ww_ctx *ww);
#endif /* __I915_GEM_EVICT_H__ */
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 1f15c3298112..8477cae5f877 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -1535,7 +1535,12 @@ static int __i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
  		/* Unlike i915_vma_pin, we don't take no for an answer! */
  		flush_idle_contexts(vm->gt);
  		if (mutex_lock_interruptible(&vm->mutex) == 0) {
-			i915_gem_evict_vm(vm);
+			/*
+			 * We pass NULL ww here, as we don't want to unbind
+			 * locked objects when called from execbuf when pinning
+			 * is removed. This would probably regress badly.
+			 */
+			i915_gem_evict_vm(vm, NULL);
  			mutex_unlock(&vm->mutex);
  		}
  	} while (1);
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
index 75b709c26dd3..7c075c16a573 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
@@ -331,6 +331,7 @@ static int igt_evict_vm(void *arg)
  {
  	struct intel_gt *gt = arg;
  	struct i915_ggtt *ggtt = gt->ggtt;
+	struct i915_gem_ww_ctx ww;
  	LIST_HEAD(objects);
  	int err;
@@ -342,7 +343,7 @@ static int igt_evict_vm(void *arg) /* Everything is pinned, nothing should happen */
  	mutex_lock(&ggtt->vm.mutex);
-	err = i915_gem_evict_vm(&ggtt->vm);
+	err = i915_gem_evict_vm(&ggtt->vm, NULL);
  	mutex_unlock(&ggtt->vm.mutex);
  	if (err) {
  		pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n",
@@ -352,9 +353,14 @@ static int igt_evict_vm(void *arg)
unpin_ggtt(ggtt); + i915_gem_ww_ctx_init(&ww, false);
  	mutex_lock(&ggtt->vm.mutex);
-	err = i915_gem_evict_vm(&ggtt->vm);
+	err = i915_gem_evict_vm(&ggtt->vm, &ww);
  	mutex_unlock(&ggtt->vm.mutex);
+
+	/* no -EDEADLK handling; can't happen with vm.mutex in place */
+	i915_gem_ww_ctx_fini(&ww);

This will break if/when we make vm.mutex a dma_resv ww mutex. Perhaps just use the for_i915_gem_ww macro?

/Thomas





[Index of Archives]     [Linux DRI Users]     [Linux Intel Graphics]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [XFree86]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux