On Thu, 16 Dec 2021 at 14:28, Maarten Lankhorst <maarten.lankhorst@xxxxxxxxxxxxxxx> wrote: > > i915_gem_evict_vm will need to be able to evict objects that are > locked by the current ctx. By testing if the current context already > locked the object, we can do this correctly. This allows us to > evict the entire vm even if we already hold some objects' locks. > > Previously, this was spread over several commits, but it makes > more sense to commit the changes to i915_gem_evict_vm separately > from the changes to i915_gem_evict_something() and > i915_gem_evict_for_node(). > > Signed-off-by: Maarten Lankhorst <maarten.lankhorst@xxxxxxxxxxxxxxx> > --- > .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 2 +- > drivers/gpu/drm/i915/gem/i915_gem_mman.c | 2 +- > drivers/gpu/drm/i915/i915_drv.h | 3 +- > drivers/gpu/drm/i915/i915_gem_evict.c | 30 +++++++++++++++++-- > drivers/gpu/drm/i915/i915_vma.c | 7 ++++- > .../gpu/drm/i915/selftests/i915_gem_evict.c | 10 +++++-- > 6 files changed, 46 insertions(+), 8 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > index 2213f7b613da..eb3649e844ff 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > @@ -766,7 +766,7 @@ static int eb_reserve(struct i915_execbuffer *eb) > case 1: > /* Too fragmented, unbind everything and retry */ > mutex_lock(&eb->context->vm->mutex); > - err = i915_gem_evict_vm(eb->context->vm); > + err = i915_gem_evict_vm(eb->context->vm, &eb->ww); > mutex_unlock(&eb->context->vm->mutex); > if (err) > return err; > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c > index 00cd9642669a..2856098cb449 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c > @@ -366,7 +366,7 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf) > if (vma == ERR_PTR(-ENOSPC)) { > ret = mutex_lock_interruptible(&ggtt->vm.mutex); > if (!ret) { > - ret = i915_gem_evict_vm(&ggtt->vm); > + ret = i915_gem_evict_vm(&ggtt->vm, &ww); > mutex_unlock(&ggtt->vm.mutex); > } > if (ret) > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index d51628d10f9d..c180019c607f 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -1725,7 +1725,8 @@ int __must_check i915_gem_evict_something(struct i915_address_space *vm, > int __must_check i915_gem_evict_for_node(struct i915_address_space *vm, > struct drm_mm_node *node, > unsigned int flags); > -int i915_gem_evict_vm(struct i915_address_space *vm); > +int i915_gem_evict_vm(struct i915_address_space *vm, > + struct i915_gem_ww_ctx *ww); > > /* i915_gem_internal.c */ > struct drm_i915_gem_object * > diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c > index 2b73ddb11c66..bfd66f539fc1 100644 > --- a/drivers/gpu/drm/i915/i915_gem_evict.c > +++ b/drivers/gpu/drm/i915/i915_gem_evict.c > @@ -367,7 +367,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, > * To clarify: This is for freeing up virtual address space, not for freeing > * memory in e.g. the shrinker. > */ > -int i915_gem_evict_vm(struct i915_address_space *vm) > +int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww) > { > int ret = 0; > > @@ -388,24 +388,50 @@ int i915_gem_evict_vm(struct i915_address_space *vm) > do { > struct i915_vma *vma, *vn; > LIST_HEAD(eviction_list); > + LIST_HEAD(locked_eviction_list); > > list_for_each_entry(vma, &vm->bound_list, vm_link) { > if (i915_vma_is_pinned(vma)) > continue; > > + /* > + * If we already own the lock, trylock fails. In case the resv > + * is shared among multiple objects, we still need the object ref. What is "object ref" here? I assume it's just leftovers... Otherwise, Reviewed-by: Matthew Auld <matthew.auld@xxxxxxxxx> > + */ > + if (ww && (dma_resv_locking_ctx(vma->obj->base.resv) == &ww->ctx)) { > + __i915_vma_pin(vma); > + list_add(&vma->evict_link, &locked_eviction_list); > + continue; > + } > + > + if (!i915_gem_object_trylock(vma->obj, ww)) > + continue; > + > __i915_vma_pin(vma); > list_add(&vma->evict_link, &eviction_list); > } > - if (list_empty(&eviction_list)) > + if (list_empty(&eviction_list) && list_empty(&locked_eviction_list)) > break; > > ret = 0; > + /* Unbind locked objects first, before unlocking the eviction_list */ > + list_for_each_entry_safe(vma, vn, &locked_eviction_list, evict_link) { > + __i915_vma_unpin(vma); > + > + if (ret == 0) > + ret = __i915_vma_unbind(vma); > + if (ret != -EINTR) /* "Get me out of here!" */ > + ret = 0; > + } > + > list_for_each_entry_safe(vma, vn, &eviction_list, evict_link) { > __i915_vma_unpin(vma); > if (ret == 0) > ret = __i915_vma_unbind(vma); > if (ret != -EINTR) /* "Get me out of here!" */ > ret = 0; > + > + i915_gem_object_unlock(vma->obj); > } > } while (ret == 0); > > diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c > index de24e4b3b19b..d24e90eac948 100644 > --- a/drivers/gpu/drm/i915/i915_vma.c > +++ b/drivers/gpu/drm/i915/i915_vma.c > @@ -1462,7 +1462,12 @@ static int __i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, > /* Unlike i915_vma_pin, we don't take no for an answer! */ > flush_idle_contexts(vm->gt); > if (mutex_lock_interruptible(&vm->mutex) == 0) { > - i915_gem_evict_vm(vm); > + /* > + * We pass NULL ww here, as we don't want to unbind > + * locked objects when called from execbuf when pinning > + * is removed. This would probably regress badly. > + */ > + i915_gem_evict_vm(vm, NULL); > mutex_unlock(&vm->mutex); > } > } while (1); > diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c > index 7e0658a77659..7178811366af 100644 > --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c > +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c > @@ -331,6 +331,7 @@ static int igt_evict_vm(void *arg) > { > struct intel_gt *gt = arg; > struct i915_ggtt *ggtt = gt->ggtt; > + struct i915_gem_ww_ctx ww; > LIST_HEAD(objects); > int err; > > @@ -342,7 +343,7 @@ static int igt_evict_vm(void *arg) > > /* Everything is pinned, nothing should happen */ > mutex_lock(&ggtt->vm.mutex); > - err = i915_gem_evict_vm(&ggtt->vm); > + err = i915_gem_evict_vm(&ggtt->vm, NULL); > mutex_unlock(&ggtt->vm.mutex); > if (err) { > pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n", > @@ -352,9 +353,14 @@ static int igt_evict_vm(void *arg) > > unpin_ggtt(ggtt); > > + i915_gem_ww_ctx_init(&ww, false); > mutex_lock(&ggtt->vm.mutex); > - err = i915_gem_evict_vm(&ggtt->vm); > + err = i915_gem_evict_vm(&ggtt->vm, &ww); > mutex_unlock(&ggtt->vm.mutex); > + > + /* no -EDEADLK handling; can't happen with vm.mutex in place */ > + i915_gem_ww_ctx_fini(&ww); > + > if (err) { > pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n", > err); > -- > 2.34.1 >