On Fri, 2022-07-01 at 15:50 -0700, Niranjana Vishwanathapura wrote: > Handle persistent (VM_BIND) mappings during the request submission > in the execbuf3 path. > > Signed-off-by: Niranjana Vishwanathapura > <niranjana.vishwanathapura@xxxxxxxxx> > --- > .../gpu/drm/i915/gem/i915_gem_execbuffer3.c | 176 > +++++++++++++++++- > 1 file changed, 175 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer3.c > b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer3.c > index 13121df72e3d..2079f5ca9010 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer3.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer3.c > @@ -22,6 +22,7 @@ > #include "i915_gem_vm_bind.h" > #include "i915_trace.h" > > +#define __EXEC3_HAS_PIN BIT_ULL(33) > #define __EXEC3_ENGINE_PINNED BIT_ULL(32) > #define __EXEC3_INTERNAL_FLAGS (~0ull << 32) > > @@ -45,7 +46,9 @@ > * execlist. Hence, no support for implicit sync. > * > * The new execbuf3 ioctl only works in VM_BIND mode and the VM_BIND > mode only > - * works with execbuf3 ioctl for submission. > + * works with execbuf3 ioctl for submission. All BOs mapped on that > VM (through > + * VM_BIND call) at the time of execbuf3 call are deemed required > for that > + * submission. > * > * The execbuf3 ioctl directly specifies the batch addresses instead > of as > * object handles as in execbuf2 ioctl. The execbuf3 ioctl will also > not > @@ -61,6 +64,13 @@ > * So, a lot of code supporting execbuf2 ioctl, like relocations, VA > evictions, > * vma lookup table, implicit sync, vma active reference tracking > etc., are not > * applicable for execbuf3 ioctl. > + * > + * During each execbuf submission, request fence is added to all > VM_BIND mapped > + * objects with DMA_RESV_USAGE_BOOKKEEP. The DMA_RESV_USAGE_BOOKKEEP > usage will > + * prevent over sync (See enum dma_resv_usage). Note that > DRM_I915_GEM_WAIT and > + * DRM_I915_GEM_BUSY ioctls do not check for DMA_RESV_USAGE_BOOKKEEP > usage and > + * hence should not be used for end of batch check. Instead, the > execbuf3 > + * timeline out fence should be used for end of batch check. > */ > > struct eb_fence { > @@ -124,6 +134,19 @@ eb_find_vma(struct i915_address_space *vm, u64 > addr) > return i915_gem_vm_bind_lookup_vma(vm, va); > } > > +static void eb_scoop_unbound_vmas(struct i915_address_space *vm) > +{ > + struct i915_vma *vma, *vn; > + > + spin_lock(&vm->vm_rebind_lock); > + list_for_each_entry_safe(vma, vn, &vm->vm_rebind_list, > vm_rebind_link) { > + list_del_init(&vma->vm_rebind_link); > + if (!list_empty(&vma->vm_bind_link)) > + list_move_tail(&vma->vm_bind_link, &vm- > >vm_bind_list); > + } > + spin_unlock(&vm->vm_rebind_lock); > +} > + > static int eb_lookup_vmas(struct i915_execbuffer *eb) > { > unsigned int i, current_batch = 0; > @@ -138,11 +161,118 @@ static int eb_lookup_vmas(struct > i915_execbuffer *eb) > ++current_batch; > } > > + eb_scoop_unbound_vmas(eb->context->vm); > + > + return 0; > +} > + > +static int eb_lock_vmas(struct i915_execbuffer *eb) > +{ > + struct i915_address_space *vm = eb->context->vm; > + struct i915_vma *vma; > + int err; > + > + err = i915_gem_vm_priv_lock(eb->context->vm, &eb->ww); > + if (err) > + return err; > + See comment in review for 08/10 about re-checking the rebind list here. > + list_for_each_entry(vma, &vm->non_priv_vm_bind_list, > + non_priv_vm_bind_link) { > + err = i915_gem_object_lock(vma->obj, &eb->ww); > + if (err) > + return err; > + } > + > return 0; > } > > +static void eb_release_persistent_vmas(struct i915_execbuffer *eb, > bool final) > +{ > + struct i915_address_space *vm = eb->context->vm; > + struct i915_vma *vma, *vn; > + > + assert_vm_bind_held(vm); > + > + if (!(eb->args->flags & __EXEC3_HAS_PIN)) > + return; > + > + assert_vm_priv_held(vm); > + > + list_for_each_entry(vma, &vm->vm_bind_list, vm_bind_link) > + __i915_vma_unpin(vma); > + > + eb->args->flags &= ~__EXEC3_HAS_PIN; > + if (!final) > + return; > + > + list_for_each_entry_safe(vma, vn, &vm->vm_bind_list, > vm_bind_link) > + if (i915_vma_is_bind_complete(vma)) > + list_move_tail(&vma->vm_bind_link, &vm- > >vm_bound_list); > +} > + > static void eb_release_vmas(struct i915_execbuffer *eb, bool final) > { > + eb_release_persistent_vmas(eb, final); > + eb_unpin_engine(eb); > +} > + > +static int eb_reserve_fence_for_persistent_vmas(struct > i915_execbuffer *eb) > +{ > + struct i915_address_space *vm = eb->context->vm; > + struct i915_vma *vma; > + int ret; > + > + ret = dma_resv_reserve_fences(vm->root_obj->base.resv, 1); > + if (ret) > + return ret; > + > + list_for_each_entry(vma, &vm->non_priv_vm_bind_list, > + non_priv_vm_bind_link) { > + ret = dma_resv_reserve_fences(vma->obj->base.resv, > 1); > + if (ret) > + return ret; > + } > + > + return 0; > +} > + > +static int eb_validate_persistent_vmas(struct i915_execbuffer *eb) > +{ > + struct i915_address_space *vm = eb->context->vm; > + struct i915_vma *vma, *last_pinned_vma = NULL; > + int ret = 0; > + > + assert_vm_bind_held(vm); > + assert_vm_priv_held(vm); > + > + ret = eb_reserve_fence_for_persistent_vmas(eb); > + if (ret) > + return ret; > + > + if (list_empty(&vm->vm_bind_list)) > + return 0; > + > + list_for_each_entry(vma, &vm->vm_bind_list, vm_bind_link) { > + u64 pin_flags = vma->start | PIN_OFFSET_FIXED | > PIN_USER; > + > + ret = i915_vma_pin_ww(vma, &eb->ww, 0, 0, pin_flags); > + if (ret) > + break; > + > + last_pinned_vma = vma; > + } > + > + if (ret && last_pinned_vma) { > + list_for_each_entry(vma, &vm->vm_bind_list, > vm_bind_link) { > + __i915_vma_unpin(vma); > + if (vma == last_pinned_vma) > + break; > + } > + } else if (last_pinned_vma) { > + eb->args->flags |= __EXEC3_HAS_PIN; > + } > + > + return ret; > } > > static int eb_validate_vmas(struct i915_execbuffer *eb) > @@ -162,8 +292,17 @@ static int eb_validate_vmas(struct > i915_execbuffer *eb) > /* only throttle once, even if we didn't need to throttle */ > throttle = false; > > + err = eb_lock_vmas(eb); > + if (err) > + goto err; > + > + err = eb_validate_persistent_vmas(eb); > + if (err) > + goto err; > + > err: > if (err == -EDEADLK) { > + eb_release_vmas(eb, false); > err = i915_gem_ww_ctx_backoff(&eb->ww); > if (!err) > goto retry; > @@ -187,8 +326,43 @@ static int eb_validate_vmas(struct > i915_execbuffer *eb) > BUILD_BUG_ON(!typecheck(int, _i)); \ > for ((_i) = (_eb)->num_batches - 1; (_i) >= 0; --(_i)) > > +static void __eb_persistent_add_shared_fence(struct > drm_i915_gem_object *obj, > + struct dma_fence *fence) > +{ > + dma_resv_add_fence(obj->base.resv, fence, > DMA_RESV_USAGE_BOOKKEEP); > + obj->write_domain = 0; > + obj->read_domains |= I915_GEM_GPU_DOMAINS; > + obj->mm.dirty = true; > +} > + > +static void eb_persistent_add_shared_fence(struct i915_execbuffer > *eb) > +{ > + struct i915_address_space *vm = eb->context->vm; > + struct dma_fence *fence; > + struct i915_vma *vma; > + > + fence = eb->composite_fence ? eb->composite_fence : > + &eb->requests[0]->fence; > + > + __eb_persistent_add_shared_fence(vm->root_obj, fence); > + list_for_each_entry(vma, &vm->non_priv_vm_bind_list, > + non_priv_vm_bind_link) > + __eb_persistent_add_shared_fence(vma->obj, fence); > +} > + > +static void eb_persistent_vmas_move_to_active(struct i915_execbuffer > *eb) > +{ > + /* Add fence to BOs dma-resv fence list */ > + eb_persistent_add_shared_fence(eb); This means we don't add any fences to the vma active trackers. While this works fine for TTM delayed buffer destruction, unbinding at eviction and shrinking wouldn't wait for gpu activity to idle before unbinding? /Thomas