On Fri, 2022-07-08 at 05:44 -0700, Niranjana Vishwanathapura wrote:
> On Thu, Jul 07, 2022 at 07:54:16AM -0700, Hellstrom, Thomas wrote:
> > On Fri, 2022-07-01 at 15:50 -0700, Niranjana Vishwanathapura wrote:
> > > Handle persistent (VM_BIND) mappings during the request submission
> > > in the execbuf3 path.
> > >
> > > Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@xxxxxxxxx>
> > > ---
> > >  .../gpu/drm/i915/gem/i915_gem_execbuffer3.c | 176 +++++++++++++++++-
> > >  1 file changed, 175 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer3.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer3.c
> > > index 13121df72e3d..2079f5ca9010 100644
> > > --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer3.c
> > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer3.c
> > > @@ -22,6 +22,7 @@
> > >  #include "i915_gem_vm_bind.h"
> > >  #include "i915_trace.h"
> > >
> > > +#define __EXEC3_HAS_PIN                BIT_ULL(33)
> > >  #define __EXEC3_ENGINE_PINNED          BIT_ULL(32)
> > >  #define __EXEC3_INTERNAL_FLAGS         (~0ull << 32)
> > >
> > > @@ -45,7 +46,9 @@
> > >   * execlist. Hence, no support for implicit sync.
> > >   *
> > >   * The new execbuf3 ioctl only works in VM_BIND mode and the VM_BIND mode only
> > > - * works with execbuf3 ioctl for submission.
> > > + * works with execbuf3 ioctl for submission. All BOs mapped on that VM (through
> > > + * VM_BIND call) at the time of execbuf3 call are deemed required for that
> > > + * submission.
> > >   *
> > >   * The execbuf3 ioctl directly specifies the batch addresses instead of as
> > >   * object handles as in execbuf2 ioctl. The execbuf3 ioctl will also not
> > > @@ -61,6 +64,13 @@
> > >   * So, a lot of code supporting execbuf2 ioctl, like relocations, VA evictions,
> > >   * vma lookup table, implicit sync, vma active reference tracking etc., are not
> > >   * applicable for execbuf3 ioctl.
> > > + *
> > > + * During each execbuf submission, request fence is added to all VM_BIND mapped
> > > + * objects with DMA_RESV_USAGE_BOOKKEEP. The DMA_RESV_USAGE_BOOKKEEP usage will
> > > + * prevent over sync (See enum dma_resv_usage). Note that DRM_I915_GEM_WAIT and
> > > + * DRM_I915_GEM_BUSY ioctls do not check for DMA_RESV_USAGE_BOOKKEEP usage and
> > > + * hence should not be used for end of batch check. Instead, the execbuf3
> > > + * timeline out fence should be used for end of batch check.
> > >   */
> > >
> > >  struct eb_fence {
> > > @@ -124,6 +134,19 @@ eb_find_vma(struct i915_address_space *vm, u64 addr)
> > >  	return i915_gem_vm_bind_lookup_vma(vm, va);
> > >  }
> > >
> > > +static void eb_scoop_unbound_vmas(struct i915_address_space *vm)
> > > +{
> > > +	struct i915_vma *vma, *vn;
> > > +
> > > +	spin_lock(&vm->vm_rebind_lock);
> > > +	list_for_each_entry_safe(vma, vn, &vm->vm_rebind_list, vm_rebind_link) {
> > > +		list_del_init(&vma->vm_rebind_link);
> > > +		if (!list_empty(&vma->vm_bind_link))
> > > +			list_move_tail(&vma->vm_bind_link, &vm->vm_bind_list);
> > > +	}
> > > +	spin_unlock(&vm->vm_rebind_lock);
> > > +}
> > > +
> > >  static int eb_lookup_vmas(struct i915_execbuffer *eb)
> > >  {
> > >  	unsigned int i, current_batch = 0;
> > > @@ -138,11 +161,118 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
> > >  		++current_batch;
> > >  	}
> > >
> > > +	eb_scoop_unbound_vmas(eb->context->vm);
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +static int eb_lock_vmas(struct i915_execbuffer *eb)
> > > +{
> > > +	struct i915_address_space *vm = eb->context->vm;
> > > +	struct i915_vma *vma;
> > > +	int err;
> > > +
> > > +	err = i915_gem_vm_priv_lock(eb->context->vm, &eb->ww);
> > > +	if (err)
> > > +		return err;
> > > +
> >
> > See comment in review for 08/10 about re-checking the rebind list
> > here.
> >
> >
> > > +	list_for_each_entry(vma, &vm->non_priv_vm_bind_list,
> > > +			    non_priv_vm_bind_link) {
> > > +		err = i915_gem_object_lock(vma->obj, &eb->ww);
> > > +		if (err)
> > > +			return err;
> > > +	}
> > > +
> > >  	return 0;
> > >  }
> > >
> > > +static void eb_release_persistent_vmas(struct i915_execbuffer *eb, bool final)
> > > +{
> > > +	struct i915_address_space *vm = eb->context->vm;
> > > +	struct i915_vma *vma, *vn;
> > > +
> > > +	assert_vm_bind_held(vm);
> > > +
> > > +	if (!(eb->args->flags & __EXEC3_HAS_PIN))
> > > +		return;
> > > +
> > > +	assert_vm_priv_held(vm);
> > > +
> > > +	list_for_each_entry(vma, &vm->vm_bind_list, vm_bind_link)
> > > +		__i915_vma_unpin(vma);
> > > +
> > > +	eb->args->flags &= ~__EXEC3_HAS_PIN;
> > > +	if (!final)
> > > +		return;
> > > +
> > > +	list_for_each_entry_safe(vma, vn, &vm->vm_bind_list, vm_bind_link)
> > > +		if (i915_vma_is_bind_complete(vma))
> > > +			list_move_tail(&vma->vm_bind_link, &vm->vm_bound_list);
> > > +}
> > > +
> > >  static void eb_release_vmas(struct i915_execbuffer *eb, bool final)
> > >  {
> > > +	eb_release_persistent_vmas(eb, final);
> > > +	eb_unpin_engine(eb);
> > > +}
> > > +
> > > +static int eb_reserve_fence_for_persistent_vmas(struct i915_execbuffer *eb)
> > > +{
> > > +	struct i915_address_space *vm = eb->context->vm;
> > > +	struct i915_vma *vma;
> > > +	int ret;
> > > +
> > > +	ret = dma_resv_reserve_fences(vm->root_obj->base.resv, 1);
> > > +	if (ret)
> > > +		return ret;
> > > +
> > > +	list_for_each_entry(vma, &vm->non_priv_vm_bind_list,
> > > +			    non_priv_vm_bind_link) {
> > > +		ret = dma_resv_reserve_fences(vma->obj->base.resv, 1);
> > > +		if (ret)
> > > +			return ret;
> > > +	}
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +static int eb_validate_persistent_vmas(struct i915_execbuffer *eb)
> > > +{
> > > +	struct i915_address_space *vm = eb->context->vm;
> > > +	struct i915_vma *vma, *last_pinned_vma = NULL;
> > > +	int ret = 0;
> > > +
> > > +	assert_vm_bind_held(vm);
> > > +	assert_vm_priv_held(vm);
> > > +
> > > +	ret = eb_reserve_fence_for_persistent_vmas(eb);
> > > +	if (ret)
> > > +		return ret;
> > > +
> > > +	if (list_empty(&vm->vm_bind_list))
> > > +		return 0;
> > > +
> > > +	list_for_each_entry(vma, &vm->vm_bind_list, vm_bind_link) {
> > > +		u64 pin_flags = vma->start | PIN_OFFSET_FIXED | PIN_USER;
> > > +
> > > +		ret = i915_vma_pin_ww(vma, &eb->ww, 0, 0, pin_flags);
> > > +		if (ret)
> > > +			break;
> > > +
> > > +		last_pinned_vma = vma;
> > > +	}
> > > +
> > > +	if (ret && last_pinned_vma) {
> > > +		list_for_each_entry(vma, &vm->vm_bind_list, vm_bind_link) {
> > > +			__i915_vma_unpin(vma);
> > > +			if (vma == last_pinned_vma)
> > > +				break;
> > > +		}
> > > +	} else if (last_pinned_vma) {
> > > +		eb->args->flags |= __EXEC3_HAS_PIN;
> > > +	}
> > > +
> > > +	return ret;
> > >  }
> > >
> > >  static int eb_validate_vmas(struct i915_execbuffer *eb)
> > > @@ -162,8 +292,17 @@ static int eb_validate_vmas(struct i915_execbuffer *eb)
> > >  	/* only throttle once, even if we didn't need to throttle */
> > >  	throttle = false;
> > >
> > > +	err = eb_lock_vmas(eb);
> > > +	if (err)
> > > +		goto err;
> > > +
> > > +	err = eb_validate_persistent_vmas(eb);
> > > +	if (err)
> > > +		goto err;
> > > +
> > >  err:
> > >  	if (err == -EDEADLK) {
> > > +		eb_release_vmas(eb, false);
> > >  		err = i915_gem_ww_ctx_backoff(&eb->ww);
> > >  		if (!err)
> > >  			goto retry;
> > > @@ -187,8 +326,43 @@
> > >  	BUILD_BUG_ON(!typecheck(int, _i)); \
> > >  	for ((_i) = (_eb)->num_batches - 1; (_i) >= 0; --(_i))
> > >
> > > +static void __eb_persistent_add_shared_fence(struct drm_i915_gem_object *obj,
> > > +					     struct dma_fence *fence)
> > > +{
> > > +	dma_resv_add_fence(obj->base.resv, fence, DMA_RESV_USAGE_BOOKKEEP);
> > > +	obj->write_domain = 0;
> > > +	obj->read_domains |= I915_GEM_GPU_DOMAINS;
> > > +	obj->mm.dirty = true;
> > > +}
> > > +
> > > +static void eb_persistent_add_shared_fence(struct i915_execbuffer *eb)
> > > +{
> > > +	struct i915_address_space *vm = eb->context->vm;
> > > +	struct dma_fence *fence;
> > > +	struct i915_vma *vma;
> > > +
> > > +	fence = eb->composite_fence ? eb->composite_fence :
> > > +		&eb->requests[0]->fence;
> > > +
> > > +	__eb_persistent_add_shared_fence(vm->root_obj, fence);
> > > +	list_for_each_entry(vma, &vm->non_priv_vm_bind_list,
> > > +			    non_priv_vm_bind_link)
> > > +		__eb_persistent_add_shared_fence(vma->obj, fence);
> > > +}
> > > +
> > > +static void eb_persistent_vmas_move_to_active(struct i915_execbuffer *eb)
> > > +{
> > > +	/* Add fence to BOs dma-resv fence list */
> > > +	eb_persistent_add_shared_fence(eb);
> >
> > This means we don't add any fences to the vma active trackers. While
> > this works fine for TTM delayed buffer destruction, unbinding at
> > eviction and shrinking wouldn't wait for gpu activity to idle before
> > unbinding?
>
> Eviction and shrinker will wait for gpu activity to idle before
> unbinding.
> The i915_vma_is_active() and i915_vma_sync() have been updated to
> handle the persistent vmas differently (by checking/waiting for the
> dma-resv fence list).

Ah, yes. Now I see.

Still, the async unbinding path, __i915_vma_unbind_async(), needs an
update? It should probably add an i915_sw_fence_await_reservation()
(modified if needed for bookkeeping fences) on the vm's reservation
object there?

/Thomas

>
> Niranjana
>
> >
> > /Thomas
> >
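
As a rough illustration of the behaviour Niranjana describes above (this is not the actual patch 08/10 from the series, which is not quoted in this thread), a persistent-VMA-aware i915_vma_sync() could fall back to waiting on the object's DMA_RESV_USAGE_BOOKKEEP fences once the active tracker has idled. The i915_vma_is_persistent() helper is assumed to be introduced elsewhere in the VM_BIND series:

/*
 * Illustrative sketch only, not the change actually merged in the series.
 * Assumes an i915_vma_is_persistent() helper from the VM_BIND patches.
 */
int i915_vma_sync(struct i915_vma *vma)
{
	long lret;
	int ret;

	/* Wait for the asynchronous bindings and pending GPU reads. */
	ret = i915_active_wait(&vma->active);
	if (ret || !i915_vma_is_persistent(vma))
		return ret;

	/*
	 * Persistent (VM_BIND) vmas are not tracked via vma->active; their
	 * GPU activity is recorded on the object's reservation object as
	 * DMA_RESV_USAGE_BOOKKEEP fences, so wait for those instead.
	 */
	lret = dma_resv_wait_timeout(vma->obj->base.resv,
				     DMA_RESV_USAGE_BOOKKEEP,
				     false, MAX_SCHEDULE_TIMEOUT);
	return lret < 0 ? lret : 0;
}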
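
And a minimal sketch of the await Thomas suggests for the async unbind path, purely hypothetical: it iterates the vm's reservation object with DMA_RESV_USAGE_BOOKKEEP and uses the existing i915_sw_fence_await_dma_fence() helper rather than a modified i915_sw_fence_await_reservation(), since the latter currently selects fences by read/write usage only. vm->root_obj comes from this series; the surrounding __i915_vma_unbind_async() plumbing and locking are elided.

/*
 * Hypothetical sketch of the suggestion above, not a tested patch.
 * The caller is assumed to hold the vm->root_obj reservation lock, and
 * "unbind_fence" is the i915_sw_fence gating the async unbind work.
 */
static int
await_vm_bookkeep_fences(struct i915_sw_fence *unbind_fence,
			 struct i915_address_space *vm)
{
	struct dma_resv_iter cursor;
	struct dma_fence *fence;
	int err;

	/* BOOKKEEP usage also iterates KERNEL/WRITE/READ fences. */
	dma_resv_for_each_fence(&cursor, vm->root_obj->base.resv,
				DMA_RESV_USAGE_BOOKKEEP, fence) {
		err = i915_sw_fence_await_dma_fence(unbind_fence, fence,
						    0, GFP_NOWAIT);
		if (err < 0)
			return err;
	}

	return 0;
}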