On Fri, 2022-07-01 at 15:50 -0700, Niranjana Vishwanathapura wrote: > For persistent (vm_bind) vmas of userptr BOs, handle the user > page pinning by using the i915_gem_object_userptr_submit_init() > /done() functions > > Signed-off-by: Niranjana Vishwanathapura > <niranjana.vishwanathapura@xxxxxxxxx> > --- > .../gpu/drm/i915/gem/i915_gem_execbuffer3.c | 67 > +++++++++++++++++++ > .../drm/i915/gem/i915_gem_vm_bind_object.c | 16 +++++ > drivers/gpu/drm/i915/gt/intel_gtt.c | 1 + > drivers/gpu/drm/i915/gt/intel_gtt.h | 1 + > 4 files changed, 85 insertions(+) > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer3.c > b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer3.c > index 2079f5ca9010..bf13dd6d642e 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer3.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer3.c > @@ -22,6 +22,7 @@ > #include "i915_gem_vm_bind.h" > #include "i915_trace.h" > > +#define __EXEC3_USERPTR_USED BIT_ULL(34) > #define __EXEC3_HAS_PIN BIT_ULL(33) > #define __EXEC3_ENGINE_PINNED BIT_ULL(32) > #define __EXEC3_INTERNAL_FLAGS (~0ull << 32) > @@ -147,10 +148,36 @@ static void eb_scoop_unbound_vmas(struct > i915_address_space *vm) > spin_unlock(&vm->vm_rebind_lock); > } > > +static int eb_lookup_persistent_userptr_vmas(struct i915_execbuffer > *eb) > +{ > + struct i915_address_space *vm = eb->context->vm; > + struct i915_vma *last_vma = NULL; > + struct i915_vma *vma; > + int err; > + > + assert_vm_bind_held(vm); > + > + list_for_each_entry(vma, &vm->vm_bind_list, vm_bind_link) { > + if (i915_gem_object_is_userptr(vma->obj)) { > + err = > i915_gem_object_userptr_submit_init(vma->obj); > + if (err) > + return err; > + > + last_vma = vma; > + } > + } > + Don't we need to loop also over non-private userptr objects? > + if (last_vma) > + eb->args->flags |= __EXEC3_USERPTR_USED; > + > + return 0; > +} > + > static int eb_lookup_vmas(struct i915_execbuffer *eb) > { > unsigned int i, current_batch = 0; > struct i915_vma *vma; > + int err = 0; > > for (i = 0; i < eb->num_batches; i++) { > vma = eb_find_vma(eb->context->vm, eb- > >batch_addresses[i]); > @@ -163,6 +190,10 @@ static int eb_lookup_vmas(struct i915_execbuffer > *eb) > > eb_scoop_unbound_vmas(eb->context->vm); > > + err = eb_lookup_persistent_userptr_vmas(eb); > + if (err) > + return err; > + > return 0; > } > > @@ -358,15 +389,51 @@ static void > eb_persistent_vmas_move_to_active(struct i915_execbuffer *eb) > > static int eb_move_to_gpu(struct i915_execbuffer *eb) > { > + int err = 0, j; > + > assert_vm_bind_held(eb->context->vm); > assert_vm_priv_held(eb->context->vm); > > eb_persistent_vmas_move_to_active(eb); > > +#ifdef CONFIG_MMU_NOTIFIER > + if (!err && (eb->args->flags & __EXEC3_USERPTR_USED)) { > + struct i915_vma *vma; > + > + assert_vm_bind_held(eb->context->vm); > + assert_vm_priv_held(eb->context->vm); > + > + read_lock(&eb->i915->mm.notifier_lock); > + list_for_each_entry(vma, &eb->context->vm- > >vm_bind_list, > + vm_bind_link) { > + if (!i915_gem_object_is_userptr(vma->obj)) > + continue; > + > + err = > i915_gem_object_userptr_submit_done(vma->obj); > + if (err) > + break; > + } > + > + read_unlock(&eb->i915->mm.notifier_lock); > + } Since we don't loop over the vm_bound_list, there is a need to check whether the rebind_list is empty here under the notifier_lock in read mode, and in that case, restart from eb_lookup_vmas(). That might also eliminate the need for the __EXEC3_USERPTR_USED flag? That will also catch any objects that were evicted between eb_lookup_vmas() where the rebind_list was last checked, and i915_gem_vm_priv_lock(), which prohibits further eviction, but if we want to catch these earlier (which I think is a good idea), we could check that the rebind_list is indeed empty just after taking the vm_priv_lock(), and if not, restart from eb_lookup_vmas(). > +#endif > + > + if (unlikely(err)) > + goto err_skip; > + > /* Unconditionally flush any chipset caches (for streaming > writes). */ > intel_gt_chipset_flush(eb->gt); > > return 0; > + > +err_skip: > + for_each_batch_create_order(eb, j) { > + if (!eb->requests[j]) > + break; > + > + i915_request_set_error_once(eb->requests[j], err); > + } > + return err; > } > > static int eb_request_submit(struct i915_execbuffer *eb, > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_vm_bind_object.c > b/drivers/gpu/drm/i915/gem/i915_gem_vm_bind_object.c > index 1a8efa83547f..cae282b91618 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_vm_bind_object.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_vm_bind_object.c > @@ -263,6 +263,12 @@ int i915_gem_vm_bind_obj(struct > i915_address_space *vm, > goto put_obj; > } > > + if (i915_gem_object_is_userptr(obj)) { > + ret = i915_gem_object_userptr_submit_init(obj); > + if (ret) > + goto put_obj; > + } > + > ret = i915_gem_vm_bind_lock_interruptible(vm); > if (ret) > goto put_obj; > @@ -295,6 +301,16 @@ int i915_gem_vm_bind_obj(struct > i915_address_space *vm, > /* Make it evictable */ > __i915_vma_unpin(vma); > > +#ifdef CONFIG_MMU_NOTIFIER > + if (i915_gem_object_is_userptr(obj)) { > + write_lock(&vm->i915->mm.notifier_lock); Why do we need the lock in write mode here? > + ret = i915_gem_object_userptr_submit_done(obj); > + write_unlock(&vm->i915->mm.notifier_lock); > + if (ret) > + goto out_ww; > + } > +#endif > + > list_add_tail(&vma->vm_bind_link, &vm->vm_bound_list); > i915_vm_bind_it_insert(vma, &vm->va); > if (!obj->priv_root) > diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c > b/drivers/gpu/drm/i915/gt/intel_gtt.c > index 55d5389b2c6c..4ab3bda644ff 100644 > --- a/drivers/gpu/drm/i915/gt/intel_gtt.c > +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c > @@ -295,6 +295,7 @@ void i915_address_space_init(struct > i915_address_space *vm, int subclass) > GEM_BUG_ON(IS_ERR(vm->root_obj)); > INIT_LIST_HEAD(&vm->vm_rebind_list); > spin_lock_init(&vm->vm_rebind_lock); > + INIT_LIST_HEAD(&vm->invalidate_link); > } > > void *__px_vaddr(struct drm_i915_gem_object *p) > diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h > b/drivers/gpu/drm/i915/gt/intel_gtt.h > index fe5485c4a1cd..f9edf11c144f 100644 > --- a/drivers/gpu/drm/i915/gt/intel_gtt.h > +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h > @@ -267,6 +267,7 @@ struct i915_address_space { > struct list_head vm_bound_list; > struct list_head vm_rebind_list; > spinlock_t vm_rebind_lock; /* Protects vm_rebind_list */ > + struct list_head invalidate_link; > /* va tree of persistent vmas */ > struct rb_root_cached va; > struct list_head non_priv_vm_bind_list;