Re: [RFC 07/10] drm/i915/vm_bind: Handle persistent vmas in execbuf3

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Jul 07, 2022 at 07:54:16AM -0700, Hellstrom, Thomas wrote:
On Fri, 2022-07-01 at 15:50 -0700, Niranjana Vishwanathapura wrote:
Handle persistent (VM_BIND) mappings during the request submission
in the execbuf3 path.

Signed-off-by: Niranjana Vishwanathapura
<niranjana.vishwanathapura@xxxxxxxxx>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer3.c   | 176
+++++++++++++++++-
 1 file changed, 175 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer3.c
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer3.c
index 13121df72e3d..2079f5ca9010 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer3.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer3.c
@@ -22,6 +22,7 @@
 #include "i915_gem_vm_bind.h"
 #include "i915_trace.h"

+#define __EXEC3_HAS_PIN                        BIT_ULL(33)
 #define __EXEC3_ENGINE_PINNED          BIT_ULL(32)
 #define __EXEC3_INTERNAL_FLAGS         (~0ull << 32)

@@ -45,7 +46,9 @@
  * execlist. Hence, no support for implicit sync.
  *
  * The new execbuf3 ioctl only works in VM_BIND mode and the VM_BIND
mode only
- * works with execbuf3 ioctl for submission.
+ * works with execbuf3 ioctl for submission. All BOs mapped on that
VM (through
+ * VM_BIND call) at the time of execbuf3 call are deemed required
for that
+ * submission.
  *
  * The execbuf3 ioctl directly specifies the batch addresses instead
of as
  * object handles as in execbuf2 ioctl. The execbuf3 ioctl will also
not
@@ -61,6 +64,13 @@
  * So, a lot of code supporting execbuf2 ioctl, like relocations, VA
evictions,
  * vma lookup table, implicit sync, vma active reference tracking
etc., are not
  * applicable for execbuf3 ioctl.
+ *
+ * During each execbuf submission, request fence is added to all
VM_BIND mapped
+ * objects with DMA_RESV_USAGE_BOOKKEEP. The DMA_RESV_USAGE_BOOKKEEP
usage will
+ * prevent over sync (See enum dma_resv_usage). Note that
DRM_I915_GEM_WAIT and
+ * DRM_I915_GEM_BUSY ioctls do not check for DMA_RESV_USAGE_BOOKKEEP
usage and
+ * hence should not be used for end of batch check. Instead, the
execbuf3
+ * timeline out fence should be used for end of batch check.
  */

 struct eb_fence {
@@ -124,6 +134,19 @@ eb_find_vma(struct i915_address_space *vm, u64
addr)
        return i915_gem_vm_bind_lookup_vma(vm, va);
 }

+static void eb_scoop_unbound_vmas(struct i915_address_space *vm)
+{
+       struct i915_vma *vma, *vn;
+
+       spin_lock(&vm->vm_rebind_lock);
+       list_for_each_entry_safe(vma, vn, &vm->vm_rebind_list,
vm_rebind_link) {
+               list_del_init(&vma->vm_rebind_link);
+               if (!list_empty(&vma->vm_bind_link))
+                       list_move_tail(&vma->vm_bind_link, &vm-
>vm_bind_list);
+       }
+       spin_unlock(&vm->vm_rebind_lock);
+}
+
 static int eb_lookup_vmas(struct i915_execbuffer *eb)
 {
        unsigned int i, current_batch = 0;
@@ -138,11 +161,118 @@ static int eb_lookup_vmas(struct
i915_execbuffer *eb)
                ++current_batch;
        }

+       eb_scoop_unbound_vmas(eb->context->vm);
+
+       return 0;
+}
+
+static int eb_lock_vmas(struct i915_execbuffer *eb)
+{
+       struct i915_address_space *vm = eb->context->vm;
+       struct i915_vma *vma;
+       int err;
+
+       err = i915_gem_vm_priv_lock(eb->context->vm, &eb->ww);
+       if (err)
+               return err;
+

See comment in review for 08/10 about re-checking the rebind list here.



+       list_for_each_entry(vma, &vm->non_priv_vm_bind_list,
+                           non_priv_vm_bind_link) {
+               err = i915_gem_object_lock(vma->obj, &eb->ww);
+               if (err)
+                       return err;
+       }
+
        return 0;
 }

+static void eb_release_persistent_vmas(struct i915_execbuffer *eb,
bool final)
+{
+       struct i915_address_space *vm = eb->context->vm;
+       struct i915_vma *vma, *vn;
+
+       assert_vm_bind_held(vm);
+
+       if (!(eb->args->flags & __EXEC3_HAS_PIN))
+               return;
+
+       assert_vm_priv_held(vm);
+
+       list_for_each_entry(vma, &vm->vm_bind_list, vm_bind_link)
+               __i915_vma_unpin(vma);
+
+       eb->args->flags &= ~__EXEC3_HAS_PIN;
+       if (!final)
+               return;
+
+       list_for_each_entry_safe(vma, vn, &vm->vm_bind_list,
vm_bind_link)
+               if (i915_vma_is_bind_complete(vma))
+                       list_move_tail(&vma->vm_bind_link, &vm-
>vm_bound_list);
+}
+
 static void eb_release_vmas(struct i915_execbuffer *eb, bool final)
 {
+       eb_release_persistent_vmas(eb, final);
+       eb_unpin_engine(eb);
+}
+
+static int eb_reserve_fence_for_persistent_vmas(struct
i915_execbuffer *eb)
+{
+       struct i915_address_space *vm = eb->context->vm;
+       struct i915_vma *vma;
+       int ret;
+
+       ret = dma_resv_reserve_fences(vm->root_obj->base.resv, 1);
+       if (ret)
+               return ret;
+
+       list_for_each_entry(vma, &vm->non_priv_vm_bind_list,
+                           non_priv_vm_bind_link) {
+               ret = dma_resv_reserve_fences(vma->obj->base.resv,
1);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+
+static int eb_validate_persistent_vmas(struct i915_execbuffer *eb)
+{
+       struct i915_address_space *vm = eb->context->vm;
+       struct i915_vma *vma, *last_pinned_vma = NULL;
+       int ret = 0;
+
+       assert_vm_bind_held(vm);
+       assert_vm_priv_held(vm);
+
+       ret = eb_reserve_fence_for_persistent_vmas(eb);
+       if (ret)
+               return ret;
+
+       if (list_empty(&vm->vm_bind_list))
+               return 0;
+
+       list_for_each_entry(vma, &vm->vm_bind_list, vm_bind_link) {
+               u64 pin_flags = vma->start | PIN_OFFSET_FIXED |
PIN_USER;
+
+               ret = i915_vma_pin_ww(vma, &eb->ww, 0, 0, pin_flags);
+               if (ret)
+                       break;
+
+               last_pinned_vma = vma;
+       }
+
+       if (ret && last_pinned_vma) {
+               list_for_each_entry(vma, &vm->vm_bind_list,
vm_bind_link) {
+                       __i915_vma_unpin(vma);
+                       if (vma == last_pinned_vma)
+                               break;
+               }
+       } else if (last_pinned_vma) {
+               eb->args->flags |= __EXEC3_HAS_PIN;
+       }
+
+       return ret;
 }

 static int eb_validate_vmas(struct i915_execbuffer *eb)
@@ -162,8 +292,17 @@ static int eb_validate_vmas(struct
i915_execbuffer *eb)
        /* only throttle once, even if we didn't need to throttle */
        throttle = false;

+       err = eb_lock_vmas(eb);
+       if (err)
+               goto err;
+
+       err = eb_validate_persistent_vmas(eb);
+       if (err)
+               goto err;
+
 err:
        if (err == -EDEADLK) {
+               eb_release_vmas(eb, false);
                err = i915_gem_ww_ctx_backoff(&eb->ww);
                if (!err)
                        goto retry;
@@ -187,8 +326,43 @@ static int eb_validate_vmas(struct
i915_execbuffer *eb)
        BUILD_BUG_ON(!typecheck(int, _i)); \
        for ((_i) = (_eb)->num_batches - 1; (_i) >= 0; --(_i))

+static void __eb_persistent_add_shared_fence(struct
drm_i915_gem_object *obj,
+                                            struct dma_fence *fence)
+{
+       dma_resv_add_fence(obj->base.resv, fence,
DMA_RESV_USAGE_BOOKKEEP);
+       obj->write_domain = 0;
+       obj->read_domains |= I915_GEM_GPU_DOMAINS;
+       obj->mm.dirty = true;
+}
+
+static void eb_persistent_add_shared_fence(struct i915_execbuffer
*eb)
+{
+       struct i915_address_space *vm = eb->context->vm;
+       struct dma_fence *fence;
+       struct i915_vma *vma;
+
+       fence = eb->composite_fence ? eb->composite_fence :
+               &eb->requests[0]->fence;
+
+       __eb_persistent_add_shared_fence(vm->root_obj, fence);
+       list_for_each_entry(vma, &vm->non_priv_vm_bind_list,
+                           non_priv_vm_bind_link)
+               __eb_persistent_add_shared_fence(vma->obj, fence);
+}
+
+static void eb_persistent_vmas_move_to_active(struct i915_execbuffer
*eb)
+{
+       /* Add fence to BOs dma-resv fence list */
+       eb_persistent_add_shared_fence(eb);

This means we don't add any fences to the vma active trackers. While
this works fine for TTM delayed buffer destruction, unbinding at
eviction and shrinking wouldn't wait for gpu activity to idle before
unbinding?

Eviction and shrinker will wait for gpu activity to idle before unbinding.
The i915_vma_is_active() and i915_vma_sync() have been updated to handle
the persistent vmas differently (by checking/waiting on the dma-resv fence
list).

Niranjana



/Thomas




[Index of Archives]     [AMD Graphics]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux