Re: [PATCH v4 09/17] drm/i915/vm_bind: Add out fence support

On 19/10/2022 03:43, Niranjana Vishwanathapura wrote:
On Tue, Oct 18, 2022 at 04:28:07PM +0100, Matthew Auld wrote:
On 18/10/2022 08:16, Niranjana Vishwanathapura wrote:
Add support for handling out fence for vm_bind call.

v2: Reset vma->vm_bind_fence.syncobj to NULL at the end
    of vm_bind call.
v3: Remove vm_unbind out fence uapi which is not supported yet.

Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@xxxxxxxxx>
Signed-off-by: Andi Shyti <andi.shyti@xxxxxxxxxxxxxxx>
---
 drivers/gpu/drm/i915/gem/i915_gem_vm_bind.h   |  4 +
 .../drm/i915/gem/i915_gem_vm_bind_object.c    | 82 +++++++++++++++++++
 drivers/gpu/drm/i915/i915_vma.c               |  7 +-
 drivers/gpu/drm/i915/i915_vma_types.h         |  7 ++
 include/uapi/drm/i915_drm.h                   | 49 ++++++++++-
 5 files changed, 146 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_vm_bind.h b/drivers/gpu/drm/i915/gem/i915_gem_vm_bind.h
index 36262a6357b5..b70e900e35ab 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_vm_bind.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_vm_bind.h
@@ -8,6 +8,7 @@
 #include <linux/types.h>
+struct dma_fence;
 struct drm_device;
 struct drm_file;
 struct i915_address_space;
@@ -23,4 +24,7 @@ int i915_gem_vm_unbind_ioctl(struct drm_device *dev, void *data,
 void i915_gem_vm_unbind_all(struct i915_address_space *vm);
+void i915_vm_bind_signal_fence(struct i915_vma *vma,
+                   struct dma_fence * const fence);
+
 #endif /* __I915_GEM_VM_BIND_H */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_vm_bind_object.c b/drivers/gpu/drm/i915/gem/i915_gem_vm_bind_object.c
index 3ea3cb3ed97e..63889ba00183 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_vm_bind_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_vm_bind_object.c
@@ -7,6 +7,8 @@
 #include <linux/interval_tree_generic.h>
+#include <drm/drm_syncobj.h>
+
 #include "gem/i915_gem_context.h"
 #include "gem/i915_gem_vm_bind.h"
@@ -100,6 +102,76 @@ static void i915_gem_vm_bind_remove(struct i915_vma *vma, bool release_obj)
         i915_gem_object_put(vma->obj);
 }
+static int i915_vm_bind_add_fence(struct drm_file *file, struct i915_vma *vma,
+                  u32 handle, u64 point)
+{
+    struct drm_syncobj *syncobj;
+
+    syncobj = drm_syncobj_find(file, handle);
+    if (!syncobj) {
+        DRM_DEBUG("Invalid syncobj handle provided\n");
+        return -ENOENT;
+    }
+
+    /*
+     * For timeline syncobjs we need to preallocate chains for
+     * later signaling.
+     */
+    if (point) {
+        vma->vm_bind_fence.chain_fence = dma_fence_chain_alloc();
+        if (!vma->vm_bind_fence.chain_fence) {
+            drm_syncobj_put(syncobj);
+            return -ENOMEM;
+        }
+    } else {
+        vma->vm_bind_fence.chain_fence = NULL;
+    }
+    vma->vm_bind_fence.syncobj = syncobj;
+    vma->vm_bind_fence.value = point;
+
+    return 0;
+}
+
+static void i915_vm_bind_put_fence(struct i915_vma *vma)
+{
+    if (!vma->vm_bind_fence.syncobj)
+        return;
+
+    drm_syncobj_put(vma->vm_bind_fence.syncobj);
+    dma_fence_chain_free(vma->vm_bind_fence.chain_fence);
+    vma->vm_bind_fence.syncobj = NULL;
+}
+
+/**
+ * i915_vm_bind_signal_fence() - Add fence to vm_bind syncobj
+ * @vma: vma mapping requiring signaling
+ * @fence: fence to be added
+ *
+ * Associate specified @fence with the @vma's syncobj to be
+ * signaled after the @fence work completes.
+ */
+void i915_vm_bind_signal_fence(struct i915_vma *vma,
+                   struct dma_fence * const fence)
+{
+    struct drm_syncobj *syncobj = vma->vm_bind_fence.syncobj;
+
+    if (!syncobj)
+        return;
+
+    if (vma->vm_bind_fence.chain_fence) {
+        drm_syncobj_add_point(syncobj,
+                      vma->vm_bind_fence.chain_fence,
+                      fence, vma->vm_bind_fence.value);
+        /*
+         * The chain's ownership is transferred to the
+         * timeline.
+         */
+        vma->vm_bind_fence.chain_fence = NULL;
+    } else {
+        drm_syncobj_replace_fence(syncobj, fence);
+    }
+}
+
 static int i915_gem_vm_unbind_vma(struct i915_address_space *vm,
                   struct drm_i915_gem_vm_unbind *va)
 {
@@ -237,6 +309,13 @@ static int i915_gem_vm_bind_obj(struct i915_address_space *vm,
         goto unlock_vm;
     }
+    if (va->fence.flags & I915_TIMELINE_FENCE_SIGNAL) {
+        ret = i915_vm_bind_add_fence(file, vma, va->fence.handle,
+                         va->fence.value);
+        if (ret)
+            goto put_vma;
+    }
+
     pin_flags = va->start | PIN_OFFSET_FIXED | PIN_USER | PIN_VALIDATE;
     for_i915_gem_ww(&ww, ret, true) {
@@ -258,6 +337,9 @@ static int i915_gem_vm_bind_obj(struct i915_address_space *vm,
         i915_gem_object_get(vma->obj);
     }
+    if (va->fence.flags & I915_TIMELINE_FENCE_SIGNAL)
+        i915_vm_bind_put_fence(vma);
+put_vma:
     if (ret)
         i915_vma_destroy(vma);
 unlock_vm:
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 04abdb92c2b2..eaa13e9ba966 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -29,6 +29,7 @@
 #include "display/intel_frontbuffer.h"
 #include "gem/i915_gem_lmem.h"
 #include "gem/i915_gem_tiling.h"
+#include "gem/i915_gem_vm_bind.h"
 #include "gt/intel_engine.h"
 #include "gt/intel_engine_heartbeat.h"
 #include "gt/intel_gt.h"
@@ -1567,8 +1568,12 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 err_vma_res:
     i915_vma_resource_free(vma_res);
 err_fence:
-    if (work)
+    if (work) {
+        if (i915_vma_is_persistent(vma))
+            i915_vm_bind_signal_fence(vma, &work->base.dma);
+
         dma_fence_work_commit_imm(&work->base);
+    }
 err_rpm:
     if (wakeref)
         intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref);
diff --git a/drivers/gpu/drm/i915/i915_vma_types.h b/drivers/gpu/drm/i915/i915_vma_types.h
index d32c72e8d242..2c740500ac1b 100644
--- a/drivers/gpu/drm/i915/i915_vma_types.h
+++ b/drivers/gpu/drm/i915/i915_vma_types.h
@@ -308,6 +308,13 @@ struct i915_vma {
     /* @vm_rebind_link: link to vm_rebind_list and protected by vm_rebind_lock */
     struct list_head vm_rebind_link; /* Link in vm_rebind_list */
+    /** Timeline fence for vm_bind completion notification */
+    struct {
+        struct dma_fence_chain *chain_fence;
+        struct drm_syncobj *syncobj;
+        u64 value;
+    } vm_bind_fence;
+
     /** Interval tree structures for persistent vma */
     /** @rb: node for the interval tree of vm for persistent vmas */
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 4383ed85f10a..87f5c2a470f5 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1527,6 +1527,41 @@ struct drm_i915_gem_execbuffer2 {
 #define i915_execbuffer2_get_context_id(eb2) \
     ((eb2).rsvd1 & I915_EXEC_CONTEXT_ID_MASK)
+/**
+ * struct drm_i915_gem_timeline_fence - An input or output timeline fence.
+ *
+ * The operation will wait for input fence to signal.
+ *
+ * The returned output fence will be signaled after the completion of the
+ * operation.
+ */
+struct drm_i915_gem_timeline_fence {
+    /** @handle: User's handle for a drm_syncobj to wait on or signal. */
+    __u32 handle;
+
+    /**
+     * @flags: Supported flags are:
+     *
+     * I915_TIMELINE_FENCE_WAIT:
+     * Wait for the input fence before the operation.
+     *
+     * I915_TIMELINE_FENCE_SIGNAL:
+     * Return operation completion fence as output.
+     */
+    __u32 flags;
+#define I915_TIMELINE_FENCE_WAIT            (1 << 0)
+#define I915_TIMELINE_FENCE_SIGNAL          (1 << 1)

So this is the out-fence for bind completion, which then gets fed into execbuf? Is the idea here to always get an out fence, and feed that into execbuf to correctly order the bind(s) vs submit, i.e. that ordering is left to userspace? IIRC we now do await_bind() in execbuf3, which I guess forces the synchronisation, so just wondering if we need that, or what's the story here with the out-fence for vm_bind?


Yah, vm_bind and execbuf3 are independent paths and the user has to
resolve any dependencies.
I will be adding support for synchronous bind when a vm_bind out fence
is not requested (more below).
The execbuf3 path will only await_bind() for the mappings whose binding
it started itself (the evicted/invalidated mappings it scooped up). It
won't await_bind() for mappings bound through the vm_bind ioctl call.

Ah, got it. Thanks.
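
For illustration, a rough userspace-side sketch of that ordering (hypothetical helper, not from the series; DRM_IOCTL_I915_GEM_VM_BIND and the drm_i915_gem_vm_bind field names are assumed from earlier patches in this series, and error handling is kept minimal):

#include <stdint.h>
#include <string.h>

#include <xf86drm.h>          /* drmIoctl(), drmSyncobjCreate(), drmSyncobjWait() */
#include <drm/i915_drm.h>     /* uapi with the vm_bind structs from this series */

/*
 * Illustrative only: bind a BO with a completion out fence, then block on
 * that fence before submitting GPU work that uses the new mapping.
 */
static int bind_and_sync(int fd, uint32_t vm_id, uint32_t bo_handle,
                         uint64_t gpu_addr, uint64_t size)
{
    struct drm_i915_gem_vm_bind bind;
    uint32_t syncobj;
    int ret;

    ret = drmSyncobjCreate(fd, 0, &syncobj);
    if (ret)
        return ret;

    memset(&bind, 0, sizeof(bind));
    bind.vm_id = vm_id;              /* field names per the vm_bind uapi in this series */
    bind.handle = bo_handle;
    bind.start = gpu_addr;
    bind.offset = 0;
    bind.length = size;
    bind.fence.handle = syncobj;     /* request a bind completion out fence */
    bind.fence.flags = I915_TIMELINE_FENCE_SIGNAL;
    bind.fence.value = 0;            /* binary syncobj */

    ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_VM_BIND, &bind);
    if (ret)
        return ret;

    /* Order bind vs. submit explicitly: wait until the bind has completed. */
    return drmSyncobjWait(fd, &syncobj, 1, INT64_MAX, 0, NULL);
}

In practice the syncobj would more likely be handed to execbuf3 as an in fence rather than waited on from the CPU, but the ordering responsibility stays with userspace either way.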


+#define __I915_TIMELINE_FENCE_UNKNOWN_FLAGS (-(I915_TIMELINE_FENCE_SIGNAL << 1))
+
+    /**
+     * @value: A point in the timeline.
+     * Value must be 0 for a binary drm_syncobj. A Value of 0 for a
+     * timeline drm_syncobj is invalid as it turns a drm_syncobj into a
+     * binary one.
+     */
+    __u64 value;
+};
+
 struct drm_i915_gem_pin {
     /** Handle of the buffer to be pinned. */
     __u32 handle;
@@ -3826,8 +3861,18 @@ struct drm_i915_gem_vm_bind {
      */
     __u64 flags;
-    /** @rsvd: Reserved, MBZ */
-    __u64 rsvd[2];
+    /**
+     * @fence: Timeline fence for bind completion signaling.
+     *
+     * Timeline fence is of format struct drm_i915_gem_timeline_fence.
+     *
+     * It is an out fence, hence using I915_TIMELINE_FENCE_WAIT flag
+     * is invalid, and an error will be returned.

Where do we reject that? Maybe I'm blind...


No you are not :)... I will add the check in the vm_bind call.
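
Something like the below in i915_gem_vm_bind_obj() would probably cover it (a minimal sketch of the missing check, not part of this patch):

    /*
     * Sketch only: the vm_bind fence is an out fence, so reject the wait
     * flag and any unknown flag bits before honouring
     * I915_TIMELINE_FENCE_SIGNAL.
     */
    if (va->fence.flags & I915_TIMELINE_FENCE_WAIT)
        return -EINVAL;

    if (va->fence.flags & __I915_TIMELINE_FENCE_UNKNOWN_FLAGS)
        return -EINVAL;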

+     *
+     * If I915_TIMELINE_FENCE_SIGNAL flag is not set, then out fence
+     * is not requested and binding is completed synchronously.

"completed synchronously", where do we do that currently?


Hmm... looks like we are not. Will add the wait call in the vm_bind path.

Niranjana
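
For completeness, the synchronous path could roughly take the shape below (purely illustrative, not what the patch does; bind_fence here is a hypothetical handle to the bind work's dma_fence, i.e. the same &work->base.dma passed to i915_vm_bind_signal_fence() in i915_vma_pin_ww() above, which would need to be made reachable at this point):

    /*
     * Hypothetical sketch: no out fence was requested, so complete the
     * bind synchronously by waiting (interruptibly) on the bind work's
     * fence before returning from the ioctl.
     */
    if (!(va->fence.flags & I915_TIMELINE_FENCE_SIGNAL)) {
        long err = dma_fence_wait(bind_fence, true);

        if (err < 0)
            ret = err;
    }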

+     */
+    struct drm_i915_gem_timeline_fence fence;
     /**
      * @extensions: Zero-terminated chain of extensions.


