Moving SINGLE_TIMELINE to a syncobj emulation has a couple of technical
advantages beyond mere annoyance. One is that intel_timeline is no
longer an api-visible object and can remain entirely an implementation
detail. This may be advantageous as we make scheduler changes going
forward. Second is that, together with deleting the CLONE_CONTEXT API,
we should now have a 1:1 mapping between intel_context and
intel_timeline which may help us reduce locking.
v2 (Jason Ekstrand):
- Update the comment on i915_gem_context::syncobj to mention that it's
an emulation and the possible race if userspace calls execbuffer2
twice on the same context concurrently.
- Wrap the checks for eb.gem_context->syncobj in unlikely()
- Drop the dma_fence reference
- Improved commit message
v3 (Jason Ekstrand):
- Move the dma_fence_put() to before the error exit
Signed-off-by: Jason Ekstrand <jason@xxxxxxxxxxxxxx>
Cc: Maarten Lankhorst <maarten.lankhorst@xxxxxxxxxxxxxxx>
Cc: Matthew Brost <matthew.brost@xxxxxxxxx>
---
drivers/gpu/drm/i915/gem/i915_gem_context.c | 49 +++++--------------
.../gpu/drm/i915/gem/i915_gem_context_types.h | 14 +++++-
.../gpu/drm/i915/gem/i915_gem_execbuffer.c | 16 ++++++
3 files changed, 40 insertions(+), 39 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 2c2fefa912805..a72c9b256723b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -67,6 +67,8 @@
#include <linux/log2.h>
#include <linux/nospec.h>
+#include <drm/drm_syncobj.h>
+
#include "gt/gen6_ppgtt.h"
#include "gt/intel_context.h"
#include "gt/intel_context_param.h"
@@ -225,10 +227,6 @@ static void intel_context_set_gem(struct intel_context *ce,
ce->vm = vm;
}
- GEM_BUG_ON(ce->timeline);
- if (ctx->timeline)
- ce->timeline = intel_timeline_get(ctx->timeline);
-
if (ctx->sched.priority >= I915_PRIORITY_NORMAL &&
intel_engine_has_timeslices(ce->engine))
__set_bit(CONTEXT_USE_SEMAPHORES, &ce->flags);
@@ -351,9 +349,6 @@ void i915_gem_context_release(struct kref *ref)
mutex_destroy(&ctx->engines_mutex);
mutex_destroy(&ctx->lut_mutex);
- if (ctx->timeline)
- intel_timeline_put(ctx->timeline);
-
put_pid(ctx->pid);
mutex_destroy(&ctx->mutex);
@@ -570,6 +565,9 @@ static void context_close(struct i915_gem_context *ctx)
if (vm)
i915_vm_close(vm);
+ if (ctx->syncobj)
+ drm_syncobj_put(ctx->syncobj);
+
ctx->file_priv = ERR_PTR(-EBADF);
/*
@@ -765,33 +763,11 @@ static void __assign_ppgtt(struct i915_gem_context *ctx,
i915_vm_close(vm);
}
-static void __set_timeline(struct intel_timeline **dst,
- struct intel_timeline *src)
-{
- struct intel_timeline *old = *dst;
-
- *dst = src ? intel_timeline_get(src) : NULL;
-
- if (old)
- intel_timeline_put(old);
-}
-
-static void __apply_timeline(struct intel_context *ce, void *timeline)
-{
- __set_timeline(&ce->timeline, timeline);
-}
-
-static void __assign_timeline(struct i915_gem_context *ctx,
- struct intel_timeline *timeline)
-{
- __set_timeline(&ctx->timeline, timeline);
- context_apply_all(ctx, __apply_timeline, timeline);
-}
-
static struct i915_gem_context *
i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags)
{
struct i915_gem_context *ctx;
+ int ret;
if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE &&
!HAS_EXECLISTS(i915))
@@ -820,16 +796,13 @@ i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags)
}
if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) {
- struct intel_timeline *timeline;
-
- timeline = intel_timeline_create(&i915->gt);
- if (IS_ERR(timeline)) {
+ ret = drm_syncobj_create(&ctx->syncobj,
+ DRM_SYNCOBJ_CREATE_SIGNALED,
+ NULL);
+ if (ret) {
context_close(ctx);
- return ERR_CAST(timeline);
+ return ERR_PTR(ret);
}
-
- __assign_timeline(ctx, timeline);
- intel_timeline_put(timeline);
}
trace_i915_context_create(ctx);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index 676592e27e7d2..df76767f0c41b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -83,7 +83,19 @@ struct i915_gem_context {
struct i915_gem_engines __rcu *engines;
struct mutex engines_mutex; /* guards writes to engines */
- struct intel_timeline *timeline;
+ /**
+ * @syncobj: Shared timeline syncobj
+ *
+ * When the SHARED_TIMELINE flag is set on context creation, we
+ * emulate a single timeline across all engines using this syncobj.
+ * For every execbuffer2 call, this syncobj is used as both an in-
+ * and out-fence. Unlike the real intel_timeline, this doesn't
+ * provide perfect atomic in-order guarantees if the client races
+ * with itself by calling execbuffer2 twice concurrently. However,
+ * if userspace races with itself, that's not likely to yield well-
+ * defined results anyway so we choose to not care.
+ */
+ struct drm_syncobj *syncobj;
/**
* @vm: unique address space (GTT)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index b812f313422a9..d640bba6ad9ab 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -3460,6 +3460,16 @@ i915_gem_do_execbuffer(struct drm_device *dev,
goto err_vma;
}
+ if (unlikely(eb.gem_context->syncobj)) {
+ struct dma_fence *fence;
+
+ fence = drm_syncobj_fence_get(eb.gem_context->syncobj);
+ err = i915_request_await_dma_fence(eb.request, fence);
+ dma_fence_put(fence);
+ if (err)
+ goto err_ext;
+ }
+
if (in_fence) {
if (args->flags & I915_EXEC_FENCE_SUBMIT)
err = i915_request_await_execution(eb.request,
@@ -3517,6 +3527,12 @@ i915_gem_do_execbuffer(struct drm_device *dev,
fput(out_fence->file);
}
}
+
+ if (unlikely(eb.gem_context->syncobj)) {
+ drm_syncobj_replace_fence(eb.gem_context->syncobj,
+ &eb.request->fence);
+ }
+
i915_request_put(eb.request);
err_vma: