From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>

Contexts marked as virtual can be load balanced between available engine
instances.

In this trivial implementation there are two important points to keep in
mind:

1. Best engine is chosen by round-robin on every submission. Every time a
   context is transferred between engines an implicit synchronization point
   is created, where the execution on the new engine can only continue once
   the execution on the current engine has stopped (for each context).

   The round-robin on every submission is also far from ideal. If desired it
   could later be improved with engine busyness or queue-depth based
   approaches, which were demonstrated to work well when used for userspace
   based balancing.

2. The engine is selected at the execbuf level, which may be quite distant
   in time from when the GPU actually becomes available to run things.

IMPORTANT CAVEAT: This prototype implementation does not guarantee context
state. To provide context state in this prototype a much more "real"
virtual engine would have to be created, which would involve significant
refactoring.

Userspace which uses specific engine features, not present on all engine
instances, needs to signal this fact via the engine capabilities uAPI. i915
will then make sure only compatible engines are used for executing the
submission.

v2:
 * Fix GT2 configs and no VCS engine.
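[Editor's note: the round-robin-with-capability-fallback selection the patch performs can be sketched in plain C11. This is an illustrative stand-alone model, not i915 code: `select_vcs`, `NUM_VCS` and the toy `struct engine` are made-up names, and C11 `atomic_fetch_xor(obj, val)` takes (object, operand) whereas the kernel's `atomic_fetch_xor(i, v)` takes (value, pointer).]

```c
#include <stdatomic.h>

#define NUM_VCS 2 /* two video decode engine instances, as on GT3+ parts */

struct engine {
	unsigned int caps; /* capability bitmask this instance advertises */
};

/* Per-device round-robin cursor, toggled atomically on every submission. */
static atomic_uint dispatch_index;

/*
 * Pick a VCS instance round-robin, falling back to the other instance when
 * the first choice lacks the capabilities the submission requires.
 * Returns the chosen index, or -1 (the patch's -EINVAL) if no instance
 * satisfies 'caps'.
 */
static int select_vcs(const struct engine engines[NUM_VCS], unsigned int caps)
{
	/* Toggle the shared cursor; use the pre-toggle value, like the patch. */
	unsigned int instance = atomic_fetch_xor(&dispatch_index, 1) & 1;
	unsigned int tries = NUM_VCS;

	do {
		if ((caps & engines[instance].caps) == caps)
			return (int)instance;
		instance ^= 1; /* try the other instance */
	} while (--tries);

	return -1; /* no compatible instance */
}
```

Note how a submission that needs a capability only one instance has always lands on that instance, while fully portable submissions alternate between the two.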
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 46 +++++++++++++++++++++++++++---
 1 file changed, 42 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index fa1806ed9be6..f89a7be68133 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -204,6 +204,8 @@ struct i915_execbuffer {
 	struct drm_i915_gem_request *request; /** our request to build */
 	struct i915_vma *batch; /** identity of the batch obj/vma */
 
+	struct drm_i915_gem_request *prev_request; /** request to depend on */
+
 	/** actual size of execobj[] as we may extend it for the cmdparser */
 	unsigned int buffer_count;
 
@@ -2018,23 +2020,51 @@ gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv,
 static int
 eb_select_engine_class_instance(struct i915_execbuffer *eb)
 {
+	struct drm_i915_private *i915 = eb->i915;
 	u64 eb_flags = eb->args->flags;
 	u8 class = eb_flags & I915_EXEC_RING_MASK;
 	u8 instance = (eb_flags & I915_EXEC_INSTANCE_MASK) >>
 		      I915_EXEC_INSTANCE_SHIFT;
 	u8 caps = (eb_flags & I915_EXEC_ENGINE_CAP_MASK) >>
 		  I915_EXEC_ENGINE_CAP_SHIFT;
+	struct drm_i915_gem_request *prev_req = NULL;
 	struct intel_engine_cs *engine;
 
-	if (instance && eb->ctx->virtual)
+	if (eb->ctx->virtual && instance) {
 		return -EINVAL;
+	} else if ((HAS_BSD(i915) && HAS_BSD2(i915)) && eb->ctx->virtual &&
+		   class == I915_ENGINE_CLASS_VIDEO) {
+		unsigned int vcs_instances = 2;
+		struct intel_timeline *timeline;
 
-	engine = intel_engine_lookup_user(eb->i915, class, instance);
+		instance = atomic_fetch_xor(1,
+					    &i915->mm.bsd_engine_dispatch_index);
 
-	if (engine && ((caps & engine->caps) != caps))
-		return -EINVAL;
+		do {
+			engine = i915->engine[_VCS(instance)];
+			instance ^= 1;
+			vcs_instances--;
+		} while ((caps & engine->caps) != caps && vcs_instances > 0);
+
+		if ((caps & engine->caps) != caps)
+			return -EINVAL;
+
+		timeline = i915_gem_context_lookup_timeline_class(eb->ctx,
+								  VIDEO_DECODE_CLASS);
+		spin_lock_irq(&timeline->lock);
+		prev_req = list_first_entry_or_null(&timeline->requests,
+						    struct drm_i915_gem_request,
+						    ctx_link);
+		spin_unlock_irq(&timeline->lock);
+	} else {
+		engine = intel_engine_lookup_user(i915, class, instance);
+
+		if (engine && ((caps & engine->caps) != caps))
+			return -EINVAL;
+	}
 
 	eb->engine = engine;
+	eb->prev_request = prev_req;
 
 	return 0;
 }
@@ -2100,6 +2130,7 @@ static int eb_select_engine(struct i915_execbuffer *eb, struct drm_file *file)
 	}
 
 	eb->engine = engine;
+	eb->prev_request = NULL;
 
 	return 0;
 }
@@ -2427,6 +2458,13 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 		goto err_batch_unpin;
 	}
 
+	if (eb.prev_request) {
+		err = i915_gem_request_await_dma_fence(eb.request,
+						       &eb.prev_request->fence);
+		if (err)
+			goto err_request;
+	}
+
 	if (in_fence) {
 		err = i915_gem_request_await_dma_fence(eb.request, in_fence);
 		if (err < 0)
-- 
2.14.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
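[Editor's note: the implicit synchronization point the commit message describes — the new request awaiting the context's previous request before it may run on the newly chosen engine, mirroring the `i915_gem_request_await_dma_fence()` hunk above — can be modelled with a toy fence. This is an illustration only; `struct fence`, `add_context_dependency` and `can_start` are made-up names, not i915 or dma-fence API.]

```c
#include <stdbool.h>
#include <stddef.h>

/* Toy stand-in for a dma_fence: signalled once the request retires. */
struct fence {
	bool signalled;
};

struct request {
	struct fence fence;
	const struct fence *await; /* fence this request must wait for */
	int engine;                /* engine instance it was queued to */
};

/*
 * If the context already has an in-flight request (possibly on another
 * engine), make the new request await its fence, so execution can only
 * start once the old engine has finished with this context.
 */
static void add_context_dependency(struct request *rq,
				   const struct request *prev)
{
	rq->await = prev ? &prev->fence : NULL;
}

/* A request may start only when its awaited fence (if any) has signalled. */
static bool can_start(const struct request *rq)
{
	return !rq->await || rq->await->signalled;
}
```

This is exactly why round-robin on every submission serializes a busy context across engines: each transfer inserts one such dependency edge.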