Re: [PATCH 2/3] drm/i915/scheduler: emulate a scheduler for guc

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 11/01/2017 13:13, Chris Wilson wrote:
This emulates execlists on top of the GuC in order to defer submission of
 > requests to the hardware. This deferral allows time for high priority
requests to gazump their way to the head of the queue, however it nerfs
the GuC by converting it back into a simple execlist (where the CPU has
to wake up after every request to feed new commands into the GuC).

v2: Drop hack status - though iirc there is still a lockdep inversion
between fence and engine->timeline->lock (which is impossible as the
nesting only occurs on different fences - hopefully just requires some
judicious lockdep annotation)

Hm hm... so fence->lock under timeline->lock when we enable signalling, while we already have the opposite in the submit_notify->submit_request yes? That would mean a per fence lock class, which can't work, or a nested annotation on fence->lock? So outside i915?


Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_guc_submission.c | 79 +++++++++++++++++++++++++++---
 drivers/gpu/drm/i915/i915_irq.c            |  4 +-
 drivers/gpu/drm/i915/intel_lrc.c           |  5 +-
 3 files changed, 76 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 913d87358972..bdc9e2bc5eb9 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -350,7 +350,7 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *request)
 	u32 freespace;
 	int ret;

-	spin_lock(&client->wq_lock);
+	spin_lock_irq(&client->wq_lock);
 	freespace = CIRC_SPACE(client->wq_tail, desc->head, client->wq_size);
 	freespace -= client->wq_rsvd;
 	if (likely(freespace >= wqi_size)) {
@@ -360,7 +360,7 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *request)
 		client->no_wq_space++;
 		ret = -EAGAIN;
 	}
-	spin_unlock(&client->wq_lock);
+	spin_unlock_irq(&client->wq_lock);

 	return ret;
 }
@@ -372,9 +372,9 @@ void i915_guc_wq_unreserve(struct drm_i915_gem_request *request)

 	GEM_BUG_ON(READ_ONCE(client->wq_rsvd) < wqi_size);

-	spin_lock(&client->wq_lock);
+	spin_lock_irq(&client->wq_lock);
 	client->wq_rsvd -= wqi_size;
-	spin_unlock(&client->wq_lock);
+	spin_unlock_irq(&client->wq_lock);
 }

 /* Construct a Work Item and append it to the GuC's Work Queue */
@@ -534,10 +534,74 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq)

 static void i915_guc_submit(struct drm_i915_gem_request *rq)
 {
-	i915_gem_request_submit(rq);
+	__i915_gem_request_submit(rq);
 	__i915_guc_submit(rq);
 }

+static bool i915_guc_dequeue(struct intel_engine_cs *engine)
+{
+	struct execlist_port *port = engine->execlist_port;
+	struct drm_i915_gem_request *last = port[0].request;
+	unsigned long flags;
+	struct rb_node *rb;
+	bool submit = false;
+
+	spin_lock_irqsave(&engine->timeline->lock, flags);
+	rb = engine->execlist_first;
+	while (rb) {
+		struct drm_i915_gem_request *cursor =
+			rb_entry(rb, typeof(*cursor), priotree.node);
+
+		if (last && cursor->ctx != last->ctx) {
+			if (port != engine->execlist_port)
+				break;
+
+			i915_gem_request_assign(&port->request, last);
+			dma_fence_enable_sw_signaling(&last->fence);
+			port++;
+		}
+
+		rb = rb_next(rb);
+		rb_erase(&cursor->priotree.node, &engine->execlist_queue);
+		RB_CLEAR_NODE(&cursor->priotree.node);
+		cursor->priotree.priority = INT_MAX;
+
+		i915_guc_submit(cursor);
+		last = cursor;
+		submit = true;
+	}
+	if (submit) {
+		i915_gem_request_assign(&port->request, last);
+		dma_fence_enable_sw_signaling(&last->fence);
+		engine->execlist_first = rb;
+	}
+	spin_unlock_irqrestore(&engine->timeline->lock, flags);
+
+	return submit;
+}

It is again tempting me to suggest a single instance of the dequeue loop. Maybe something like:

i915_request_dequeue(engine, assign_func, submit_func)
{
	...
	
	while (rb) {
		...
		if () {
		...
			assign_func();
			port++;
		}
		
		...

		submit_func();
		last = cursor;
		submit = true;
	}
	if (submit) {
		assign_func();
		engine->execlists_first = rb;
	}

	...
}

execlists_dequeue_assign()
{
	i915_gem_request_assign();
}

execlists_deqeue_submit()
{
	__i915_gem_request_submit();
}

execlists_dequeue()
{
	...

	submit = i915_request_deqeue(engine,
				     execlists_dequeue_assign,
				     execlists_deqeue_submit);
	if (submit)
		execlists_submit_ports(engine);
}

guc_dequeue_assign()
{
	i915_gem_request_assign();
	dma_fence_enable_sw_signaling();
}

guc_dequeue_submit()
{
	i915_guc_submit();
}

i915_guc_dequeue()
{
	...

	submit = i915_request_dequeue(engine,
				      guc_dequeue_assign,
				      guc_dequeue_submit);

	...
}

+
+static void i915_guc_irq_handler(unsigned long data)
+{
+	struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
+	struct execlist_port *port = engine->execlist_port;
+	struct drm_i915_gem_request *rq;
+	bool submit;
+
+	do {
+		rq = port[0].request;
+		while (rq && i915_gem_request_completed(rq)) {
+			i915_gem_request_put(rq);
+			rq = port[1].request;
+			port[0].request = rq;
+			port[1].request = NULL;
+		}
+
+		submit = false;
+		if (!port[1].request)
+			submit = i915_guc_dequeue(engine);
+	} while (submit);

Outer loop looks to be not needed since the i915_guc_dequeue will fill both ports if it can. Or it is for the unlikely case a new request manages to get in after one and only has been submitted?

+}
+
 /*
  * Everything below here is concerned with setup & teardown, and is
  * therefore not part of the somewhat time-critical batch-submission
@@ -1428,8 +1492,9 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
 	for_each_engine(engine, dev_priv, id) {
 		struct drm_i915_gem_request *rq;

-		engine->submit_request = i915_guc_submit;
-		engine->schedule = NULL;
+		tasklet_init(&engine->irq_tasklet,
+			     i915_guc_irq_handler,
+			     (unsigned long)engine);

 		/* Replay the current set of previously submitted requests */
 		list_for_each_entry(rq, &engine->timeline->requests, link) {
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 75fb1f66cc0c..624267bebf56 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1341,8 +1341,10 @@ static void snb_gt_irq_handler(struct drm_i915_private *dev_priv,
 static __always_inline void
 gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift)
 {
-	if (iir & (GT_RENDER_USER_INTERRUPT << test_shift))
+	if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) {
+		tasklet_schedule(&engine->irq_tasklet);
 		notify_ring(engine);
+	}
 	if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift))
 		tasklet_schedule(&engine->irq_tasklet);
 }
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index d9de455da131..33fe7e5c9364 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1352,7 +1352,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
 	DRM_DEBUG_DRIVER("Execlists enabled for %s\n", engine->name);

 	/* After a GPU reset, we may have requests to replay */
-	if (!execlists_elsp_idle(engine)) {
+	if (!i915.enable_guc_submission && !execlists_elsp_idle(engine)) {
 		engine->execlist_port[0].count = 0;
 		engine->execlist_port[1].count = 0;
 		execlists_submit_ports(engine);
@@ -1420,9 +1420,6 @@ static void reset_common_ring(struct intel_engine_cs *engine,
 	request->ring->last_retired_head = -1;
 	intel_ring_update_space(request->ring);

-	if (i915.enable_guc_submission)
-		return;
-
 	/* Catch up with any missed context-switch interrupts */
 	I915_WRITE(RING_CONTEXT_STATUS_PTR(engine), _MASKED_FIELD(0xffff, 0));
 	if (request->ctx != port[0].request->ctx) {


Looks good, minus the locking concern.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux