To further enhance port processing, keep track of reserved ports.
This way we can iterate over only the used subset of the port space.
Note that we lift the responsibility of inspecting hw availability
out of execlists_submit_request() and always do the dequeuing. This
ensures that only the irq handler is responsible for keeping track
of available ports.

v2: rebase, comment fix, READ_ONCE only outside of irq handler (Chris)
v3: rebase to new naming

Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Michał Winiarski <michal.winiarski@xxxxxxxxx>
Signed-off-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_guc_submission.c |  52 ++++++++------
 drivers/gpu/drm/i915/i915_irq.c            |   2 +-
 drivers/gpu/drm/i915/intel_engine_cs.c     |   7 +-
 drivers/gpu/drm/i915/intel_lrc.c           | 112 ++++++++++++++++++-----------
 drivers/gpu/drm/i915/intel_ringbuffer.h    |  45 +++++++++---
 5 files changed, 141 insertions(+), 77 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index c6cd05a5347c..d6071396da32 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -487,7 +487,7 @@ static void guc_ring_doorbell(struct i915_guc_client *client)
  * @engine: engine associated with the commands
  *
  * The only error here arises if the doorbell hardware isn't functioning
- * as expected, which really shouln't happen.
+ * as expected, which really shouldn't happen.
  */
 static void i915_guc_submit(struct intel_engine_cs *engine)
 {
@@ -495,17 +495,19 @@ static void i915_guc_submit(struct intel_engine_cs *engine)
 	struct intel_guc *guc = &dev_priv->guc;
 	struct i915_guc_client *client = guc->execbuf_client;
 	struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct execlist_port *port = execlists->port;
 	const unsigned int engine_id = engine->id;
 	unsigned int n;
 
-	for (n = 0; n < ARRAY_SIZE(execlists->port); n++) {
+	for (n = 0; n < execlists_active_ports(execlists); n++) {
+		struct execlist_port *port;
 		struct drm_i915_gem_request *rq;
 		unsigned int count;
 
-		rq = port_unpack(&port[n], &count);
+		port = execlists_port(execlists, n);
+		rq = port_unpack(port, &count);
+
 		if (rq && count == 0) {
-			port_set(&port[n], port_pack(rq, ++count));
+			port_set(port, port_pack(rq, ++count));
 
 			if (i915_vma_is_map_and_fenceable(rq->ring->vma))
 				POSTING_READ_FW(GUC_STATUS);
@@ -559,26 +561,29 @@ static void port_assign(struct execlist_port *port,
 
 static void i915_guc_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct execlist_port *port = execlists->port;
+	struct execlist_port *port;
 	struct drm_i915_gem_request *last = NULL;
-	const struct execlist_port * const last_port = execlists_port_tail(execlists);
 	bool submit = false;
 	struct rb_node *rb;
 
-	if (port_isset(port))
-		port++;
-
 	spin_lock_irq(&engine->timeline->lock);
 	rb = execlists->first;
 	GEM_BUG_ON(rb_first(&execlists->queue) != rb);
-	while (rb) {
+
+	if (unlikely(!rb))
+		goto done;
+
+	port = execlists_request_port(execlists);
+
+	do {
 		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
 		struct drm_i915_gem_request *rq, *rn;
 
 		list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
 			if (last && rq->ctx != last->ctx) {
-				if (port == last_port) {
+				if (!execlists_inactive_ports(execlists)) {
 					__list_del_many(&p->requests,
 							&rq->priotree.link);
 					goto done;
@@ -587,7 +592,8 @@ static void i915_guc_dequeue(struct intel_engine_cs *engine)
 
 				if (submit)
 					port_assign(port, last);
-				port = execlists_port_next(execlists, port);
+				port = execlists_request_port(execlists);
+				GEM_BUG_ON(port_isset(port));
 			}
 
 			INIT_LIST_HEAD(&rq->priotree.link);
@@ -604,7 +610,7 @@ static void i915_guc_dequeue(struct intel_engine_cs *engine)
 		INIT_LIST_HEAD(&p->requests);
 		if (p->priority != I915_PRIORITY_NORMAL)
 			kmem_cache_free(engine->i915->priorities, p);
-	}
+	} while (rb);
 done:
 	execlists->first = rb;
 	if (submit) {
@@ -618,21 +624,21 @@ static void i915_guc_irq_handler(unsigned long data)
 {
 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
 	struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct execlist_port *port = execlists_port_head(execlists);
-	const struct execlist_port * const last_port = execlists_port_tail(execlists);
-	struct drm_i915_gem_request *rq;
 
-	rq = port_request(port);
-	while (rq && i915_gem_request_completed(rq)) {
+	while (execlists_active_ports(execlists)) {
+		struct execlist_port *port = execlists_port_head(execlists);
+		struct drm_i915_gem_request *rq = port_request(port);
+
+		if (!i915_gem_request_completed(rq))
+			break;
+
 		trace_i915_gem_request_out(rq);
 		i915_gem_request_put(rq);
 
-		port = execlists_port_complete(execlists, port);
-
-		rq = port_request(port);
+		execlists_release_port(execlists, port);
 	}
 
-	if (!port_isset(last_port))
+	if (execlists_inactive_ports(execlists))
 		i915_guc_dequeue(engine);
 }
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index b9d1f379c5a0..424e960fa115 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1382,7 +1382,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift)
 	bool tasklet = false;
 
 	if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) {
-		if (port_count(execlists_port_head(execlists))) {
+		if (READ_ONCE(execlists->port_count)) {
 			__set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
 			tasklet = true;
 		}
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 3f857786e2ed..65e9c3a66e70 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -409,6 +409,9 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine)
 	BUILD_BUG_ON_NOT_POWER_OF_2(execlists_num_ports(execlists));
 	GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
 
+	execlists->port_head = 0;
+	execlists->port_count = 0;
+
 	execlists->queue = RB_ROOT;
 	execlists->first = NULL;
 }
@@ -1504,8 +1507,8 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
 	if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
 		return false;
 
-	/* Both ports drained, no more ELSP submission? */
-	if (port_request(execlists_port_head(&engine->execlists)))
+	/* All ports drained, no more ELSP submission? */
+	if (execlists_active_ports(&engine->execlists))
 		return false;
 
 	/* ELSP is empty, but there are ready requests? */
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index cb7fb3c651ce..b056bf5ddd53 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -392,6 +392,12 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq)
 	return ce->lrc_desc;
 }
 
+static inline void elsp_write(u64 desc, u32 __iomem *elsp)
+{
+	writel(upper_32_bits(desc), elsp);
+	writel(lower_32_bits(desc), elsp);
+}
+
 static void execlists_submit_ports(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -399,28 +405,29 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
 		engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine));
 	unsigned int n;
 
-	for (n = execlists_num_ports(execlists); n--; ) {
-		struct execlist_port *port;
+	for (n = 0; n < execlists_inactive_ports(execlists); n++)
+		elsp_write(0, elsp);
+
+	for (n = execlists_active_ports(execlists); n--; ) {
 		struct drm_i915_gem_request *rq;
+		struct execlist_port *port;
 		unsigned int count;
 		u64 desc;
 
 		port = execlists_port(execlists, n);
 		rq = port_unpack(port, &count);
-		if (rq) {
-			GEM_BUG_ON(count > !n);
-			if (!count++)
-				execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
-			port_set(port, port_pack(rq, count));
-			desc = execlists_update_context(rq);
-			GEM_DEBUG_EXEC(port->context_id = upper_32_bits(desc));
-		} else {
-			GEM_BUG_ON(!n);
-			desc = 0;
-		}
-		writel(upper_32_bits(desc), elsp);
-		writel(lower_32_bits(desc), elsp);
+		GEM_BUG_ON(!rq);
+		GEM_BUG_ON(count > !n);
+
+		if (!count++)
+			execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
+
+		port_set(port, port_pack(rq, count));
+		desc = execlists_update_context(rq);
+		GEM_DEBUG_EXEC(port->context_id = upper_32_bits(desc));
+
+		elsp_write(desc, elsp);
 	}
 }
@@ -455,15 +462,23 @@ static void port_assign(struct execlist_port *port,
 
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
-	struct drm_i915_gem_request *last;
 	struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct execlist_port *port = execlists_port_head(execlists);
-	const struct execlist_port * const last_port = execlists_port_tail(execlists);
+	struct execlist_port *port;
+	struct drm_i915_gem_request *last;
 	struct rb_node *rb;
 	bool submit = false;
 
-	last = port_request(port);
-	if (last)
+	spin_lock_irq(&engine->timeline->lock);
+	rb = execlists->first;
+	GEM_BUG_ON(rb_first(&execlists->queue) != rb);
+
+	if (unlikely(!rb))
+		goto done;
+
+	if (execlists_active_ports(execlists)) {
+		port = execlists_port_tail(execlists);
+		last = port_request(port);
+
 		/* WaIdleLiteRestore:bdw,skl
 		 * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
 		 * as we resubmit the request. See gen8_emit_breadcrumb()
		 * for where we prepare the padding after the end of the
 		 * request.
 		 */
 		last->tail = last->wa_tail;
+	} else {
+		/* Allocate first port to coalesce into */
+		port = execlists_request_port(execlists);
+		last = NULL;
+	}
 
 	/* Hardware submission is through 2 ports. Conceptually each port
 	 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
@@ -493,10 +513,7 @@
 	 * and context switches) submission.
 	 */
 
-	spin_lock_irq(&engine->timeline->lock);
-	rb = execlists->first;
-	GEM_BUG_ON(rb_first(&execlists->queue) != rb);
-	while (rb) {
+	do {
 		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
 		struct drm_i915_gem_request *rq, *rn;
 
@@ -514,11 +531,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			 */
 			if (last && !can_merge_ctx(rq->ctx, last->ctx)) {
 				/*
-				 * If we are on the second port and cannot
+				 * If we are on the last port and cannot
 				 * combine this request with the last, then we
 				 * are done.
 				 */
-				if (port == last_port) {
+				if (!execlists_inactive_ports(execlists)) {
 					__list_del_many(&p->requests,
 							&rq->priotree.link);
 					goto done;
@@ -543,8 +560,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 
 				if (submit)
 					port_assign(port, last);
-				port = execlists_port_next(execlists, port);
-
+				port = execlists_request_port(execlists);
 				GEM_BUG_ON(port_isset(port));
 			}
 
@@ -562,7 +578,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		INIT_LIST_HEAD(&p->requests);
 		if (p->priority != I915_PRIORITY_NORMAL)
 			kmem_cache_free(engine->i915->priorities, p);
-	}
+	} while (rb);
+
 done:
 	execlists->first = rb;
 	if (submit)
@@ -576,18 +593,26 @@
 static void
 execlists_cancel_port_requests(struct intel_engine_execlists *execlists)
 {
-	struct execlist_port *port = execlists->port;
-	unsigned int num_ports = ARRAY_SIZE(execlists->port);
+	while (execlists_active_ports(execlists)) {
+		struct execlist_port *port;
+		struct drm_i915_gem_request *rq;
 
-	while (num_ports-- && port_isset(port)) {
-		struct drm_i915_gem_request *rq = port_request(port);
+		port = execlists_port_head(execlists);
+		GEM_BUG_ON(!port_isset(port));
+
+		rq = port_request(port);
+		GEM_BUG_ON(!rq);
 
 		execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
 		i915_gem_request_put(rq);
 
-		memset(port, 0, sizeof(*port));
-		port++;
+		execlists_release_port(execlists, port);
 	}
+
+	GEM_BUG_ON(execlists->port_count);
+
+	execlists->port_count = 0;
+	execlists->port_head = 0;
 }
 
 static void execlists_cancel_requests(struct intel_engine_cs *engine)
@@ -649,10 +674,12 @@
 
 static bool execlists_elsp_ready(struct intel_engine_execlists * const execlists)
 {
-	struct execlist_port * const port0 = execlists_port_head(execlists);
-	struct execlist_port * const port1 = execlists_port_next(execlists, port0);
+	const unsigned int active = execlists_active_ports(execlists);
+
+	if (!active)
+		return true;
 
-	return port_count(port0) + port_count(port1) < 2;
+	return port_count(execlists_port_tail(execlists)) + active < 2;
 }
 
 /*
@@ -663,7 +690,6 @@ static void intel_lrc_irq_handler(unsigned long data)
 {
 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
 	struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct execlist_port *port = execlists_port_head(execlists);
 	struct drm_i915_private *dev_priv = engine->i915;
 
 	/* We can skip acquiring intel_runtime_pm_get() here as it was taken
@@ -720,6 +746,7 @@ static void intel_lrc_irq_handler(unsigned long data)
 		}
 
 		while (head != tail) {
+			struct execlist_port *port;
 			struct drm_i915_gem_request *rq;
 			unsigned int status;
 			unsigned int count;
@@ -748,6 +775,7 @@ static void intel_lrc_irq_handler(unsigned long data)
 			if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
 				continue;
 
+			port = execlists_port_head(execlists);
 			/* Check the context/desc id for this event matches */
 			GEM_DEBUG_BUG_ON(buf[2 * head + 1] !=
 					 port->context_id);
@@ -761,13 +789,13 @@ static void intel_lrc_irq_handler(unsigned long data)
 				trace_i915_gem_request_out(rq);
 				i915_gem_request_put(rq);
 
-				port = execlists_port_complete(execlists, port);
+				execlists_release_port(execlists, port);
 			} else {
 				port_set(port, port_pack(rq, count));
 			}
 
 			/* After the final element, the hw should be idle */
-			GEM_BUG_ON(port_count(port) == 0 &&
+			GEM_BUG_ON(execlists_active_ports(execlists) == 0 &&
				   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
 		}
 
@@ -792,7 +820,7 @@ static void insert_request(struct intel_engine_cs *engine,
 	struct i915_priolist *p = lookup_priolist(engine, pt, prio);
 
 	list_add_tail(&pt->link, &ptr_mask_bits(p, 1)->requests);
-	if (ptr_unmask_bits(p, 1) && execlists_elsp_ready(execlists))
+	if (ptr_unmask_bits(p, 1))
 		tasklet_hi_schedule(&execlists->irq_tasklet);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 2e795b44a942..c09d1c93fd15 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -249,6 +249,11 @@ struct intel_engine_execlists {
 	unsigned int port_head;
 
 	/**
+	 * @port_count: reserved ports
+	 */
+	unsigned int port_count;
+
+	/**
 	 * @queue: queue of requests, in priority lists
 	 */
 	struct rb_root queue;
@@ -529,6 +534,18 @@ execlists_num_ports(const struct intel_engine_execlists * const execlists)
 	return execlists->port_mask + 1;
 }
 
+static inline unsigned int
+execlists_active_ports(const struct intel_engine_execlists * const execlists)
+{
+	return execlists->port_count;
+}
+
+static inline unsigned int
+execlists_inactive_ports(const struct intel_engine_execlists * const execlists)
+{
+	return execlists_num_ports(execlists) - execlists_active_ports(execlists);
+}
+
 #define __port_n(start, n, mask) (((start) + (n)) & (mask))
 #define port_n(e, n) __port_n((e)->port_head, n, (e)->port_mask)
 
@@ -543,33 +560,43 @@ execlists_port(struct intel_engine_execlists * const execlists,
 static inline struct execlist_port *
 execlists_port_head(struct intel_engine_execlists * const execlists)
 {
+	GEM_BUG_ON(!execlists->port_count);
+
 	return execlists_port(execlists, 0);
 }
 
 static inline struct execlist_port *
 execlists_port_tail(struct intel_engine_execlists * const execlists)
 {
-	return execlists_port(execlists, -1);
+	GEM_BUG_ON(!execlists->port_count);
+
+	return execlists_port(execlists, execlists->port_count - 1);
 }
 
 static inline struct execlist_port *
-execlists_port_next(struct intel_engine_execlists * const execlists,
-		    const struct execlist_port * const port)
+execlists_request_port(struct intel_engine_execlists * const execlists)
 {
-	const unsigned int n = port_index(port, execlists);
+	GEM_BUG_ON(execlists->port_count == execlists->port_mask + 1);
+
+	execlists->port_count++;
 
-	return execlists_port(execlists, n + 1);
+	GEM_BUG_ON(port_isset(execlists_port_tail(execlists)));
+
+	return execlists_port_tail(execlists);
 }
 
-static inline struct execlist_port *
-execlists_port_complete(struct intel_engine_execlists * const execlists,
-			struct execlist_port * const port)
+static inline void
+execlists_release_port(struct intel_engine_execlists * const execlists,
+		       struct execlist_port * const port)
 {
+	GEM_BUG_ON(port_index(port, execlists) != execlists->port_head);
+	GEM_BUG_ON(!port_isset(port));
+	GEM_BUG_ON(!execlists->port_count);
+
 	memset(port, 0, sizeof(struct execlist_port));
 	execlists->port_head = port_n(execlists, 1);
-
-	return execlists_port_head(execlists);
+	execlists->port_count--;
 }
 
 static inline unsigned int
-- 
2.11.0