[PATCH 2/3] drm/i915: Keep track of reserved execlist ports

To further enhance port processing, keep track of
reserved ports. This way we can iterate over only the used
subset of the port space. Note that we lift the
responsibility of inspecting hw availability from
execlists_submit_request() and always do dequeuing. This
ensures that only the irq handler is responsible for
keeping track of available ports.
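
Not part of the patch itself: a minimal, self-contained sketch of
the head/count ring scheme introduced here, using hypothetical
names (struct ports, port_at, port_request, port_release); the
real helpers live in intel_ringbuffer.h in the diff below.

#include <assert.h>
#include <string.h>

#define NUM_PORTS 2u	/* assumed; must be a power of two */

struct port { void *request; };

struct ports {
	struct port slots[NUM_PORTS];
	unsigned int head;	/* index of the oldest reserved slot */
	unsigned int count;	/* number of reserved slots */
};

static struct port *port_at(struct ports *p, unsigned int n)
{
	/* Index relative to head, wrapping with the power-of-two mask */
	return &p->slots[(p->head + n) & (NUM_PORTS - 1)];
}

/* Reserve the next free slot (the tail); caller checks count first. */
static struct port *port_request(struct ports *p)
{
	assert(p->count < NUM_PORTS);
	return port_at(p, p->count++);
}

/* Release the head slot once its request has completed. */
static void port_release(struct ports *p)
{
	assert(p->count);
	memset(port_at(p, 0), 0, sizeof(struct port));
	p->head = (p->head + 1) & (NUM_PORTS - 1);
	p->count--;
}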

v2: rebase, comment fix, READ_ONCE only outside of irq handler (Chris)
v3: rebase to new naming

Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Michał Winiarski <michal.winiarski@xxxxxxxxx>
Signed-off-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_guc_submission.c |  52 ++++++++------
 drivers/gpu/drm/i915/i915_irq.c            |   2 +-
 drivers/gpu/drm/i915/intel_engine_cs.c     |   7 +-
 drivers/gpu/drm/i915/intel_lrc.c           | 112 ++++++++++++++++++-----------
 drivers/gpu/drm/i915/intel_ringbuffer.h    |  45 +++++++++---
 5 files changed, 141 insertions(+), 77 deletions(-)
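
Building on the sketch above, a hedged illustration of how the
reworked irq handler drains completed requests strictly from the
head slot (request_completed() is a stand-in, not a real i915
call):

/* Stand-in for the driver's completion test; hypothetical. */
static int request_completed(void *rq) { (void)rq; return 1; }

static void irq_drain(struct ports *p)
{
	/* Mirrors the new while (execlists_active_ports(...))
	 * loop in i915_guc_irq_handler() below. */
	while (p->count) {
		struct port *port = port_at(p, 0);

		if (!request_completed(port->request))
			break;

		port_release(p);
	}
}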

diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index c6cd05a5347c..d6071396da32 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -487,7 +487,7 @@ static void guc_ring_doorbell(struct i915_guc_client *client)
  * @engine: engine associated with the commands
  *
  * The only error here arises if the doorbell hardware isn't functioning
- * as expected, which really shouln't happen.
+ * as expected, which really shouldn't happen.
  */
 static void i915_guc_submit(struct intel_engine_cs *engine)
 {
@@ -495,17 +495,19 @@ static void i915_guc_submit(struct intel_engine_cs *engine)
 	struct intel_guc *guc = &dev_priv->guc;
 	struct i915_guc_client *client = guc->execbuf_client;
 	struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct execlist_port *port = execlists->port;
 	const unsigned int engine_id = engine->id;
 	unsigned int n;
 
-	for (n = 0; n < ARRAY_SIZE(execlists->port); n++) {
+	for (n = 0; n < execlists_active_ports(execlists); n++) {
+		struct execlist_port *port;
 		struct drm_i915_gem_request *rq;
 		unsigned int count;
 
-		rq = port_unpack(&port[n], &count);
+		port = execlists_port(execlists, n);
+		rq = port_unpack(port, &count);
+
 		if (rq && count == 0) {
-			port_set(&port[n], port_pack(rq, ++count));
+			port_set(port, port_pack(rq, ++count));
 
 			if (i915_vma_is_map_and_fenceable(rq->ring->vma))
 				POSTING_READ_FW(GUC_STATUS);
@@ -559,26 +561,29 @@ static void port_assign(struct execlist_port *port,
 
 static void i915_guc_dequeue(struct intel_engine_cs *engine)
 {
+
 	struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct execlist_port *port = execlists->port;
+	struct execlist_port *port;
 	struct drm_i915_gem_request *last = NULL;
-	const struct execlist_port * const last_port = execlists_port_tail(execlists);
 	bool submit = false;
 	struct rb_node *rb;
 
-	if (port_isset(port))
-		port++;
-
 	spin_lock_irq(&engine->timeline->lock);
 	rb = execlists->first;
 	GEM_BUG_ON(rb_first(&execlists->queue) != rb);
-	while (rb) {
+
+	if (unlikely(!rb))
+		goto done;
+
+	port = execlists_request_port(execlists);
+
+	do {
 		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
 		struct drm_i915_gem_request *rq, *rn;
 
 		list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
 			if (last && rq->ctx != last->ctx) {
-				if (port == last_port) {
+				if (!execlists_inactive_ports(execlists)) {
 					__list_del_many(&p->requests,
 							&rq->priotree.link);
 					goto done;
@@ -587,7 +592,8 @@ static void i915_guc_dequeue(struct intel_engine_cs *engine)
 				if (submit)
 					port_assign(port, last);
 
-				port = execlists_port_next(execlists, port);
+				port = execlists_request_port(execlists);
+				GEM_BUG_ON(port_isset(port));
 			}
 
 			INIT_LIST_HEAD(&rq->priotree.link);
@@ -604,7 +610,7 @@ static void i915_guc_dequeue(struct intel_engine_cs *engine)
 		INIT_LIST_HEAD(&p->requests);
 		if (p->priority != I915_PRIORITY_NORMAL)
 			kmem_cache_free(engine->i915->priorities, p);
-	}
+	} while (rb);
 done:
 	execlists->first = rb;
 	if (submit) {
@@ -618,21 +624,21 @@ static void i915_guc_irq_handler(unsigned long data)
 {
 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
 	struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct execlist_port *port = execlists_port_head(execlists);
-	const struct execlist_port * const last_port = execlists_port_tail(execlists);
-	struct drm_i915_gem_request *rq;
 
-	rq = port_request(port);
-	while (rq && i915_gem_request_completed(rq)) {
+	while (execlists_active_ports(execlists)) {
+		struct execlist_port *port = execlists_port_head(execlists);
+		struct drm_i915_gem_request *rq = port_request(port);
+
+		if (!i915_gem_request_completed(rq))
+			break;
+
 		trace_i915_gem_request_out(rq);
 		i915_gem_request_put(rq);
 
-		port = execlists_port_complete(execlists, port);
-
-		rq = port_request(port);
+		execlists_release_port(execlists, port);
 	}
 
-	if (!port_isset(last_port))
+	if (execlists_inactive_ports(execlists))
 		i915_guc_dequeue(engine);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index b9d1f379c5a0..424e960fa115 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1382,7 +1382,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift)
 	bool tasklet = false;
 
 	if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) {
-		if (port_count(execlists_port_head(execlists))) {
+		if (READ_ONCE(execlists->port_count)) {
 			__set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
 			tasklet = true;
 		}
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 3f857786e2ed..65e9c3a66e70 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -409,6 +409,9 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine)
 	BUILD_BUG_ON_NOT_POWER_OF_2(execlists_num_ports(execlists));
 	GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
 
+	execlists->port_head = 0;
+	execlists->port_count = 0;
+
 	execlists->queue = RB_ROOT;
 	execlists->first = NULL;
 }
@@ -1504,8 +1507,8 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
 	if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
 		return false;
 
-	/* Both ports drained, no more ELSP submission? */
-	if (port_request(execlists_port_head(&engine->execlists)))
+	/* All ports drained, no more ELSP submission? */
+	if (execlists_active_ports(&engine->execlists))
 		return false;
 
 	/* ELSP is empty, but there are ready requests? */
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index cb7fb3c651ce..b056bf5ddd53 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -392,6 +392,12 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq)
 	return ce->lrc_desc;
 }
 
+static inline void elsp_write(u64 desc, u32 __iomem *elsp)
+{
+	writel(upper_32_bits(desc), elsp);
+	writel(lower_32_bits(desc), elsp);
+}
+
 static void execlists_submit_ports(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -399,28 +405,29 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
 		engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine));
 	unsigned int n;
 
-	for (n = execlists_num_ports(execlists); n--; ) {
-		struct execlist_port *port;
+	for (n = 0; n < execlists_inactive_ports(execlists); n++)
+		elsp_write(0, elsp);
+
+	for (n = execlists_active_ports(execlists); n--; ) {
 		struct drm_i915_gem_request *rq;
+		struct execlist_port *port;
 		unsigned int count;
 		u64 desc;
 
 		port = execlists_port(execlists, n);
 		rq = port_unpack(port, &count);
-		if (rq) {
-			GEM_BUG_ON(count > !n);
-			if (!count++)
-				execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
-			port_set(port, port_pack(rq, count));
-			desc = execlists_update_context(rq);
-			GEM_DEBUG_EXEC(port->context_id = upper_32_bits(desc));
-		} else {
-			GEM_BUG_ON(!n);
-			desc = 0;
-		}
 
-		writel(upper_32_bits(desc), elsp);
-		writel(lower_32_bits(desc), elsp);
+		GEM_BUG_ON(!rq);
+		GEM_BUG_ON(count > !n);
+
+		if (!count++)
+			execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
+
+		port_set(port, port_pack(rq, count));
+		desc = execlists_update_context(rq);
+		GEM_DEBUG_EXEC(port->context_id = upper_32_bits(desc));
+
+		elsp_write(desc, elsp);
 	}
 }
 
@@ -455,15 +462,23 @@ static void port_assign(struct execlist_port *port,
 
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
-	struct drm_i915_gem_request *last;
 	struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct execlist_port *port = execlists_port_head(execlists);
-	const struct execlist_port * const last_port = execlists_port_tail(execlists);
+	struct execlist_port *port;
+	struct drm_i915_gem_request *last;
 	struct rb_node *rb;
 	bool submit = false;
 
-	last = port_request(port);
-	if (last)
+	spin_lock_irq(&engine->timeline->lock);
+	rb = execlists->first;
+	GEM_BUG_ON(rb_first(&execlists->queue) != rb);
+
+	if (unlikely(!rb))
+		goto done;
+
+	if (execlists_active_ports(execlists)) {
+		port = execlists_port_tail(execlists);
+		last = port_request(port);
+
 		/* WaIdleLiteRestore:bdw,skl
 		 * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
 		 * as we resubmit the request. See gen8_emit_breadcrumb()
@@ -471,6 +486,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		 * request.
 		 */
 		last->tail = last->wa_tail;
+	} else {
+		/* Allocate first port to coalesce into */
+		port = execlists_request_port(execlists);
+		last = NULL;
+	}
 
 	/* Hardware submission is through 2 ports. Conceptually each port
 	 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
@@ -493,10 +513,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	 * and context switches) submission.
 	 */
 
-	spin_lock_irq(&engine->timeline->lock);
-	rb = execlists->first;
-	GEM_BUG_ON(rb_first(&execlists->queue) != rb);
-	while (rb) {
+	do {
 		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
 		struct drm_i915_gem_request *rq, *rn;
 
@@ -514,11 +531,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			 */
 			if (last && !can_merge_ctx(rq->ctx, last->ctx)) {
 				/*
-				 * If we are on the second port and cannot
+				 * If we are on the last port and cannot
 				 * combine this request with the last, then we
 				 * are done.
 				 */
-				if (port == last_port) {
+				if (!execlists_inactive_ports(execlists)) {
 					__list_del_many(&p->requests,
 							&rq->priotree.link);
 					goto done;
@@ -543,8 +560,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				if (submit)
 					port_assign(port, last);
 
-				port = execlists_port_next(execlists, port);
-
+				port = execlists_request_port(execlists);
 				GEM_BUG_ON(port_isset(port));
 			}
 
@@ -562,7 +578,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		INIT_LIST_HEAD(&p->requests);
 		if (p->priority != I915_PRIORITY_NORMAL)
 			kmem_cache_free(engine->i915->priorities, p);
-	}
+	} while (rb);
+
 done:
 	execlists->first = rb;
 	if (submit)
@@ -576,18 +593,26 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 static void
 execlists_cancel_port_requests(struct intel_engine_execlists *execlists)
 {
-	struct execlist_port *port = execlists->port;
-	unsigned int num_ports = ARRAY_SIZE(execlists->port);
+	while (execlists_active_ports(execlists)) {
+		struct execlist_port *port;
+		struct drm_i915_gem_request *rq;
 
-	while (num_ports-- && port_isset(port)) {
-		struct drm_i915_gem_request *rq = port_request(port);
+		port = execlists_port_head(execlists);
+		GEM_BUG_ON(!port_isset(port));
+
+		rq = port_request(port);
+		GEM_BUG_ON(!rq);
 
 		execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
 		i915_gem_request_put(rq);
 
-		memset(port, 0, sizeof(*port));
-		port++;
+		execlists_release_port(execlists, port);
 	}
+
+	GEM_BUG_ON(execlists->port_count);
+
+	execlists->port_count = 0;
+	execlists->port_head = 0;
 }
 
 static void execlists_cancel_requests(struct intel_engine_cs *engine)
@@ -649,10 +674,12 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 
 static bool execlists_elsp_ready(struct intel_engine_execlists * const execlists)
 {
-	struct execlist_port * const port0 = execlists_port_head(execlists);
-	struct execlist_port * const port1 = execlists_port_next(execlists, port0);
+	const unsigned int active = execlists_active_ports(execlists);
+
+	if (!active)
+		return true;
 
-	return port_count(port0) + port_count(port1) < 2;
+	return port_count(execlists_port_tail(execlists)) + active < 2;
 }
 
 /*
@@ -663,7 +690,6 @@ static void intel_lrc_irq_handler(unsigned long data)
 {
 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
 	struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct execlist_port *port = execlists_port_head(execlists);
 	struct drm_i915_private *dev_priv = engine->i915;
 
 	/* We can skip acquiring intel_runtime_pm_get() here as it was taken
@@ -720,6 +746,7 @@ static void intel_lrc_irq_handler(unsigned long data)
 		}
 
 		while (head != tail) {
+			struct execlist_port *port;
 			struct drm_i915_gem_request *rq;
 			unsigned int status;
 			unsigned int count;
@@ -748,6 +775,7 @@ static void intel_lrc_irq_handler(unsigned long data)
 			if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
 				continue;
 
+			port = execlists_port_head(execlists);
 			/* Check the context/desc id for this event matches */
 			GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
 
@@ -761,13 +789,13 @@ static void intel_lrc_irq_handler(unsigned long data)
 				trace_i915_gem_request_out(rq);
 				i915_gem_request_put(rq);
 
-				port = execlists_port_complete(execlists, port);
+				execlists_release_port(execlists, port);
 			} else {
 				port_set(port, port_pack(rq, count));
 			}
 
 			/* After the final element, the hw should be idle */
-			GEM_BUG_ON(port_count(port) == 0 &&
+			GEM_BUG_ON(execlists_active_ports(execlists) == 0 &&
 				   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
 		}
 
@@ -792,7 +820,7 @@ static void insert_request(struct intel_engine_cs *engine,
 	struct i915_priolist *p = lookup_priolist(engine, pt, prio);
 
 	list_add_tail(&pt->link, &ptr_mask_bits(p, 1)->requests);
-	if (ptr_unmask_bits(p, 1) && execlists_elsp_ready(execlists))
+	if (ptr_unmask_bits(p, 1))
 		tasklet_hi_schedule(&execlists->irq_tasklet);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 2e795b44a942..c09d1c93fd15 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -249,6 +249,11 @@ struct intel_engine_execlists {
 	unsigned int port_head;
 
 	/**
+	 * @port_count: reserved ports
+	 */
+	unsigned int port_count;
+
+	/**
 	 * @queue: queue of requests, in priority lists
 	 */
 	struct rb_root queue;
@@ -529,6 +534,18 @@ execlists_num_ports(const struct intel_engine_execlists * const execlists)
 	return execlists->port_mask + 1;
 }
 
+static inline unsigned int
+execlists_active_ports(const struct intel_engine_execlists * const execlists)
+{
+	return execlists->port_count;
+}
+
+static inline unsigned int
+execlists_inactive_ports(const struct intel_engine_execlists * const execlists)
+{
+	return execlists_num_ports(execlists) - execlists_active_ports(execlists);
+}
+
 #define __port_n(start, n, mask) (((start) + (n)) & (mask))
 #define port_n(e, n) __port_n((e)->port_head, n, (e)->port_mask)
 
@@ -543,33 +560,43 @@ execlists_port(struct intel_engine_execlists * const execlists,
 static inline struct execlist_port *
 execlists_port_head(struct intel_engine_execlists * const execlists)
 {
+	GEM_BUG_ON(!execlists->port_count);
+
 	return execlists_port(execlists, 0);
 }
 
 static inline struct execlist_port *
 execlists_port_tail(struct intel_engine_execlists * const execlists)
 {
-	return execlists_port(execlists, -1);
+	GEM_BUG_ON(!execlists->port_count);
+
+	return execlists_port(execlists, execlists->port_count - 1);
 }
 
 static inline struct execlist_port *
-execlists_port_next(struct intel_engine_execlists * const execlists,
-		    const struct execlist_port * const port)
+execlists_request_port(struct intel_engine_execlists * const execlists)
 {
-	const unsigned int n = port_index(port, execlists);
+	GEM_BUG_ON(execlists->port_count == execlists->port_mask + 1);
+
+	execlists->port_count++;
 
-	return execlists_port(execlists, n + 1);
+	GEM_BUG_ON(port_isset(execlists_port_tail(execlists)));
+
+	return execlists_port_tail(execlists);
 }
 
-static inline struct execlist_port *
-execlists_port_complete(struct intel_engine_execlists * const execlists,
-			struct execlist_port * const port)
+static inline void
+execlists_release_port(struct intel_engine_execlists * const execlists,
+		       struct execlist_port * const port)
 {
+
 	GEM_BUG_ON(port_index(port, execlists) != execlists->port_head);
+	GEM_BUG_ON(!port_isset(port));
+	GEM_BUG_ON(!execlists->port_count);
 
 	memset(port, 0, sizeof(struct execlist_port));
 	execlists->port_head = port_n(execlists, 1);
-	return execlists_port_head(execlists);
+	execlists->port_count--;
 }
 
 static inline unsigned int
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx