[PATCH 2/2] drm/i915: Execlist irq handler micro optimisations

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>

Assorted changes most likely without any practical effect
apart from a tiny reduction in generated code for the interrupt
handler and request submission.

 * Remove needless initialization.
 * Improve cache locality by reorganizing code and/or using
   branch hints to keep unexpected or error conditions out
   of line.
 * Favor busy submit path vs. empty queue.
 * Less branching in hot-paths.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
---
 drivers/gpu/drm/i915/intel_lrc.c        | 91 ++++++++++++++++-----------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  1 +
 2 files changed, 44 insertions(+), 48 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 951f1e6af947..589d6404b5ca 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -269,6 +269,9 @@ logical_ring_init_platform_invariants(struct intel_engine_cs *ring)
 {
 	struct drm_device *dev = ring->dev;
 
+	if (IS_GEN8(dev) || IS_GEN9(dev))
+		ring->idle_lite_restore_wa = ~0;
+
 	ring->disable_lite_restore_wa = (IS_SKL_REVID(dev, 0, SKL_REVID_B0) ||
 					IS_BXT_REVID(dev, 0, BXT_REVID_A1)) &&
 					(ring->id == VCS || ring->id == VCS2);
@@ -424,7 +427,7 @@ static void execlists_submit_requests(struct drm_i915_gem_request *rq0,
 static void execlists_context_unqueue(struct intel_engine_cs *ring)
 {
 	struct drm_i915_gem_request *req0 = NULL, *req1 = NULL;
-	struct drm_i915_gem_request *cursor = NULL, *tmp = NULL;
+	struct drm_i915_gem_request *cursor, *tmp;
 
 	assert_spin_locked(&ring->execlist_lock);
 
@@ -434,9 +437,6 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring)
 	 */
 	WARN_ON(!intel_irqs_enabled(ring->dev->dev_private));
 
-	if (list_empty(&ring->execlist_queue))
-		return;
-
 	/* Try to read in pairs */
 	list_for_each_entry_safe(cursor, tmp, &ring->execlist_queue,
 				 execlist_link) {
@@ -451,37 +451,35 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring)
 			req0 = cursor;
 		} else {
 			req1 = cursor;
+			WARN_ON(req1->elsp_submitted);
 			break;
 		}
 	}
 
-	if (IS_GEN8(ring->dev) || IS_GEN9(ring->dev)) {
+	if (unlikely(!req0))
+		return;
+
+	if (req0->elsp_submitted & ring->idle_lite_restore_wa) {
 		/*
-		 * WaIdleLiteRestore: make sure we never cause a lite
-		 * restore with HEAD==TAIL
+		 * WaIdleLiteRestore: make sure we never cause a lite restore
+		 * with HEAD==TAIL.
+		 *
+		 * Apply the wa NOOPS to prevent ring:HEAD == req:TAIL as we
+		 * resubmit the request. See gen8_emit_request() for where we
+		 * prepare the padding after the end of the request.
 		 */
-		if (req0->elsp_submitted) {
-			/*
-			 * Apply the wa NOOPS to prevent ring:HEAD == req:TAIL
-			 * as we resubmit the request. See gen8_emit_request()
-			 * for where we prepare the padding after the end of the
-			 * request.
-			 */
-			struct intel_ringbuffer *ringbuf;
+		struct intel_ringbuffer *ringbuf;
 
-			ringbuf = req0->ctx->engine[ring->id].ringbuf;
-			req0->tail += 8;
-			req0->tail &= ringbuf->size - 1;
-		}
+		ringbuf = req0->ctx->engine[ring->id].ringbuf;
+		req0->tail += 8;
+		req0->tail &= ringbuf->size - 1;
 	}
 
-	WARN_ON(req1 && req1->elsp_submitted);
-
 	execlists_submit_requests(req0, req1);
 }
 
-static bool execlists_check_remove_request(struct intel_engine_cs *ring,
-					   u32 request_id)
+static unsigned int
+execlists_check_remove_request(struct intel_engine_cs *ring, u32 request_id)
 {
 	struct drm_i915_gem_request *head_req;
 
@@ -491,20 +489,21 @@ static bool execlists_check_remove_request(struct intel_engine_cs *ring,
 					    struct drm_i915_gem_request,
 					    execlist_link);
 
-	if (head_req != NULL) {
-		if (intel_execlists_ctx_id(head_req->ctx, ring) == request_id) {
-			WARN(head_req->elsp_submitted == 0,
-			     "Never submitted head request\n");
+	if (!head_req)
+		return 0;
+
+	if (unlikely(intel_execlists_ctx_id(head_req->ctx, ring) != request_id))
+		return 0;
 
-			if (--head_req->elsp_submitted <= 0) {
-				list_move_tail(&head_req->execlist_link,
-					       &ring->execlist_retired_req_list);
-				return true;
-			}
-		}
-	}
+	WARN(head_req->elsp_submitted == 0, "Never submitted head request\n");
 
-	return false;
+	if (--head_req->elsp_submitted > 0)
+		return 0;
+
+	list_move_tail(&head_req->execlist_link,
+		       &ring->execlist_retired_req_list);
+
+	return 1;
 }
 
 static u32 get_context_status(struct intel_engine_cs *ring, u8 read_pointer,
@@ -551,7 +550,7 @@ void intel_lrc_irq_handler(struct intel_engine_cs *ring)
 		if (status & GEN8_CTX_STATUS_IDLE_ACTIVE)
 			continue;
 
-		if (status & GEN8_CTX_STATUS_PREEMPTED) {
+		if (unlikely(status & GEN8_CTX_STATUS_PREEMPTED)) {
 			if (status & GEN8_CTX_STATUS_LITE_RESTORE) {
 				if (execlists_check_remove_request(ring, status_id))
 					WARN(1, "Lite Restored request removed from queue\n");
@@ -559,21 +558,16 @@ void intel_lrc_irq_handler(struct intel_engine_cs *ring)
 				WARN(1, "Preemption without Lite Restore\n");
 		}
 
-		if ((status & GEN8_CTX_STATUS_ACTIVE_IDLE) ||
-		    (status & GEN8_CTX_STATUS_ELEMENT_SWITCH)) {
-			if (execlists_check_remove_request(ring, status_id))
-				submit_contexts++;
-		}
+		if (status & (GEN8_CTX_STATUS_ACTIVE_IDLE |
+		    GEN8_CTX_STATUS_ELEMENT_SWITCH))
+			submit_contexts +=
+				execlists_check_remove_request(ring, status_id);
 	}
 
-	if (ring->disable_lite_restore_wa) {
-		/* Prevent a ctx to preempt itself */
-		if ((status & GEN8_CTX_STATUS_ACTIVE_IDLE) &&
-		    (submit_contexts != 0))
-			execlists_context_unqueue(ring);
-	} else if (submit_contexts != 0) {
+	if (submit_contexts && (!ring->disable_lite_restore_wa ||
+	    (ring->disable_lite_restore_wa && (status &
+	    GEN8_CTX_STATUS_ACTIVE_IDLE))))
 		execlists_context_unqueue(ring);
-	}
 
 	spin_unlock(&ring->execlist_lock);
 
@@ -2028,6 +2022,7 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *ring)
 		ring->status_page.obj = NULL;
 	}
 
+	ring->idle_lite_restore_wa = 0;
 	ring->disable_lite_restore_wa = false;
 	ring->ctx_desc_template = 0;
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 566b0ae10ce0..54054ba6e223 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -272,6 +272,7 @@ struct  intel_engine_cs {
 	struct list_head execlist_queue;
 	struct list_head execlist_retired_req_list;
 	u8 next_context_status_buffer;
+	unsigned int idle_lite_restore_wa;
 	bool disable_lite_restore_wa;
 	u32 ctx_desc_template;
 	u32             irq_keep_mask; /* bitmask for interrupts that should not be masked */
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux