[PATCH 4/4] drm/i915: Subsume intel_ctx_submit_request in to drm_i915_gem_request

Nick Hoath <nicholas.hoath@xxxxxxxxx> · Mon, 12 Jan 2015 15:33:12 +0000

Move all remaining elements that were unique to execlists queue items
in to the associated request.

Issue: VIZ-4274

v2: Rebase. Fixed issue of overzealous freeing of request.
v3: Removed re-addition of cleanup work queue (found by Daniel Vetter)
v4: Rebase.
v5: Actual removal of intel_ctx_submit_request. Update both tail and postfix
pointer in __i915_add_request (found by Thomas Daniel)

Signed-off-by: Nick Hoath <nicholas.hoath@xxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_debugfs.c  |  8 +++----
 drivers/gpu/drm/i915/i915_drv.h      | 21 +++++++++++++++++
 drivers/gpu/drm/i915/i915_gem.c      | 12 +++++-----
 drivers/gpu/drm/i915/i915_reg.h      | 32 ++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_display.c |  6 ++++-
 drivers/gpu/drm/i915/intel_lrc.c     | 44 ++++++++++++++++--------------------
 drivers/gpu/drm/i915/intel_lrc.h     | 27 ----------------------
 7 files changed, 87 insertions(+), 63 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index d4cc482..b2dfac4 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1928,7 +1928,7 @@ static int i915_execlists(struct seq_file *m, void *data)
 	intel_runtime_pm_get(dev_priv);
 
 	for_each_ring(ring, dev_priv, ring_id) {
-		struct intel_ctx_submit_request *head_req = NULL;
+		struct drm_i915_gem_request *head_req = NULL;
 		int count = 0;
 		unsigned long flags;
 
@@ -1961,18 +1961,18 @@ static int i915_execlists(struct seq_file *m, void *data)
 		list_for_each(cursor, &ring->execlist_queue)
 			count++;
 		head_req = list_first_entry_or_null(&ring->execlist_queue,
-				struct intel_ctx_submit_request, execlist_link);
+				struct drm_i915_gem_request, execlist_link);
 		spin_unlock_irqrestore(&ring->execlist_lock, flags);
 
 		seq_printf(m, "\t%d requests in queue\n", count);
 		if (head_req) {
 			struct drm_i915_gem_object *ctx_obj;
 
-			ctx_obj = head_req->request->ctx->engine[ring_id].state;
+			ctx_obj = head_req->ctx->engine[ring_id].state;
 			seq_printf(m, "\tHead request id: %u\n",
 				   intel_execlists_ctx_id(ctx_obj));
 			seq_printf(m, "\tHead request tail: %u\n",
-				   head_req->request->tail);
+				   head_req->tail);
 		}
 
 		seq_putc(m, '\n');
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index af024f2..8c6a207 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2089,6 +2089,27 @@ struct drm_i915_gem_request {
 	struct list_head client_list;
 
 	uint32_t uniq;
+
+	/**
+	 * The ELSP only accepts two elements at a time, so we queue
+	 * context/tail pairs on a given queue (ring->execlist_queue) until the
+	 * hardware is available. The queue serves a double purpose: we also use
+	 * it to keep track of the up to 2 contexts currently in the hardware
+	 * (usually one in execution and the other queued up by the GPU): We
+	 * only remove elements from the head
+	 * of the queue when the hardware informs us that an element has been
+	 * completed.
+	 *
+	 * All accesses to the queue are mediated by a spinlock
+	 * (ring->execlist_lock).
+	 */
+
+	/** Execlist link in the submission queue.*/
+	struct list_head execlist_link;
+
+	/** Execlists no. of times this request has been sent to the ELSP */
+	int elsp_submitted;
+
 };
 
 void i915_gem_request_free(struct kref *req_ref);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0649559..0195e3f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2414,7 +2414,7 @@ int __i915_add_request(struct intel_engine_cs *ring,
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
 	struct drm_i915_gem_request *request;
 	struct intel_ringbuffer *ringbuf;
-	u32 request_ring_position, request_start;
+	u32 request_start;
 	int ret;
 
 	request = ring->outstanding_lazy_request;
@@ -2449,7 +2449,7 @@ int __i915_add_request(struct intel_engine_cs *ring,
 	 * GPU processing the request, we never over-estimate the
 	 * position of the head.
 	 */
-	request_ring_position = intel_ring_get_tail(ringbuf);
+	request->postfix = intel_ring_get_tail(ringbuf);
 
 	if (i915.enable_execlists) {
 		ret = ring->emit_request(ringbuf, request);
@@ -2462,7 +2462,7 @@ int __i915_add_request(struct intel_engine_cs *ring,
 	}
 
 	request->head = request_start;
-	request->postfix = request_ring_position;
+	request->tail = intel_ring_get_tail(ringbuf);
 
 	/* Whilst this request exists, batch_obj will be on the
 	 * active_list, and so will hold the active reference. Only when this
@@ -2649,14 +2649,14 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
 	 * pinned in place.
 	 */
 	while (!list_empty(&ring->execlist_queue)) {
-		struct intel_ctx_submit_request *submit_req;
+		struct drm_i915_gem_request *submit_req;
 
 		submit_req = list_first_entry(&ring->execlist_queue,
-				struct intel_ctx_submit_request,
+				struct drm_i915_gem_request,
 				execlist_link);
 		list_del(&submit_req->execlist_link);
 		intel_runtime_pm_put(dev_priv);
-		i915_gem_context_unreference(submit_req->request->ctx);
+		i915_gem_context_unreference(submit_req->ctx);
 		kfree(submit_req);
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 0f32fd1a..9ae81db 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -5223,6 +5223,10 @@ enum punit_power_well {
 #define  HDC_DONOT_FETCH_MEM_WHEN_MASKED	(1<<11)
 #define  HDC_FENCE_DEST_SLM_DISABLE		(1<<14)
 
+/* GEN8 chicken */
+#define GEN9_SLICE_COMMON_ECO_CHICKEN0		0x07308
+#define GEN9_DISABLE_PIXEL_MASK_CAMMING		(1<<14)
+
 /* WaCatErrorRejectionIssue */
 #define GEN7_SQ_CHICKEN_MBCUNIT_CONFIG		0x9030
 #define  GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB	(1<<11)
@@ -6169,6 +6173,7 @@ enum punit_power_well {
 #define HALF_SLICE_CHICKEN3		0xe184
 #define   HSW_SAMPLE_C_PERFORMANCE	(1<<9)
 #define   GEN8_CENTROID_PIXEL_OPT_DIS	(1<<8)
+#define   GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC	(1<<5)
 #define   GEN8_SAMPLER_POWER_BYPASS_DIS	(1<<1)
 
 /* Audio */
@@ -7145,4 +7150,31 @@ enum punit_power_well {
 #define _PALETTE_A (dev_priv->info.display_mmio_offset + 0xa000)
 #define _PALETTE_B (dev_priv->info.display_mmio_offset + 0xa800)
 
+/*
+ * Chicken Registers for LLC/eLLC Hot Spotting Avoidance Mode for
+ * 3D/Media Compressed Resources
+ */
+#define GEN9_COMMON_SLICE_CHICKEN2_REG			0x7014
+#define GEN9_COMMON_SLICE_CHICKEN2_NEW_HASH_ENABLE	(1<<29 | 1<<13)
+#define GEN9_HALF_SLICE_CHICKEN7_REG			0xE194
+#define GEN9_HALF_SLICE_CHICKEN7_NEW_HASH_ENABLE	(1<<24 | 1<<8)
+#define GEN9_MMCD_MISC_CTRL_REG				0x4DDC
+#define GEN9_MMCD_MISC_CTRL_NEW_HASH_ENABLE		(1<<31 | 1<<27)
+#define GEN9_CHICKEN_MISC1_REG				0x42080
+#define GEN9_CHICKEN_MISC1_NEW_HASH_ENABLE		(1<<15)
+
+/* Unit Level Clock Gating Control */
+#define GEN9_UCGCTL6					0x9430
+#define GEN9_GAPS_UNIT_CLOCK_GATE_DISABLE		(1<<24)
+#define GEN9_HDC_UNIT_CLOCK_GATE_DISABLE		(1<<28)
+
+/* Mirror of EU disable fuses */
+#define MIRROR_EU_DISABLE0				0x9134
+#define MIRROR_EU_DISABLE1				0x9138
+#define MIRROR_EU_DISABLE_SS0				(0x7<<0)
+#define MIRROR_EU_DISABLE_SS1				(0x7<<8)
+#define MIRROR_EU_DISABLE_SS2				(0x7<<16)
+#define MIRROR_EU_DISABLE_SS3				(0x7<<24)
+
+
 #endif /* _I915_REG_H_ */
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index d1a4de8..2942b11 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -9514,9 +9514,13 @@ static int intel_gen9_queue_flip(struct drm_device *dev,
 
 	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
 	intel_ring_emit(ring, DERRMR);
+	/* WaClearRenderResponseMasks:bxt */
 	intel_ring_emit(ring, ~(DERRMR_PIPEA_PRI_FLIP_DONE |
 				DERRMR_PIPEB_PRI_FLIP_DONE |
-				DERRMR_PIPEC_PRI_FLIP_DONE));
+				DERRMR_PIPEC_PRI_FLIP_DONE |
+				DERRMR_PIPEA_SPR_FLIP_DONE |
+				DERRMR_PIPEB_SPR_FLIP_DONE |
+				DERRMR_PIPEC_SPR_FLIP_DONE));
 	intel_ring_emit(ring, MI_STORE_REGISTER_MEM_GEN8(1) |
 			      MI_SRM_LRM_GLOBAL_GTT);
 	intel_ring_emit(ring, DERRMR);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 8275b7a..369ae55 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -404,8 +404,8 @@ static void execlists_submit_contexts(struct intel_engine_cs *ring,
 
 static void execlists_context_unqueue(struct intel_engine_cs *ring)
 {
-	struct intel_ctx_submit_request *req0 = NULL, *req1 = NULL;
-	struct intel_ctx_submit_request *cursor = NULL, *tmp = NULL;
+	struct drm_i915_gem_request *req0 = NULL, *req1 = NULL;
+	struct drm_i915_gem_request *cursor = NULL, *tmp = NULL;
 
 	assert_spin_locked(&ring->execlist_lock);
 
@@ -417,7 +417,7 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring)
 				 execlist_link) {
 		if (!req0) {
 			req0 = cursor;
-		} else if (req0->request->ctx == cursor->request->ctx) {
+		} else if (req0->ctx == cursor->ctx) {
 			/* Same ctx: ignore first request, as second request
 			 * will update tail past first request's workload */
 			cursor->elsp_submitted = req0->elsp_submitted;
@@ -433,9 +433,9 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring)
 
 	WARN_ON(req1 && req1->elsp_submitted);
 
-	execlists_submit_contexts(ring, req0->request->ctx, req0->request->tail,
-				  req1 ? req1->request->ctx : NULL,
-				  req1 ? req1->request->tail : 0);
+	execlists_submit_contexts(ring, req0->ctx, req0->tail,
+				  req1 ? req1->ctx : NULL,
+				  req1 ? req1->tail : 0);
 
 	req0->elsp_submitted++;
 	if (req1)
@@ -445,17 +445,17 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring)
 static bool execlists_check_remove_request(struct intel_engine_cs *ring,
 					   u32 request_id)
 {
-	struct intel_ctx_submit_request *head_req;
+	struct drm_i915_gem_request *head_req;
 
 	assert_spin_locked(&ring->execlist_lock);
 
 	head_req = list_first_entry_or_null(&ring->execlist_queue,
-					    struct intel_ctx_submit_request,
+					    struct drm_i915_gem_request,
 					    execlist_link);
 
 	if (head_req != NULL) {
 		struct drm_i915_gem_object *ctx_obj =
-				head_req->request->ctx->engine[ring->id].state;
+				head_req->ctx->engine[ring->id].state;
 		if (intel_execlists_ctx_id(ctx_obj) == request_id) {
 			WARN(head_req->elsp_submitted == 0,
 			     "Never submitted head request\n");
@@ -537,15 +537,11 @@ static int execlists_context_queue(struct intel_engine_cs *ring,
 				   u32 tail,
 				   struct drm_i915_gem_request *request)
 {
-	struct intel_ctx_submit_request *req = NULL, *cursor;
+	struct drm_i915_gem_request *cursor;
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
 	unsigned long flags;
 	int num_elements = 0;
 
-	req = kzalloc(sizeof(*req), GFP_KERNEL);
-	if (req == NULL)
-		return -ENOMEM;
-
 	if (to != ring->default_context)
 		intel_lr_context_pin(ring, to);
 
@@ -559,14 +555,13 @@ static int execlists_context_queue(struct intel_engine_cs *ring,
 		if (request == NULL)
 			return -ENOMEM;
 		request->ring = ring;
+		request->ctx = to;
 	} else {
 		WARN_ON(to != request->ctx);
 	}
-	request->ctx = to;
 	request->tail = tail;
-	req->request = request;
 	i915_gem_request_reference(request);
-	i915_gem_context_reference(req->request->ctx);
+	i915_gem_context_reference(request->ctx);
 
 	intel_runtime_pm_get(dev_priv);
 
@@ -577,13 +572,13 @@ static int execlists_context_queue(struct intel_engine_cs *ring,
 			break;
 
 	if (num_elements > 2) {
-		struct intel_ctx_submit_request *tail_req;
+		struct drm_i915_gem_request *tail_req;
 
 		tail_req = list_last_entry(&ring->execlist_queue,
-					   struct intel_ctx_submit_request,
+					   struct drm_i915_gem_request,
 					   execlist_link);
 
-		if (to == tail_req->request->ctx) {
+		if (to == tail_req->ctx) {
 			WARN(tail_req->elsp_submitted != 0,
 				"More than 2 already-submitted reqs queued\n");
 			list_del(&tail_req->execlist_link);
@@ -592,7 +587,7 @@ static int execlists_context_queue(struct intel_engine_cs *ring,
 		}
 	}
 
-	list_add_tail(&req->execlist_link, &ring->execlist_queue);
+	list_add_tail(&request->execlist_link, &ring->execlist_queue);
 	if (num_elements == 0)
 		execlists_context_unqueue(ring);
 
@@ -761,7 +756,7 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file,
 
 void intel_execlists_retire_requests(struct intel_engine_cs *ring)
 {
-	struct intel_ctx_submit_request *req, *tmp;
+	struct drm_i915_gem_request *req, *tmp;
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
 	unsigned long flags;
 	struct list_head retired_list;
@@ -776,7 +771,7 @@ void intel_execlists_retire_requests(struct intel_engine_cs *ring)
 	spin_unlock_irqrestore(&ring->execlist_lock, flags);
 
 	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
-		struct intel_context *ctx = req->request->ctx;
+		struct intel_context *ctx = req->ctx;
 		struct drm_i915_gem_object *ctx_obj =
 				ctx->engine[ring->id].state;
 
@@ -784,9 +779,8 @@ void intel_execlists_retire_requests(struct intel_engine_cs *ring)
 			intel_lr_context_unpin(ring, ctx);
 		intel_runtime_pm_put(dev_priv);
 		i915_gem_context_unreference(ctx);
-		i915_gem_request_unreference(req->request);
+		i915_gem_request_unreference(req);
 		list_del(&req->execlist_link);
-		kfree(req);
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 80a80ea..6f2d7da 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -89,33 +89,6 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file,
 			       u64 exec_start, u32 flags);
 u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj);
 
-/**
- * struct intel_ctx_submit_request - queued context submission request
- * @ctx: Context to submit to the ELSP.
- * @ring: Engine to submit it to.
- * @tail: how far in the context's ringbuffer this request goes to.
- * @execlist_link: link in the submission queue.
- * @work: workqueue for processing this request in a bottom half.
- * @elsp_submitted: no. of times this request has been sent to the ELSP.
- *
- * The ELSP only accepts two elements at a time, so we queue context/tail
- * pairs on a given queue (ring->execlist_queue) until the hardware is
- * available. The queue serves a double purpose: we also use it to keep track
- * of the up to 2 contexts currently in the hardware (usually one in execution
- * and the other queued up by the GPU): We only remove elements from the head
- * of the queue when the hardware informs us that an element has been
- * completed.
- *
- * All accesses to the queue are mediated by a spinlock (ring->execlist_lock).
- */
-struct intel_ctx_submit_request {
-	struct list_head execlist_link;
-
-	int elsp_submitted;
-
-	struct drm_i915_gem_request *request;
-};
-
 void intel_lrc_irq_handler(struct intel_engine_cs *ring);
 void intel_execlists_retire_requests(struct intel_engine_cs *ring);
 
-- 
2.1.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx