[PATCH v3 3/8] drm/i915: context submission pvmmio optimization

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This is a performance optimization that reduces the number of mmio traps
from 4 to 1 during ELSP port writes (context submission).

On context submission, the elsp_data[4] values are cached in the shared
page, and the last port write (elsp_data[0]) is trapped to gvt for the
real context submission.

Use PVMMIO_ELSP_SUBMIT to control this level of pvmmio optimization.

v0: RFC
v1: rebase
v2: added pv ops for pv context submission. To maximize code reuse,
introduced 2 more ops (submit_ports & preempt_context) instead of 1 op
(set_default_submission) in the engine structure. pv versions of
submit_ports and preempt_context implemented.
v3:
1. to further reduce code duplication, refactored the code and replaced
the 2 ops "submit_ports & preempt_context" from v2 with 1 op "write_desc"
in the engine structure. pv version of write_desc implemented.
2. added VGT_G2V_ELSP_SUBMIT for g2v pv notification.

Cc: Zhenyu Wang <zhenyuw@xxxxxxxxxxxxxxx>
Cc: Zhi Wang <zhi.a.wang@xxxxxxxxx>
Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx>
Cc: He Min <min.he@xxxxxxxxx>
Cc: Jiang Fei <fei.jiang@xxxxxxxxx>
Cc: Gong Zhipeng <zhipeng.gong@xxxxxxxxx>
Cc: Yuan Hang <hang.yuan@xxxxxxxxx>
Cc: Zhiyuan Lv <zhiyuan.lv@xxxxxxxxx>
Signed-off-by: Xiaolin Zhang <xiaolin.zhang@xxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_pvinfo.h      |  1 +
 drivers/gpu/drm/i915/i915_vgpu.c        |  2 ++
 drivers/gpu/drm/i915/intel_lrc.c        | 33 +++++++++++++++++++++++++++++----
 drivers/gpu/drm/i915/intel_ringbuffer.h |  3 +++
 4 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pvinfo.h b/drivers/gpu/drm/i915/i915_pvinfo.h
index aa5eebc..3da644d 100644
--- a/drivers/gpu/drm/i915/i915_pvinfo.h
+++ b/drivers/gpu/drm/i915/i915_pvinfo.h
@@ -49,6 +49,7 @@ enum vgt_g2v_type {
 	VGT_G2V_EXECLIST_CONTEXT_CREATE,
 	VGT_G2V_EXECLIST_CONTEXT_DESTROY,
 	VGT_G2V_SHARED_PAGE_SETUP,
+	VGT_G2V_ELSP_SUBMIT,
 	VGT_G2V_MAX,
 };
 
diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c
index 63f70bf..82120f6 100644
--- a/drivers/gpu/drm/i915/i915_vgpu.c
+++ b/drivers/gpu/drm/i915/i915_vgpu.c
@@ -85,6 +85,8 @@ void i915_check_vgpu(struct drm_i915_private *dev_priv)
 		return;
 	}
 
+	dev_priv->vgpu.pv_caps = PVMMIO_ELSP_SUBMIT;
+
 	/* If guest wants to enable pvmmio, it needs to enable it explicitly
 	 * through vgt_if interface, and then read back the enable state from
 	 * gvt layer.
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index ff0e2b3..660e24c 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -391,8 +391,10 @@ static u64 execlists_update_context(struct i915_request *rq)
 	return ce->lrc_desc;
 }
 
-static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port)
+static inline void write_desc(struct intel_engine_cs *engine,
+			u64 desc, u32 port)
 {
+	struct intel_engine_execlists *execlists = &engine->execlists;
 	if (execlists->ctrl_reg) {
 		writel(lower_32_bits(desc), execlists->submit_reg + port * 2);
 		writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1);
@@ -402,6 +404,24 @@ static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc
 	}
 }
 
+static inline void write_desc_pv(struct intel_engine_cs *engine,
+			u64 desc, u32 port)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	u32 *elsp_data;
+
+	spin_lock(&engine->i915->vgpu.shared_page_lock);
+	elsp_data = engine->i915->vgpu.shared_page->elsp_data;
+	elsp_data[engine->id * 4 + port * 2] = upper_32_bits(desc);
+	elsp_data[engine->id * 4 + port * 2 + 1] = lower_32_bits(desc);
+	if (port == 0) {
+		engine->i915->vgpu.shared_page->ring_id = engine->id;
+		__raw_i915_write32(dev_priv, vgtif_reg(g2v_notify),
+				VGT_G2V_ELSP_SUBMIT);
+	}
+	spin_unlock(&engine->i915->vgpu.shared_page_lock);
+}
+
 static void execlists_submit_ports(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists *execlists = &engine->execlists;
@@ -450,7 +470,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
 			desc = 0;
 		}
 
-		write_desc(execlists, desc, n);
+		engine->write_desc(engine, desc, n);
 	}
 
 	/* we need to manually load the submit queue */
@@ -504,9 +524,9 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
 	 */
 	GEM_TRACE("%s\n", engine->name);
 	for (n = execlists_num_ports(execlists); --n; )
-		write_desc(execlists, 0, n);
+		engine->write_desc(engine, 0, n);
 
-	write_desc(execlists, ce->lrc_desc, n);
+	engine->write_desc(engine, ce->lrc_desc, n);
 
 	/* we need to manually load the submit queue */
 	if (execlists->ctrl_reg)
@@ -2134,6 +2154,11 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
 
 	engine->reset.prepare = execlists_reset_prepare;
 
+	engine->write_desc = write_desc;
+
+	if (PVMMIO_LEVEL_ENABLE(engine->i915, PVMMIO_ELSP_SUBMIT))
+		engine->write_desc = write_desc_pv;
+
 	engine->park = NULL;
 	engine->unpark = NULL;
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index f6ec48a..b752aab 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -523,6 +523,9 @@ struct intel_engine_cs {
 	void		(*irq_seqno_barrier)(struct intel_engine_cs *engine);
 	void		(*cleanup)(struct intel_engine_cs *engine);
 
+	void		(*write_desc)(struct intel_engine_cs *engine,
+					u64 desc, u32 port);
+
 	/* GEN8 signal/wait table - never trust comments!
 	 *	  signal to	signal to    signal to   signal to      signal to
 	 *	    RCS		   VCS          BCS        VECS		 VCS2
-- 
2.7.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [AMD Graphics]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux