This is a performance optimization that reduces the number of MMIO traps from 4 to 1 during ELSP port writes (context submission). During context submission, the elsp_data[4] values are cached in the shared page, and only the last port write (elsp_data[0]) is trapped to gvt for the real context submission. Use PVMMIO_ELSP_SUBMIT to control this level of pvmmio optimization. v0: RFC v1: rebase v2: added pv ops for pv context submission. To maximize code reuse, introduced 2 more ops (submit_ports & preempt_context) instead of 1 op (set_default_submission) in the engine structure. pv versions of submit_ports and preempt_context implemented. v3: 1. to further reduce code duplication, refactored the code and replaced the 2 ops "submit_ports & preempt_context" from v2 with 1 op "write_desc" in the engine structure. pv version of write_desc implemented. 2. added VGT_G2V_ELSP_SUBMIT for g2v pv notification. Cc: Zhenyu Wang <zhenyuw@xxxxxxxxxxxxxxx> Cc: Zhi Wang <zhi.a.wang@xxxxxxxxx> Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx> Cc: He Min <min.he@xxxxxxxxx> Cc: Jiang Fei <fei.jiang@xxxxxxxxx> Cc: Gong Zhipeng <zhipeng.gong@xxxxxxxxx> Cc: Yuan Hang <hang.yuan@xxxxxxxxx> Cc: Zhiyuan Lv <zhiyuan.lv@xxxxxxxxx> Signed-off-by: Xiaolin Zhang <xiaolin.zhang@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_pvinfo.h | 1 + drivers/gpu/drm/i915/i915_vgpu.c | 2 ++ drivers/gpu/drm/i915/intel_lrc.c | 33 +++++++++++++++++++++++++++++---- drivers/gpu/drm/i915/intel_ringbuffer.h | 3 +++ 4 files changed, 35 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pvinfo.h b/drivers/gpu/drm/i915/i915_pvinfo.h index aa5eebc..3da644d 100644 --- a/drivers/gpu/drm/i915/i915_pvinfo.h +++ b/drivers/gpu/drm/i915/i915_pvinfo.h @@ -49,6 +49,7 @@ enum vgt_g2v_type { VGT_G2V_EXECLIST_CONTEXT_CREATE, VGT_G2V_EXECLIST_CONTEXT_DESTROY, VGT_G2V_SHARED_PAGE_SETUP, + VGT_G2V_ELSP_SUBMIT, VGT_G2V_MAX, }; diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c index 
63f70bf..82120f6 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.c +++ b/drivers/gpu/drm/i915/i915_vgpu.c @@ -85,6 +85,8 @@ void i915_check_vgpu(struct drm_i915_private *dev_priv) return; } + dev_priv->vgpu.pv_caps = PVMMIO_ELSP_SUBMIT; + /* If guest wants to enable pvmmio, it needs to enable it explicitly * through vgt_if interface, and then read back the enable state from * gvt layer. diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index ff0e2b3..660e24c 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -391,8 +391,10 @@ static u64 execlists_update_context(struct i915_request *rq) return ce->lrc_desc; } -static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port) +static inline void write_desc(struct intel_engine_cs *engine, + u64 desc, u32 port) { + struct intel_engine_execlists *execlists = &engine->execlists; if (execlists->ctrl_reg) { writel(lower_32_bits(desc), execlists->submit_reg + port * 2); writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1); @@ -402,6 +404,24 @@ static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc } } +static inline void write_desc_pv(struct intel_engine_cs *engine, + u64 desc, u32 port) +{ + struct drm_i915_private *dev_priv = engine->i915; + u32 *elsp_data; + + spin_lock(&engine->i915->vgpu.shared_page_lock); + elsp_data = engine->i915->vgpu.shared_page->elsp_data; + elsp_data[engine->id * 4 + port * 2] = upper_32_bits(desc); + elsp_data[engine->id * 4 + port * 2 + 1] = lower_32_bits(desc); + if (port == 0) { + engine->i915->vgpu.shared_page->ring_id = engine->id; + __raw_i915_write32(dev_priv, vgtif_reg(g2v_notify), + VGT_G2V_ELSP_SUBMIT); + } + spin_unlock(&engine->i915->vgpu.shared_page_lock); +} + static void execlists_submit_ports(struct intel_engine_cs *engine) { struct intel_engine_execlists *execlists = &engine->execlists; @@ -450,7 +470,7 @@ static void execlists_submit_ports(struct 
intel_engine_cs *engine) desc = 0; } - write_desc(execlists, desc, n); + engine->write_desc(engine, desc, n); } /* we need to manually load the submit queue */ @@ -504,9 +524,9 @@ static void inject_preempt_context(struct intel_engine_cs *engine) */ GEM_TRACE("%s\n", engine->name); for (n = execlists_num_ports(execlists); --n; ) - write_desc(execlists, 0, n); + engine->write_desc(engine, 0, n); - write_desc(execlists, ce->lrc_desc, n); + engine->write_desc(engine, ce->lrc_desc, n); /* we need to manually load the submit queue */ if (execlists->ctrl_reg) @@ -2134,6 +2154,11 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine) engine->reset.prepare = execlists_reset_prepare; + engine->write_desc = write_desc; + + if (PVMMIO_LEVEL_ENABLE(engine->i915, PVMMIO_ELSP_SUBMIT)) + engine->write_desc = write_desc_pv; + engine->park = NULL; engine->unpark = NULL; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index f6ec48a..b752aab 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -523,6 +523,9 @@ struct intel_engine_cs { void (*irq_seqno_barrier)(struct intel_engine_cs *engine); void (*cleanup)(struct intel_engine_cs *engine); + void (*write_desc)(struct intel_engine_cs *engine, + u64 desc, u32 port); + /* GEN8 signal/wait table - never trust comments! * signal to signal to signal to signal to signal to * RCS VCS BCS VECS VCS2 -- 2.7.4 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx