This is a performance optimization that reduces the number of mmio traps from 4 to 1 during ELSP port writes (context submission). On context submission, the elsp_data[4] values are cached in the shared page, and only the last elsp_data[0] port write is trapped to gvt for the real context submission. Use PVMMIO_ELSP_SUBMIT to control this level of pvmmio optimization. v1: rebase v0: RFC Signed-off-by: Xiaolin Zhang <xiaolin.zhang@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_vgpu.c | 2 ++ drivers/gpu/drm/i915/intel_lrc.c | 37 ++++++++++++++++++++++++++++++++++++- 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c index 609eefe..84241a7 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.c +++ b/drivers/gpu/drm/i915/i915_vgpu.c @@ -66,6 +66,8 @@ void i915_check_vgpu(struct drm_i915_private *dev_priv) BUILD_BUG_ON(sizeof(struct vgt_if) != VGT_PVINFO_SIZE); + dev_priv->vgpu.pv_caps = PVMMIO_ELSP_SUBMIT; + magic = __raw_i915_read64(dev_priv, vgtif_reg(magic)); if (magic != VGT_MAGIC) return; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index d604d8a..1f52633 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -407,6 +407,11 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) struct intel_engine_execlists *execlists = &engine->execlists; struct execlist_port *port = execlists->port; unsigned int n; + u32 __iomem *elsp = + engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine)); + u32 *elsp_data; + u32 descs[4]; + int i = 0; /* * We can skip acquiring intel_runtime_pm_get() here as it was taken @@ -449,8 +454,24 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) GEM_BUG_ON(!n); desc = 0; } + if (PVMMIO_LEVEL_ENABLE(engine->i915, PVMMIO_ELSP_SUBMIT)) { + GEM_BUG_ON(i >= 4); + descs[i] = upper_32_bits(desc); + descs[i + 1] = lower_32_bits(desc); + i += 2; + } else { + write_desc(execlists, desc, n); + } + } - 
write_desc(execlists, desc, n); + if (PVMMIO_LEVEL_ENABLE(engine->i915, PVMMIO_ELSP_SUBMIT)) { + spin_lock(&engine->i915->vgpu.shared_page_lock); + elsp_data = engine->i915->vgpu.shared_page->elsp_data; + *elsp_data = descs[0]; + *(elsp_data + 1) = descs[1]; + *(elsp_data + 2) = descs[2]; + writel(descs[3], elsp); + spin_unlock(&engine->i915->vgpu.shared_page_lock); } /* we need to manually load the submit queue */ @@ -493,11 +514,25 @@ static void inject_preempt_context(struct intel_engine_cs *engine) struct intel_engine_execlists *execlists = &engine->execlists; struct intel_context *ce = to_intel_context(engine->i915->preempt_context, engine); + u32 __iomem *elsp = + engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine)); + u32 *elsp_data; unsigned int n; GEM_BUG_ON(execlists->preempt_complete_status != upper_32_bits(ce->lrc_desc)); + if (PVMMIO_LEVEL_ENABLE(engine->i915, PVMMIO_ELSP_SUBMIT)) { + spin_lock(&engine->i915->vgpu.shared_page_lock); + elsp_data = engine->i915->vgpu.shared_page->elsp_data; + *elsp_data = 0; + *(elsp_data + 1) = 0; + *(elsp_data + 2) = upper_32_bits(ce->lrc_desc); + writel(lower_32_bits(ce->lrc_desc), elsp); + spin_unlock(&engine->i915->vgpu.shared_page_lock); + return; + } + /* * Switch to our empty preempt context so * the state of the GPU is known (idle). -- 2.7.4 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx