Implemented context submission PV optimization within GVTg. GVTg reads context submission data (elsp_data) from the shared_page directly without trap cost and eliminates execlist HW behavior emulation, without injecting a context switch interrupt to the guest under the PV submission mechanism. v0: RFC. v1: rebase. v2: rebase. v3: report pv context submission cap and handle VGT_G2V_ELSP_SUBMIT g2v pv notification. v4: eliminate execlist HW emulation and don't inject context switch interrupt to guest under PV submission mechanism. v5: rebase. v6: rebase. Signed-off-by: Xiaolin Zhang <xiaolin.zhang@xxxxxxxxx> --- drivers/gpu/drm/i915/gvt/execlist.c | 6 ++++++ drivers/gpu/drm/i915/gvt/handlers.c | 29 ++++++++++++++++++++++++++++- drivers/gpu/drm/i915/gvt/vgpu.c | 1 + 3 files changed, 35 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index f21b8fb..e52bfd6 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -382,6 +382,9 @@ static int prepare_execlist_workload(struct intel_vgpu_workload *workload) int ring_id = workload->ring_id; int ret; + if (VGPU_PVCAP(vgpu, PV_SUBMISSION)) + return 0; + if (!workload->emulate_schedule_in) return 0; @@ -429,6 +432,9 @@ static int complete_execlist_workload(struct intel_vgpu_workload *workload) goto out; } + if (VGPU_PVCAP(vgpu, PV_SUBMISSION)) + goto out; + ret = emulate_execlist_ctx_schedule_out(execlist, &workload->ctx_desc); out: intel_vgpu_unpin_mm(workload->shadow_mm); diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 1e09c23..9cff9396 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1692,6 +1692,31 @@ static int mmio_read_from_hw(struct intel_vgpu *vgpu, return intel_vgpu_default_mmio_read(vgpu, offset, p_data, bytes); } +static int handle_pv_submission(struct intel_vgpu *vgpu, int ring_id) +{ + struct intel_vgpu_execlist *execlist; + u32 
hw_id = vgpu->gvt->dev_priv->engine[ring_id]->hw_id; + u32 pv_elsp_off = offsetof(struct gvt_shared_page, buf.pv_elsp); + u32 submitted_off = offsetof(struct gvt_shared_page, buf.submitted); + bool submitted = true; + int ret; + + execlist = &vgpu->submission.execlist[ring_id]; + + pv_elsp_off += hw_id * sizeof(struct pv_submission); + if (intel_gvt_read_shared_page(vgpu, pv_elsp_off, + &execlist->elsp_dwords.data, sizeof(struct pv_submission))) + return -EINVAL; + + ret = intel_vgpu_submit_execlist(vgpu, ring_id); + if (ret) + submitted = false; + + submitted_off += hw_id; + ret = intel_gvt_write_shared_page(vgpu, submitted_off, &submitted, 1); + return ret; +} + static int elsp_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { @@ -1703,8 +1728,10 @@ static int elsp_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, if (WARN_ON(ring_id < 0 || ring_id >= I915_NUM_ENGINES)) return -EINVAL; - execlist = &vgpu->submission.execlist[ring_id]; + if (VGPU_PVCAP(vgpu, PV_SUBMISSION) && VGT_G2V_PV_SUBMISSION == data) + return handle_pv_submission(vgpu, ring_id); + execlist = &vgpu->submission.execlist[ring_id]; execlist->elsp_dwords.data[3 - execlist->elsp_dwords.index] = data; if (execlist->elsp_dwords.index == 3) { ret = intel_vgpu_submit_execlist(vgpu, ring_id); diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 57eaf56..debdb88 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -51,6 +51,7 @@ void populate_pvinfo_page(struct intel_vgpu *vgpu) if (!intel_vtd_active()) vgpu_vreg_t(vgpu, vgtif_reg(pv_caps)) = PV_PPGTT_UPDATE; + vgpu_vreg_t(vgpu, vgtif_reg(pv_caps)) |= PV_SUBMISSION; vgpu_vreg_t(vgpu, vgtif_reg(avail_rs.mappable_gmadr.base)) = vgpu_aperture_gmadr_base(vgpu); -- 2.7.4 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx