Implement pv workload submission support within GVTg. GVTg reads the engine submission data (engine lrc) from the shared_page through the pv interface to reduce mmio trap cost, and eliminates execlist HW behavior emulation by no longer injecting context switch interrupts into the guest in workload submission pv mode, which improves efficiency. Signed-off-by: Xiaolin Zhang <xiaolin.zhang@xxxxxxxxx> --- drivers/gpu/drm/i915/gvt/gvt.h | 1 + drivers/gpu/drm/i915/gvt/handlers.c | 101 ++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/gvt/vgpu.c | 1 + 3 files changed, 103 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 05c2f13..18c0926 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -217,6 +217,7 @@ struct intel_vgpu { u32 pv_caps; u64 shared_page_gpa; bool shared_page_enabled; + u64 pv_sub_gpa; }; static inline void *intel_vgpu_vdev(struct intel_vgpu *vgpu) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index f1ad024..399427d 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1323,6 +1323,7 @@ static int pv_command_buffer_read(struct intel_vgpu *vgpu, static int handle_pv_commands(struct intel_vgpu *vgpu) { + struct pv_cap_addr *cap_addr; struct intel_vgpu_mm *mm; struct pv_vma *vma; u64 pdp; @@ -1336,6 +1337,17 @@ static int handle_pv_commands(struct intel_vgpu *vgpu) return ret; switch (cmd) { + case PV_CMD_REGISTER_CAP_GPA: + cap_addr = (struct pv_cap_addr *)data; + switch (cap_addr->cap) { + case PV_SUBMISSION: + vgpu->pv_sub_gpa = cap_addr->gpa; + break; + default: + gvt_vgpu_err("invalid pv cap 0x%x\n", cap_addr->cap); + break; + } + break; case PV_CMD_BIND_PPGTT: case PV_CMD_UNBIND_PPGTT: vma = (struct pv_vma *)data; @@ -1858,6 +1870,91 @@ static int mmio_read_from_hw(struct intel_vgpu *vgpu, return intel_vgpu_default_mmio_read(vgpu, offset, p_data, bytes); } +static int pv_prepare_workload(struct 
intel_vgpu_workload *workload) +{ + return 0; +} + +static int pv_complete_workload(struct intel_vgpu_workload *workload) +{ + return 0; +} + +static int submit_context_pv(struct intel_vgpu *vgpu, + const struct intel_engine_cs *engine, + struct execlist_ctx_descriptor_format *desc, + bool emulate_schedule_in) +{ + struct intel_vgpu_workload *workload = NULL; + + workload = intel_vgpu_create_workload(vgpu, engine, desc); + if (IS_ERR(workload)) + return PTR_ERR(workload); + + workload->prepare = pv_prepare_workload; + workload->complete = pv_complete_workload; + + intel_vgpu_queue_workload(workload); + return 0; +} + +#define get_desc_from_elsp_dwords(ed, i) \ + ((struct execlist_ctx_descriptor_format *)&((ed)->data[i * 2])) + +static int handle_pv_submission(struct intel_vgpu *vgpu, + const struct intel_engine_cs *engine) +{ + struct intel_vgpu_execlist *execlist; + struct pv_submission subdata; + struct execlist_ctx_descriptor_format *desc[2]; + u32 ring_id = engine->id; + u64 base = vgpu->pv_sub_gpa + ring_id * sizeof(struct pv_submission); + u64 submit_off = offsetof(struct pv_submission, submitted) + base; + bool submitted = false; + int i, ret; + + execlist = &vgpu->submission.execlist[ring_id]; + if (intel_gvt_hypervisor_read_gpa(vgpu, base, &subdata, sizeof(subdata))) + return -EINVAL; + + desc[0] = (struct execlist_ctx_descriptor_format *)&(subdata.descs[0]); + desc[1] = (struct execlist_ctx_descriptor_format *)&(subdata.descs[1]); + + if (!desc[0]->valid) { + gvt_vgpu_err("invalid elsp submission, desc0 is invalid\n"); + goto inv_desc; + } + + for (i = 0; i < ARRAY_SIZE(desc); i++) { + if (!desc[i]->valid) + continue; + if (!desc[i]->privilege_access) { + gvt_vgpu_err("unexpected GGTT elsp submission\n"); + goto inv_desc; + } + } + + /* submit workload */ + for (i = 0; i < ARRAY_SIZE(desc); i++) { + if (!desc[i]->valid) + continue; + + ret = submit_context_pv(vgpu, engine, desc[i], i == 0); + if (ret) { + gvt_vgpu_err("failed to submit desc %d\n", i); + 
return ret; + } + } + + ret = intel_gvt_hypervisor_write_gpa(vgpu, submit_off, &submitted, 1); + return ret; + +inv_desc: + gvt_vgpu_err("descriptors content: desc0 %08x %08x desc1 %08x %08x\n", + desc[0]->udw, desc[0]->ldw, desc[1]->udw, desc[1]->ldw); + return -EINVAL; +} + static int elsp_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { @@ -1870,6 +1967,10 @@ static int elsp_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, if (drm_WARN_ON(&i915->drm, !engine)) return -EINVAL; + if (intel_vgpu_enabled_pv_cap(vgpu, PV_SUBMISSION) && + data == PV_CMD_SUBMIT_WORKLOAD) + return handle_pv_submission(vgpu, engine); + execlist = &vgpu->submission.execlist[engine->id]; execlist->elsp_dwords.data[3 - execlist->elsp_dwords.index] = data; diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 1411c7b5..6737cf7 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -51,6 +51,7 @@ void populate_pvinfo_page(struct intel_vgpu *vgpu) vgpu_vreg_t(vgpu, vgtif_reg(vgt_caps)) |= VGT_CAPS_PV; vgpu_vreg_t(vgpu, vgtif_reg(pv_caps)) = PV_PPGTT | PV_GGTT; + vgpu_vreg_t(vgpu, vgtif_reg(pv_caps)) |= PV_SUBMISSION; vgpu_vreg_t(vgpu, vgtif_reg(avail_rs.mappable_gmadr.base)) = vgpu_aperture_gmadr_base(vgpu); -- 2.7.4 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx