Quoting Xiaolin Zhang (2019-09-17 06:48:16)
> This is a performance optimization: override the actual submission
> backend in order to eliminate execlists CSB processing and reduce the
> number of MMIO traps, so that workloads are submitted without a
> context-switch interrupt; the guest instead talks to GVT through the PV
> submission notification mechanism shared between guest and GVT.
>
> Use PV_SUBMISSION to control this level of PV optimization.
>
> v0: RFC.
> v1: rebase.
> v2: added PV ops for PV context submission. To maximize code reuse,
> introduced 2 more ops (submit_ports & preempt_context) instead of 1 op
> (set_default_submission) in the engine structure. PV versions of
> submit_ports and preempt_context implemented.
> v3:
> 1. To further reduce code duplication, refactored and replaced the 2 ops
> "submit_ports & preempt_context" from v2 with 1 op "write_desc" in the
> engine structure. PV version of write_desc implemented.
> 2. Added VGT_G2V_ELSP_SUBMIT for g2v PV notification.
> v4: implemented the PV ELSP submission tasklet as the backend workload
> submission path, talking to GVT via the PV notification mechanism, and
> renamed VGT_G2V_ELSP_SUBMIT to VGT_G2V_PV_SUBMISIION.
> v5: addressed v4 comments from Chris, intel_pv_submission.c added.
> v6: addressed v5 comments from Chris, replaced engine id by hw_id.
> v7: rebase.
> v8: addressed v7 comments from Chris.
>
> Signed-off-by: Xiaolin Zhang <xiaolin.zhang@xxxxxxxxx>
> ---
>  drivers/gpu/drm/i915/Makefile              |   2 +-
>  drivers/gpu/drm/i915/gt/intel_lrc.c        |  12 +-
>  drivers/gpu/drm/i915/i915_vgpu.c           |  18 +-
>  drivers/gpu/drm/i915/i915_vgpu.h           |  15 ++
>  drivers/gpu/drm/i915/intel_pv_submission.c | 300 +++++++++++++++++++++++++++++
>  5 files changed, 341 insertions(+), 6 deletions(-)
>  create mode 100644 drivers/gpu/drm/i915/intel_pv_submission.c
>
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 658b930..f500cf1 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -250,7 +250,7 @@ i915-$(CONFIG_DRM_I915_SELFTEST) += \
>  	selftests/igt_spinner.o
>
>  # virtual gpu code
> -i915-y += i915_vgpu.o
> +i915-y += i915_vgpu.o intel_pv_submission.o
>
>  ifeq ($(CONFIG_DRM_I915_GVT),y)
>  i915-y += intel_gvt.o
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index a3f0e49..5587aff 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -2933,10 +2933,14 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
>  	engine->unpark = NULL;
>
>  	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
> -	if (!intel_vgpu_active(engine->i915)) {
> -		engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
> -		if (HAS_LOGICAL_RING_PREEMPTION(engine->i915))
> -			engine->flags |= I915_ENGINE_HAS_PREEMPTION;
> +	engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
> +	if (HAS_LOGICAL_RING_PREEMPTION(engine->i915))
> +		engine->flags |= I915_ENGINE_HAS_PREEMPTION;
> +
> +	if (intel_vgpu_active(engine->i915)) {
> +		engine->flags &= ~I915_ENGINE_HAS_SEMAPHORES;
> +		engine->flags &= ~I915_ENGINE_HAS_PREEMPTION;
> +		intel_vgpu_config_pv_caps(engine->i915, PV_SUBMISSION, engine);
>  	}
>
>  	if (INTEL_GEN(engine->i915) >= 12) /* XXX disabled for debugging */
> diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c
> index e458892..a488b68 100644
> --- a/drivers/gpu/drm/i915/i915_vgpu.c
> +++ b/drivers/gpu/drm/i915/i915_vgpu.c
> @@ -97,7 +97,7 @@ void i915_detect_vgpu(struct drm_i915_private *dev_priv)
>  	mutex_init(&dev_priv->vgpu.lock);
>
>  	/* guest driver PV capability */
> -	dev_priv->vgpu.pv_caps = PV_PPGTT_UPDATE;
> +	dev_priv->vgpu.pv_caps = PV_PPGTT_UPDATE | PV_SUBMISSION;
>
>  	if (!intel_vgpu_check_pv_caps(dev_priv, shared_area)) {
>  		DRM_INFO("Virtual GPU for Intel GVT-g detected.\n");
> @@ -389,6 +389,7 @@ void intel_vgpu_config_pv_caps(struct drm_i915_private *dev_priv,
>  		enum pv_caps cap, void *data)
>  {
>  	struct i915_ppgtt *ppgtt;
> +	struct intel_engine_cs *engine;
>
>  	if (!intel_vgpu_enabled_pv_caps(dev_priv, cap))
>  		return;
> @@ -399,6 +400,11 @@ void intel_vgpu_config_pv_caps(struct drm_i915_private *dev_priv,
>  		ppgtt->vm.insert_entries = gen8_ppgtt_insert_4lvl_pv;
>  		ppgtt->vm.clear_range = gen8_ppgtt_clear_4lvl_pv;
>  	}
> +
> +	if (cap == PV_SUBMISSION) {
> +		engine = (struct intel_engine_cs *)data;
> +		vgpu_set_pv_submission(engine);
> +	}
>  }
>
>  /**
> @@ -594,6 +600,8 @@ static int intel_vgpu_setup_shared_page(struct drm_i915_private *dev_priv,
>  	u64 gpa;
>  	u16 ver_maj, ver_min;
>  	int ret = 0;
> +	int i;
> +	u32 size;
>
>  	/* We allocate 1 page shared between guest and GVT for data exchange.
>  	 * ___________.....................
> @@ -666,6 +674,14 @@ static int intel_vgpu_setup_shared_page(struct drm_i915_private *dev_priv,
>  	pv->notify = intel_vgpu_pv_notify_mmio;
>  	mutex_init(&pv->send_mutex);
>
> +	/* setup PV per engine data exchange structure */
> +	size = sizeof(struct pv_submission);
> +	for (i = 0; i < PV_MAX_ENGINES_NUM; i++) {
> +		pv->pv_elsp[i] = (void *)base + PV_ELSP_OFF + size * i;
> +		pv->pv_elsp[i]->submitted = false;
> +		spin_lock_init(&pv->pv_elsp[i]->lock);
> +	}
> +
>  	return ret;
>  err:
>  	__free_page(virt_to_page(base));
> diff --git a/drivers/gpu/drm/i915/i915_vgpu.h b/drivers/gpu/drm/i915/i915_vgpu.h
> index b0fee5b..c33cb05 100644
> --- a/drivers/gpu/drm/i915/i915_vgpu.h
> +++ b/drivers/gpu/drm/i915/i915_vgpu.h
> @@ -29,6 +29,8 @@
>
>  #define PV_MAJOR 1
>  #define PV_MINOR 0
> +#define PV_MAX_ENGINES_NUM (VECS1_HW + 1)
> +#define PV_ELSP_OFF (PAGE_SIZE/8)
>  #define PV_DESC_OFF (PAGE_SIZE/4)
>  #define PV_CMD_OFF (PAGE_SIZE/2)
>
> @@ -37,6 +39,7 @@
>   */
>  enum pv_caps {
>  	PV_PPGTT_UPDATE = 0x1,
> +	PV_SUBMISSION = 0x2,
>  };
>
>  /* PV actions */
> @@ -45,6 +48,7 @@ enum intel_vgpu_pv_action {
>  	PV_ACTION_PPGTT_L4_ALLOC,
>  	PV_ACTION_PPGTT_L4_CLEAR,
>  	PV_ACTION_PPGTT_L4_INSERT,
> +	PV_ACTION_ELSP_SUBMISSION,
>  };
>
>  /*
> @@ -56,6 +60,13 @@ struct gvt_shared_page {
>  	u16 ver_minor;
>  };
>
> +/* workload submission pv support data structure */
> +struct pv_submission {
> +	u64 descs[EXECLIST_MAX_PORTS];
> +	bool submitted;
> +	spinlock_t lock;
> +};
> +
>  /*
>   * Definition of the command transport message header (DW0)
>   *
> @@ -100,6 +111,9 @@ struct i915_virtual_gpu_pv {
>  	struct gvt_shared_page *shared_page;
>  	bool enabled;
>
> +	/* per engine PV workload submission data */
> +	struct pv_submission *pv_elsp[PV_MAX_ENGINES_NUM];
> +
>  	/* PV command buffer support */
>  	struct vgpu_pv_ct_buffer ctb;
>  	u32 next_fence;
> @@ -164,4 +178,5 @@ bool intel_vgpu_check_pv_caps(struct drm_i915_private *dev_priv,
>  			void __iomem *shared_area);
>  void intel_vgpu_config_pv_caps(struct drm_i915_private *dev_priv,
>  			enum pv_caps cap, void *data);
> +void vgpu_set_pv_submission(struct intel_engine_cs *engine);
>  #endif /* _I915_VGPU_H_ */
> diff --git a/drivers/gpu/drm/i915/intel_pv_submission.c b/drivers/gpu/drm/i915/intel_pv_submission.c
> new file mode 100644
> index 0000000..1083d56
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_pv_submission.c
> @@ -0,0 +1,300 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2018 Intel Corporation
> + */
> +
> +#include "i915_vgpu.h"
> +#include "gt/intel_lrc_reg.h"
> +#include "gt/intel_gt_pm.h"
> +#include "i915_trace.h"
> +
> +#define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
> +
> +static u64 execlists_update_context(struct i915_request *rq)
> +{
> +	struct intel_context *ce = rq->hw_context;
> +	u32 *reg_state = ce->lrc_reg_state;
> +
> +	reg_state[CTX_RING_TAIL + 1] = intel_ring_set_tail(rq->ring, rq->tail);
> +
> +	return ce->lrc_desc;
> +}
> +
> +static inline struct i915_priolist *to_priolist(struct rb_node *rb)
> +{
> +	return rb_entry(rb, struct i915_priolist, node);
> +}
> +
> +static void pv_submit(struct intel_engine_cs *engine,
> +		      struct i915_request **out,
> +		      struct i915_request **end)
> +{
> +	struct intel_engine_execlists * const execlists = &engine->execlists;
> +	struct i915_virtual_gpu_pv *pv = engine->i915->vgpu.pv;
> +	struct pv_submission *pv_elsp = pv->pv_elsp[engine->hw_id];
> +	struct i915_request *rq;
> +	int n, err;
> +
> +	memset(pv_elsp->descs, 0, sizeof(pv_elsp->descs));
> +	n = 0;
> +
> +	do {
> +		rq = *out++;
> +		pv_elsp->descs[n++] = execlists_update_context(rq);
> +	} while (out != end);
> +
> +	spin_lock(&pv_elsp->lock);
> +	pv_elsp->submitted = true;
> +	writel(PV_ACTION_ELSP_SUBMISSION, execlists->submit_reg);
> +
> +#define done (READ_ONCE(pv_elsp->submitted) == false)
> +	err = wait_for_atomic_us(done, 1000);

Please tell me you have a better plan than adding a 1ms wait with
interrupts off.
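Just to sketch the direction (rough and untested, built only from the
pv_elsp fields and helpers this patch introduces; the bool return, a
caller that copes with "try again later", and rescheduling the tasklet
are my assumptions, not part of the patch): back off instead of spinning
when the previous ELSP write has not been consumed yet:

static bool pv_submit(struct intel_engine_cs *engine,
		      struct i915_request **out,
		      struct i915_request **end)
{
	struct intel_engine_execlists * const execlists = &engine->execlists;
	struct i915_virtual_gpu_pv *pv = engine->i915->vgpu.pv;
	struct pv_submission *pv_elsp = pv->pv_elsp[engine->hw_id];
	int n = 0;

	spin_lock(&pv_elsp->lock);

	/*
	 * GVT has not consumed the previous ELSP write yet: do not spin
	 * in atomic context, reschedule the tasklet and retry on the
	 * next pass instead.
	 */
	if (READ_ONCE(pv_elsp->submitted)) {
		spin_unlock(&pv_elsp->lock);
		tasklet_hi_schedule(&execlists->tasklet);
		return false;
	}

	/* Write the new descriptors and poke GVT, as the patch does today. */
	memset(pv_elsp->descs, 0, sizeof(pv_elsp->descs));
	do {
		pv_elsp->descs[n++] = execlists_update_context(*out++);
	} while (out != end);

	pv_elsp->submitted = true;
	writel(PV_ACTION_ELSP_SUBMISSION, execlists->submit_reg);
	spin_unlock(&pv_elsp->lock);

	return true;
}

Even that still polls, just from softirq context rather than with
interrupts off; nicer again would be for the GVT completion notification
to clear pv_elsp->submitted and kick the tasklet, so the guest never has
to wait here at all.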
> +#undef done
> +	spin_unlock(&pv_elsp->lock);
> +
> +	if (unlikely(err))
> +		DRM_ERROR("PV (%s) workload submission failed\n", engine->name);
> +
> +}
> +void vgpu_set_pv_submission(struct intel_engine_cs *engine)
> +{
> +	/*
> +	 * We inherit a bunch of functions from execlists that we'd like
> +	 * to keep using:
> +	 *
> +	 * engine->submit_request = execlists_submit_request;
> +	 * engine->cancel_requests = execlists_cancel_requests;
> +	 * engine->schedule = execlists_schedule;
> +	 *
> +	 * But we need to override the actual submission backend in order
> +	 * to talk to the GVT with PV notification message.
> +	 */
> +
> +	engine->execlists.tasklet.func = vgpu_pv_submission_tasklet;
> +
> +	/* do not use execlists park/unpark */
> +	engine->park = engine->unpark = NULL;

But this was used to enable the interrupts so that your tasklet was
called. guc dropped it because they only plan to run on HW that always
sends the interrupts; your supported HW needs the interrupts enabled on
demand.

Please go and add a test to demonstrate this.
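For illustration only, a rough, untested sketch of the shape I would
expect (assuming the execlists defaults installed by
intel_execlists_set_default_submission() remain valid for the PV
backend): override just the tasklet and leave the park/unpark hooks
alone:

void vgpu_set_pv_submission(struct intel_engine_cs *engine)
{
	/* Only the backend that writes out the ELSP descriptors changes. */
	engine->execlists.tasklet.func = vgpu_pv_submission_tasklet;

	/*
	 * Keep engine->park/engine->unpark as installed by
	 * intel_execlists_set_default_submission(): they are what enable
	 * the interrupts on demand, and without that nothing kicks the
	 * tasklet above on your hardware.
	 */
}

together with the selftest asked for above, demonstrating that the
tasklet is still kicked after the engine has been parked and unparked.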
-Chris