On Thu, Oct 14, 2021 at 11:42:41AM -0700, John Harrison wrote: > On 10/14/2021 10:20, Matthew Brost wrote: > > A weak implementation of parallel submission (multi-bb execbuf IOCTL) for > > execlists. Doing as little as possible to support this interface for > > execlists - basically just passing submit fences between each request > > generated and virtual engines are not allowed. This is on par with what > > is there for the existing (hopefully soon deprecated) bonding interface. > > > > We perma-pin these execlists contexts to align with GuC implementation. > > > > Signed-off-by: Matthew Brost <matthew.brost@xxxxxxxxx> > > --- > > drivers/gpu/drm/i915/gem/i915_gem_context.c | 10 ++-- > > drivers/gpu/drm/i915/gt/intel_context.c | 4 +- > > .../drm/i915/gt/intel_execlists_submission.c | 56 ++++++++++++++++++- > > drivers/gpu/drm/i915/gt/intel_lrc.c | 2 + > > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 - > > 5 files changed, 64 insertions(+), 10 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c > > index fb33d0322960..35e87a7d0ea9 100644 > > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c > > @@ -570,10 +570,6 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, > > struct intel_engine_cs **siblings = NULL; > > intel_engine_mask_t prev_mask; > > - /* FIXME: This is NIY for execlists */ > > - if (!(intel_uc_uses_guc_submission(&i915->gt.uc))) > > - return -ENODEV; > > - > > if (get_user(slot, &ext->engine_index)) > > return -EFAULT; > > @@ -583,6 +579,12 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, > > if (get_user(num_siblings, &ext->num_siblings)) > > return -EFAULT; > > + if (!intel_uc_uses_guc_submission(&i915->gt.uc) && num_siblings != 1) { > > + drm_dbg(&i915->drm, "Only 1 sibling (%d) supported in non-GuC mode\n", > > + num_siblings); > > + return -EINVAL; > > + } > > + > > if (slot >= set->num_engines) { > > drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n", > > slot, set->num_engines); > > diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c > > index 5634d14052bc..1bec92e1d8e6 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_context.c > > +++ b/drivers/gpu/drm/i915/gt/intel_context.c > > @@ -79,7 +79,8 @@ static int intel_context_active_acquire(struct intel_context *ce) > > __i915_active_acquire(&ce->active); > > - if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine)) > > + if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine) || > > + intel_context_is_parallel(ce)) > > return 0; > > /* Preallocate tracking nodes */ > > @@ -563,7 +564,6 @@ void intel_context_bind_parent_child(struct intel_context *parent, > > * Callers responsibility to validate that this function is used > > * correctly but we use GEM_BUG_ON here ensure that they do. > > */ > > - GEM_BUG_ON(!intel_engine_uses_guc(parent->engine)); > > GEM_BUG_ON(intel_context_is_pinned(parent)); > > GEM_BUG_ON(intel_context_is_child(parent)); > > GEM_BUG_ON(intel_context_is_pinned(child)); > > diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > index bedb80057046..8cd986bdf26c 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > @@ -927,8 +927,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) > > static bool ctx_single_port_submission(const struct intel_context *ce) > > { > > - return (IS_ENABLED(CONFIG_DRM_I915_GVT) && > > - intel_context_force_single_submission(ce)); > > + return intel_context_force_single_submission(ce); > Does this change not affect all execlist operation rather than just parallel > submission? > I don't think so. The only place that sets single submission was in the GVT code. I think was an optimization so this would just compile out if GVT wasn't built. > > } > > static bool can_merge_ctx(const struct intel_context *prev, > > @@ -2598,6 +2597,58 @@ static void execlists_context_cancel_request(struct intel_context *ce, > > current->comm); > > } > > +static struct intel_context * > > +execlists_create_parallel(struct intel_engine_cs **engines, > > + unsigned int num_siblings, > > + unsigned int width) > > +{ > > + struct intel_engine_cs **siblings = NULL; > > + struct intel_context *parent = NULL, *ce, *err; > > + int i, j; > > + > > + GEM_BUG_ON(num_siblings != 1); > > + > > + siblings = kmalloc_array(num_siblings, > > + sizeof(*siblings), > > + GFP_KERNEL); > > + if (!siblings) > > + return ERR_PTR(-ENOMEM); > > + > > + for (i = 0; i < width; ++i) { > > + for (j = 0; j < num_siblings; ++j) > > + siblings[j] = engines[i * num_siblings + j]; > What is the purpose of this array? The only usage that I can see is > siblings[0] on the line below. The rest of the entries never seem to be > used. So you could just replace that with 'engines[i * num_siblings]' and > drop the siblings array itself completely? > Yes, this can be dropped as num_siblings must be 1. I think this was copied and pasted from the GuC code where num_siblings could be more than 1. Will fixup in a standalone patch after we merge the GuC parallel submission. Matt > John. > > > > + > > + ce = intel_context_create(siblings[0]); > > + if (!ce) { > > + err = ERR_PTR(-ENOMEM); > > + goto unwind; > > + } > > + > > + if (i == 0) > > + parent = ce; > > + else > > + intel_context_bind_parent_child(parent, ce); > > + } > > + > > + parent->parallel.fence_context = dma_fence_context_alloc(1); > > + > > + intel_context_set_nopreempt(parent); > > + intel_context_set_single_submission(parent); > > + for_each_child(parent, ce) { > > + intel_context_set_nopreempt(ce); > > + intel_context_set_single_submission(ce); > > + } > > + > > + kfree(siblings); > > + return parent; > > + > > +unwind: > > + if (parent) > > + intel_context_put(parent); > > + kfree(siblings); > > + return err; > > +} > > + > > static const struct intel_context_ops execlists_context_ops = { > > .flags = COPS_HAS_INFLIGHT, > > @@ -2616,6 +2667,7 @@ static const struct intel_context_ops execlists_context_ops = { > > .reset = lrc_reset, > > .destroy = lrc_destroy, > > + .create_parallel = execlists_create_parallel, > > .create_virtual = execlists_create_virtual, > > }; > > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c > > index 56156cf18c41..70f4b309522d 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c > > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c > > @@ -1065,6 +1065,8 @@ lrc_pin(struct intel_context *ce, > > void lrc_unpin(struct intel_context *ce) > > { > > + if (unlikely(ce->parallel.last_rq)) > > + i915_request_put(ce->parallel.last_rq); > > check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET, > > ce->engine); > > } > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > index d7710debcd47..145ffe265305 100644 > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > @@ -2960,8 +2960,6 @@ static void guc_parent_context_unpin(struct intel_context *ce) > > GEM_BUG_ON(!intel_context_is_parent(ce)); > > GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); > > - if (ce->parallel.last_rq) > > - i915_request_put(ce->parallel.last_rq); > > unpin_guc_id(guc, ce); > > lrc_unpin(ce); > > } >