On Mon, Sep 13, 2021 at 10:24:43AM +0100, Tvrtko Ursulin wrote: > > On 10/09/2021 20:49, Matthew Brost wrote: > > On Fri, Sep 10, 2021 at 12:12:42PM +0100, Tvrtko Ursulin wrote: > > > > > > On 20/08/2021 23:44, Matthew Brost wrote: > > > > Add logical engine mapping. This is required for split-frame, as > > > > workloads need to be placed on engines in a logically contiguous manner. > > > > > > > > v2: > > > > (Daniel Vetter) > > > > - Add kernel doc for new fields > > > > > > > > Signed-off-by: Matthew Brost <matthew.brost@xxxxxxxxx> > > > > --- > > > > drivers/gpu/drm/i915/gt/intel_engine_cs.c | 60 ++++++++++++++++--- > > > > drivers/gpu/drm/i915/gt/intel_engine_types.h | 5 ++ > > > > .../drm/i915/gt/intel_execlists_submission.c | 1 + > > > > drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 2 +- > > > > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 21 +------ > > > > 5 files changed, 60 insertions(+), 29 deletions(-) > > > > > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c > > > > index 0d9105a31d84..4d790f9a65dd 100644 > > > > --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c > > > > +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c > > > > @@ -290,7 +290,8 @@ static void nop_irq_handler(struct intel_engine_cs *engine, u16 iir) > > > > GEM_DEBUG_WARN_ON(iir); > > > > } > > > > -static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) > > > > +static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id, > > > > + u8 logical_instance) > > > > { > > > > const struct engine_info *info = &intel_engines[id]; > > > > struct drm_i915_private *i915 = gt->i915; > > > > @@ -334,6 +335,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) > > > > engine->class = info->class; > > > > engine->instance = info->instance; > > > > + engine->logical_mask = BIT(logical_instance); > > > > __sprint_engine_name(engine); > > > > engine->props.heartbeat_interval_ms = > > > > @@ -572,6 +574,37 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt) > > > > return info->engine_mask; > > > > } > > > > +static void populate_logical_ids(struct intel_gt *gt, u8 *logical_ids, > > > > + u8 class, const u8 *map, u8 num_instances) > > > > +{ > > > > + int i, j; > > > > + u8 current_logical_id = 0; > > > > + > > > > + for (j = 0; j < num_instances; ++j) { > > > > + for (i = 0; i < ARRAY_SIZE(intel_engines); ++i) { > > > > + if (!HAS_ENGINE(gt, i) || > > > > + intel_engines[i].class != class) > > > > + continue; > > > > + > > > > + if (intel_engines[i].instance == map[j]) { > > > > + logical_ids[intel_engines[i].instance] = > > > > + current_logical_id++; > > > > + break; > > > > + } > > > > + } > > > > + } > > > > +} > > > > + > > > > +static void setup_logical_ids(struct intel_gt *gt, u8 *logical_ids, u8 class) > > > > +{ > > > > + int i; > > > > + u8 map[MAX_ENGINE_INSTANCE + 1]; > > > > + > > > > + for (i = 0; i < MAX_ENGINE_INSTANCE + 1; ++i) > > > > + map[i] = i; > > > > > > What's the point of the map array since it is 1:1 with instance? > > > > > > > Future products do not have a 1 to 1 mapping and that mapping can change > > based on fusing, e.g. XeHP SDV. > > > > Also technically ICL / TGL / ADL physical instance 2 maps to logical > > instance 1. > > I don't follow the argument. All I can see is that "map[i] = i" always in > the proposed code, which is then used to check "instance == map[instance]". > So I'd suggest to remove this array from the code until there is a need for > it. > Ok, this logic is slightly confusing and makes more sense once we have non-standard mappings. Yes, map is setup in a 1 to 1 mapping by default with the value in map[i] being a physical instance. Populate_logical_ids searches the map finding all physical instances present in the map assigning each found instance a new logical id increasing by 1 each time. e.g. If the map is setup 0-N and only physical instance 0 / 2 are present they will get logical mapping 0 / 1 respectfully. This algorithm works for non-standard mappings too /w fused parts. e.g. on XeHP SDV the map is: { 0, 2, 4, 6, 1, 3, 5, 7 } and if any of the physical instances can't be found due to fusing the logical mapping is still correct per the bspec. This array is absolutely needed for multi-lrc submission to work, even on ICL / TGL / ADL as the GuC only supports logically contiguous engine instances. > > > > + populate_logical_ids(gt, logical_ids, class, map, ARRAY_SIZE(map)); > > > > +} > > > > + > > > > /** > > > > * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers > > > > * @gt: pointer to struct intel_gt > > > > @@ -583,7 +616,8 @@ int intel_engines_init_mmio(struct intel_gt *gt) > > > > struct drm_i915_private *i915 = gt->i915; > > > > const unsigned int engine_mask = init_engine_mask(gt); > > > > unsigned int mask = 0; > > > > - unsigned int i; > > > > + unsigned int i, class; > > > > + u8 logical_ids[MAX_ENGINE_INSTANCE + 1]; > > > > int err; > > > > drm_WARN_ON(&i915->drm, engine_mask == 0); > > > > @@ -593,15 +627,23 @@ int intel_engines_init_mmio(struct intel_gt *gt) > > > > if (i915_inject_probe_failure(i915)) > > > > return -ENODEV; > > > > - for (i = 0; i < ARRAY_SIZE(intel_engines); i++) { > > > > - if (!HAS_ENGINE(gt, i)) > > > > - continue; > > > > + for (class = 0; class < MAX_ENGINE_CLASS + 1; ++class) { > > > > + setup_logical_ids(gt, logical_ids, class); > > > > - err = intel_engine_setup(gt, i); > > > > - if (err) > > > > - goto cleanup; > > > > + for (i = 0; i < ARRAY_SIZE(intel_engines); ++i) { > > > > + u8 instance = intel_engines[i].instance; > > > > + > > > > + if (intel_engines[i].class != class || > > > > + !HAS_ENGINE(gt, i)) > > > > + continue; > > > > - mask |= BIT(i); > > > > + err = intel_engine_setup(gt, i, > > > > + logical_ids[instance]); > > > > + if (err) > > > > + goto cleanup; > > > > + > > > > + mask |= BIT(i); > > > > > > I still this there is a less clunky way to set this up in less code and more > > > readable at the same time. Like do it in two passes so you can iterate > > > gt->engine_class[] array instead of having to implement a skip condition > > > (both on class and HAS_ENGINE at two places) and also avoid walking the flat > > > intel_engines array recursively. > > > > > > > Kinda a bikeshed arguing about a pretty simple loop structure, don't you > > think? I personally like the way it laid out. > > > > Pseudo code for your suggestion? > > Leave the existing setup loop as is and add an additional "for engine class" > walk after it. That way you can walk already setup gt->engine_class[] array > so wouldn't need to skip wrong classes and have HAS_ENGINE checks when > walking the flat intel_engines[] array several times. It also applies to the > helper which counts logical instances per class. > Ok, I think I see what you are getting at. Again IMO this is a total bikeshed as this is 1 time setup step that we really should only care if the loop works or not rather than it being optimized / looks a way a certain person wants. I can change this if you really insist but again IMO disucssing this is a total waste of energy. > > > > + } > > > > } > > > > /* > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h > > > > index ed91bcff20eb..fddf35546b58 100644 > > > > --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h > > > > +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h > > > > @@ -266,6 +266,11 @@ struct intel_engine_cs { > > > > unsigned int guc_id; > > > > intel_engine_mask_t mask; > > > > + /** > > > > + * @logical_mask: logical mask of engine, reported to user space via > > > > + * query IOCTL and used to communicate with the GuC in logical space > > > > + */ > > > > + intel_engine_mask_t logical_mask; > > > > > > You could prefix the new field with uabi_ to match the existing scheme and > > > to signify to anyone who might be touching it in the future it should not be > > > changed. > > > > This is kinda uabi, but also kinda isn't. We do report a logical > > instance via IOCTL but it also really is tied the GuC backend as we only > > can communicate with the GuC in logical space. IMO we should leave as > > is. > > Perhaps it would be best to call the new field uabi_logical_instance so it's > clear it is reported in the query directly and do the BIT() transformation > in the GuC backend? > Virtual engines can have a multiple bits set in this mask, so this is used for both the query on physical engines via UABI and submission in the GuC backend. > > > > > > > > Also, I think comment should explain what is logical space ie. how the > > > numbering works. > > > > > > > Don't I already do that? I suppose I could add something like: > > Where is it? Can't see it in the uapi kerneldoc AFAICS (for the new query) > or here. > /** * @logical_mask: logical mask of engine, reported to user space via * query IOCTL and used to communicate with the GuC in logical space */ > > > > The logical mask within engine class must be contiguous across all > > instances. > > Best not to start mentioning the mask for the first time. Just explain what > logical numbering is in terms of how engines are enumerated in order of > physical instances but skipping the fused off ones. In the kerneldoc for the > new query is I think the right place. > Maybe I can add: The logical mapping is defined on per part basis in the bspec and can very based the parts fusing. > > > Not sure the part about GuC needs to be in the comment since uapi is > > > supposed to be backend agnostic. > > > > > > > Communicating with the GuC in logical space is a pretty key point here. > > The communication with the GuC in logical space is backend specific but > > how our hardware works (e.g. split frame workloads must be placed > > logical contiguous) is not. Mentioning the GuC requirement here makes > > sense to me for completeness. > > Yeah might be, I was thinking more about the new query. Query definitely is > backend agnostic but yes it is fine to say in the comment here the new field > is used both for the query and for communicating with GuC. > Sounds good, will make it clear it used for the query and from communicating with the GuC. Matt > Regards, > > Tvrtko > > > > > Matt > > > > > Regards, > > > > > > Tvrtko > > > > > > > u8 class; > > > > u8 instance; > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > > > index cafb0608ffb4..813a6de01382 100644 > > > > --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > > > +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > > > > @@ -3875,6 +3875,7 @@ execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count) > > > > ve->siblings[ve->num_siblings++] = sibling; > > > > ve->base.mask |= sibling->mask; > > > > + ve->base.logical_mask |= sibling->logical_mask; > > > > /* > > > > * All physical engines must be compatible for their emission > > > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c > > > > index 6926919bcac6..9f5f43a16182 100644 > > > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c > > > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c > > > > @@ -176,7 +176,7 @@ static void guc_mapping_table_init(struct intel_gt *gt, > > > > for_each_engine(engine, gt, id) { > > > > u8 guc_class = engine_class_to_guc_class(engine->class); > > > > - system_info->mapping_table[guc_class][engine->instance] = > > > > + system_info->mapping_table[guc_class][ilog2(engine->logical_mask)] = > > > > engine->instance; > > > > } > > > > } > > > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > > > index e0eed70f9b92..ffafbac7335e 100644 > > > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > > > @@ -1401,23 +1401,6 @@ static int deregister_context(struct intel_context *ce, u32 guc_id, bool loop) > > > > return __guc_action_deregister_context(guc, guc_id, loop); > > > > } > > > > -static intel_engine_mask_t adjust_engine_mask(u8 class, intel_engine_mask_t mask) > > > > -{ > > > > - switch (class) { > > > > - case RENDER_CLASS: > > > > - return mask >> RCS0; > > > > - case VIDEO_ENHANCEMENT_CLASS: > > > > - return mask >> VECS0; > > > > - case VIDEO_DECODE_CLASS: > > > > - return mask >> VCS0; > > > > - case COPY_ENGINE_CLASS: > > > > - return mask >> BCS0; > > > > - default: > > > > - MISSING_CASE(class); > > > > - return 0; > > > > - } > > > > -} > > > > - > > > > static void guc_context_policy_init(struct intel_engine_cs *engine, > > > > struct guc_lrc_desc *desc) > > > > { > > > > @@ -1459,8 +1442,7 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) > > > > desc = __get_lrc_desc(guc, desc_idx); > > > > desc->engine_class = engine_class_to_guc_class(engine->class); > > > > - desc->engine_submit_mask = adjust_engine_mask(engine->class, > > > > - engine->mask); > > > > + desc->engine_submit_mask = engine->logical_mask; > > > > desc->hw_context_desc = ce->lrc.lrca; > > > > desc->priority = ce->guc_state.prio; > > > > desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; > > > > @@ -3260,6 +3242,7 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count) > > > > } > > > > ve->base.mask |= sibling->mask; > > > > + ve->base.logical_mask |= sibling->logical_mask; > > > > if (n != 0 && ve->base.class != sibling->class) { > > > > DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n", > > > >