>> @@ -157,6 +163,9 @@ int gen11_emit_flush_rcs(struct i915_request *rq, >> u32 mode) >> intel_ring_advance(rq, cs); >> } >> >> + /* hsdes: 1809175790. No fixup needed for gen11 rcs */ >> + rq->aux_inv_fixup = NULL; > > This is a little ugly to me. Can we just set this to 0 or 0xdeadbeef by default maybe and check that value below instead of retroactively adding all of these assignments? The problem is there are many code paths that a i915_request could be allocated, I'm not aware of a unified routine where I could initialize the pointer for all i915_requests. >> + >> return 0; >> } >> >> + /* >> + * We don't know which engine will eventually carry out >> + * this request, so the mmio aux_inv register address >> is >> + * unknown at this moment. We save the cs pointer >> supposed >> + * to hold the aux_inv address in rq->aux_inv_fixup and >> set >> + * it in execlists_dequeue() when the engine instance >> + * carrying out this request becomes certain >> + */ >> + *cs++ = MI_LOAD_REGISTER_IMM(1); >> + rq->aux_inv_fixup = cs; /* save the pointer to aux_inv >> */ >> + *cs++ = 0; /* mmio addr to be set at submission to HW >> */ > >Maybe MI_NOOP instead? This is supposed to be the mmio address field for the MI_LOAD_REGISTER_IMM instruction, setting it to 0 makes more sense? >> + *cs++ = AUX_INV; >> *cs++ = MI_NOOP; >> - } >> + } else > > Can you add the brackets here on the else: > } else { > aux_inv_fixup = NULL > } > >Also good to run checkpatch. I see this showing up as a warning in the checkpatch results. I noticed the warning, will update. >> + rq->aux_inv_fixup = NULL; >> >> if (mode & EMIT_INVALIDATE) >> *cs++ = preparser_disable(false); >> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c >> b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c >> index e1470bb60f34..7e8552414275 100644 >> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c >> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c >> @@ -1258,6 +1258,34 @@ static bool completed(const struct i915_request >> *rq) >> return __i915_request_is_complete(rq); } >> >> +static i915_reg_t aux_inv_reg(const struct intel_engine_cs *engine) { >> + static const i915_reg_t vd[] = { >> + GEN12_VD0_AUX_NV, >> + GEN12_VD1_AUX_NV, >> + GEN12_VD2_AUX_NV, >> + GEN12_VD3_AUX_NV, >> + }; >> + >> + static const i915_reg_t ve[] = { >> + GEN12_VE0_AUX_NV, >> + GEN12_VE1_AUX_NV, >> + }; >> + >> + if (engine->class == VIDEO_DECODE_CLASS) { >> + GEM_BUG_ON(engine->instance >= ARRAY_SIZE(vd)); >> + return vd[engine->instance]; >> + } >> + >> + if (engine->class == VIDEO_ENHANCEMENT_CLASS) { >> + GEM_BUG_ON(engine->instance >= ARRAY_SIZE(ve)); >> + return ve[engine->instance]; >> + } >> + >> + GEM_BUG_ON("unknown aux_inv reg\n"); >> + return INVALID_MMIO_REG; >> +} >> + >> static void execlists_dequeue(struct intel_engine_cs *engine) > > So in the previous implementation, this "worked" for both execlists and guc submission. But how will this work now for GuC based submission? > This flow and the address of the engine is owned by the GuC. > > If we are going to say this is an execlist only requirement (e.g. > platforms using GuC submission don't need this workaround), you should add an if (!using guc submission) in the sequence you added to the various emit_flush() routines above. Good point. I didn't consider GuC submission because Chrome doesn't enable GuC for TGL. But it is true that the implementation will have problem with GuC submission. I'm not sure if it's possible for i915 to know which engine will eventually carry out the request because it might be scheduled by GuC. I will need to investigate. > Thanks, > Stuart