The CSB format has been reworked for Gen12 to include information on both the context we're switching away from and the context we're switching to. After the change, some of the events don't have their own bit anymore and need to be inferred from other values in the csb. One of the context IDs (0x7FF) has also been reserved to indicate the invalid ctx, i.e. engine idle. To keep the logic simple, we can convert the new gen12 format to the gen11 format and re-use the legacy csb handling logic. This result in a sliglthly slower handling for gen12 but allows us to get running with a relatively small change and avoids code duplication. RFC: base TGL support [1] is not yet in the tree, but I wanted to get some early comments on this because I'm not totally convinced that the conversion is the best way of doing this. This way if another approach is proposed I can do the work while the base support gets reviewed. If we stick with the conversion we can probably optimize a bit. Toughts? [1]: https://patchwork.freedesktop.org/series/62726/ Bspec: 45555, 46144 Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@xxxxxxxxx> Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxxxxxxxx> Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/gt/intel_lrc.c | 93 ++++++++++++++++++++++++++++- 1 file changed, 91 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 471e134de186..61cf016b8e15 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -164,6 +164,15 @@ #define CTX_DESC_FORCE_RESTORE BIT_ULL(2) +#define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE (0x1) /* lower csb dword */ +#define GEN12_CTX_SWITCH_DETAIL(csb_dw) (csb_dw & 0xF) /* upper csb dword */ +#define GEN12_CTX_COMPLETE (0x0) +#define GEN12_CTX_PREEMPTED (0x5) +#define GEN12_CSB_CTX(csb_dw) (((csb_dw) & GENMASK_ULL(31, 15)) >> 15) +#define GEN12_CTX_SW_ID(ctx) ((ctx) & GENMASK(10, 0)) +#define GEN12_CTX_SW_COUNTER(ctx) (((ctx) & GENMASK(16, 11)) >> 11) +#define GEN12_CTX_INVALID_ID 0x7FF + /* Typical size of the average request (2 pipecontrols and a MI_BB) */ #define EXECLISTS_REQUEST_SIZE 64 /* bytes */ #define WA_TAIL_DWORDS 2 @@ -1279,6 +1288,80 @@ reset_in_progress(const struct intel_engine_execlists *execlists) return unlikely(!__tasklet_is_enabled(&execlists->tasklet)); } +/* + * Starting with Gen12, the csb has a new format: + * + * lower_dw: + * bit 0: switched to new queue + * bit 1: reserved + * bits 3-5: engine class + * bits 6-11: engine instance + * bits 12-14: reserved + * bits 15-25: sw context id of the lrc we're switching to + * bits 26-31: sw counter of the lrc we're switching to + * + * upper_dw: + * bits 0-3: context switch detail + * 0: ctx complete + * 1: wait on sync flip + * 2: wait on vblank + * 3: wait on scanline + * 4: wait on semaphore + * 5: ctx preempted (not on SEMAPHORE_WAIT or WAIT_FOR_EVENT) + * bit 4: reserved + * bits 5-11: wait detail (for switch detail 1 to 4) + * bits 12-14: reserved + * bits 15-25: sw context id of the lrc we're switching away from + * bits 26-31: sw counter of the lrc we're switching away from + * + * To keep the logic simple, we convert the new gen12 format to the gen11 + * format and re-use the legacy csb handling logic. The upper 32 bits of the + * legacy status should be set to the upper 32 bits of the descriptor of the + * ctx we're switching away from, but since we don't use that value in our + * logic at the moment we skip that setting. + */ +static u32 +gen12_convert_csb(struct intel_engine_cs *engine, u32 lower_dw, u32 upper_dw) +{ + u32 legacy_status = 0; + u32 ctx_to = GEN12_CSB_CTX(lower_dw); + u32 ctx_away = GEN12_CSB_CTX(upper_dw); + u8 switch_detail = GEN12_CTX_SWITCH_DETAIL(upper_dw); + bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE; + bool ctx_to_valid = GEN12_CTX_SW_ID(ctx_to) != GEN12_CTX_INVALID_ID; + bool ctx_away_valid = GEN12_CTX_SW_ID(ctx_away) != GEN12_CTX_INVALID_ID; + + if (!ctx_away_valid && ctx_to_valid) + legacy_status |= GEN8_CTX_STATUS_IDLE_ACTIVE; + + /* + * ctx_away_valid && !ctx_to_valid can indicate either a ctx completion + * or a preempt-to-idle of a running ctx. The 2 cases can be + * differentiated by the value of new_queue, but since the + * GEN8_CTX_STATUS_ACTIVE_IDLE is set in both situations we don't + * differentiate between them here. + */ + if (ctx_away_valid && !ctx_to_valid) + legacy_status |= GEN8_CTX_STATUS_ACTIVE_IDLE; + + if ((ctx_away == ctx_to) && ctx_to_valid) + legacy_status |= GEN8_CTX_STATUS_LITE_RESTORE; + + if ((ctx_away != ctx_to) && ctx_away_valid && ctx_to_valid && !new_queue) + legacy_status |= GEN8_CTX_STATUS_ELEMENT_SWITCH; + + if ((switch_detail == GEN12_CTX_PREEMPTED) || (new_queue && ctx_away_valid)) + legacy_status |= GEN8_CTX_STATUS_PREEMPTED; + + if ((switch_detail == GEN12_CTX_COMPLETE) && ctx_away_valid) + legacy_status |= GEN8_CTX_STATUS_COMPLETE; + + /* add other events as required (e.g. semaphore wait) */ + GEM_BUG_ON(!legacy_status); + + return legacy_status; +} + static void process_csb(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; @@ -1316,7 +1399,7 @@ static void process_csb(struct intel_engine_cs *engine) rmb(); do { - unsigned int status; + u32 status; if (++head == num_entries) head = 0; @@ -1343,7 +1426,13 @@ static void process_csb(struct intel_engine_cs *engine) engine->name, head, buf[2 * head + 0], buf[2 * head + 1]); - status = buf[2 * head]; + if (INTEL_GEN(engine->i915) >= 12) + status = gen12_convert_csb(engine, buf[2 * head], buf[2 * head + 1]); + else + status = buf[2 * head]; + + /* don't read from buf[] after this point */ + if (status & GEN8_CTX_STATUS_IDLE_ACTIVE) { GEM_BUG_ON(*execlists->active); promote: -- 2.20.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx