Quoting Chris Wilson (2019-08-01 16:48:33) > Quoting Tvrtko Ursulin (2019-08-01 16:29:53) > > For instance Icelake engine dependent stuff sneaked into > > intel_lrc.c/lrc_desriptors at some point, which is also against the > > spirit of caching. If we were to move the cached value in ce then we > > would be able to remove that and have it once again minimal in there. > > Well we can set all bits but hw_id/lrca at init time. How about if I run > that past you? static u64 -lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine) +base_lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine) { - struct i915_gem_context *ctx = ce->gem_context; u64 desc; BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH))); @@ -426,18 +425,12 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine) if (IS_GEN(engine->i915, 8)) desc |= GEN8_CTX_L3LLC_COHERENT; - desc |= i915_ggtt_offset(ce->state) + LRC_HEADER_PAGES * PAGE_SIZE; - /* bits 12-31 */ /* * The following 32bits are copied into the OA reports (dword 2). * Consider updating oa_get_render_ctx_id in i915_perf.c when changing * anything below. */ if (INTEL_GEN(engine->i915) >= 11) { - GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH)); - desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT; - /* bits 37-47 */ - desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT; /* bits 48-53 */ @@ -445,8 +438,29 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine) desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT; /* bits 61-63 */ + } + + return desc; +} + +static u64 +update_lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine) +{ + struct i915_gem_context *ctx = ce->gem_context; + u64 desc = ce->lrc_desc; + + desc &= ~GENMASK_ULL(31, 12); + desc |= i915_ggtt_offset(ce->state) + LRC_HEADER_PAGES * PAGE_SIZE; + + if (INTEL_GEN(engine->i915) >= 11) { + GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH)); + + desc &= ~GENMASK_ULL(47, 37); + desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT; } else { GEM_BUG_ON(ctx->hw_id >= BIT(GEN8_CTX_ID_WIDTH)); + + desc &= ~GENMASK_ULL(52, 32); desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits 32-52 */ } @@ -1631,7 +1645,7 @@ __execlists_context_pin(struct intel_context *ce, if (ret) goto unpin_map; - ce->lrc_desc = lrc_descriptor(ce, engine); + ce->lrc_desc = update_lrc_descriptor(ce, engine); ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; __execlists_update_reg_state(ce, engine); @@ -3126,6 +3140,8 @@ static int execlists_context_deferred_alloc(struct intel_context *ce, ce->ring = ring; ce->state = vma; + ce->lrc_desc = base_lrc_descriptor(ce, engine); + return 0; error_ring_free: That's pretty much the same amount of work in context_pin. I'm not convinced that caching between pins achieves very much. Concur? -Chris _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx