From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> LRC code was calling GEM API like i915_gem_obj_ggtt_offset from places where the struct_mutex cannot be grabbed (irq handlers). To avoid that this patch caches some interesting bits and values in the engine and context structures. Some usages are also removed where they are not needed like a few asserts which are either impossible or have been checked already during engine initialization. Side benefit is also that interrupt handlers and command submission stop evaluating invariant conditionals, like what Gen we are running on, on every interrupt and every command submitted. This patch deals with logical ring context id and descriptors while subsequent patches will deal with the remaining issues. v2: * Cache the VMA instead of the address. (Chris Wilson) * Incorporate Dave Gordon's good comments and function name. Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> Cc: Daniel Vetter <daniel.vetter@xxxxxxxx> Cc: Dave Gordon <david.s.gordon@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_debugfs.c | 15 ++-- drivers/gpu/drm/i915/i915_drv.h | 2 + drivers/gpu/drm/i915/i915_gem_gtt.h | 1 - drivers/gpu/drm/i915/intel_lrc.c | 126 +++++++++++++++++++------------- drivers/gpu/drm/i915/intel_lrc.h | 4 +- drivers/gpu/drm/i915/intel_ringbuffer.h | 2 + 6 files changed, 90 insertions(+), 60 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index e3377abc0d4d..0b3550f05026 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1994,12 +1994,13 @@ static int i915_context_status(struct seq_file *m, void *unused) } static void i915_dump_lrc_obj(struct seq_file *m, - struct intel_engine_cs *ring, - struct drm_i915_gem_object *ctx_obj) + struct intel_context *ctx, + struct intel_engine_cs *ring) { struct page *page; uint32_t *reg_state; int j; + struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state; unsigned long ggtt_offset = 0; if (ctx_obj == NULL) { @@ -2009,7 +2010,7 @@ static void i915_dump_lrc_obj(struct seq_file *m, } seq_printf(m, "CONTEXT: %s %u\n", ring->name, - intel_execlists_ctx_id(ctx_obj)); + intel_execlists_ctx_id(ctx, ring)); if (!i915_gem_obj_ggtt_bound(ctx_obj)) seq_puts(m, "\tNot bound in GGTT\n"); @@ -2058,8 +2059,7 @@ static int i915_dump_lrc(struct seq_file *m, void *unused) list_for_each_entry(ctx, &dev_priv->context_list, link) { for_each_ring(ring, dev_priv, i) { if (ring->default_context != ctx) - i915_dump_lrc_obj(m, ring, - ctx->engine[i].state); + i915_dump_lrc_obj(m, ctx, ring); } } @@ -2133,11 +2133,8 @@ static int i915_execlists(struct seq_file *m, void *data) seq_printf(m, "\t%d requests in queue\n", count); if (head_req) { - struct drm_i915_gem_object *ctx_obj; - - ctx_obj = head_req->ctx->engine[ring_id].state; seq_printf(m, "\tHead request id: %u\n", - intel_execlists_ctx_id(ctx_obj)); + intel_execlists_ctx_id(head_req->ctx, ring)); seq_printf(m, "\tHead request tail: %u\n", head_req->tail); } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 747d2d84a18c..79bb3671a15e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -883,6 +883,8 @@ struct intel_context { struct drm_i915_gem_object *state; struct intel_ringbuffer *ringbuf; int pin_count; + struct i915_vma *lrc_vma; + u64 lrc_desc; } engine[I915_NUM_RINGS]; struct list_head link; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index b448ad832dcf..e5737963ab79 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -44,7 +44,6 @@ typedef uint64_t gen8_ppgtt_pml4e_t; #define gtt_total_entries(gtt) ((gtt).base.total >> PAGE_SHIFT) - /* gen6-hsw has bit 11-4 for physical addr bit 39-32 */ #define GEN6_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0xff0)) #define GEN6_PTE_ADDR_ENCODE(addr) GEN6_GTT_ADDR_ENCODE(addr) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index ab344e0b878c..504030ce7f93 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -265,7 +265,8 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists /** * intel_execlists_ctx_id() - get the Execlists Context ID - * @ctx_obj: Logical Ring Context backing object. + * @ctx: Context to get the ID for + * @ring: Engine to get the ID for * * Do not confuse with ctx->id! Unfortunately we have a name overload * here: the old context ID we pass to userspace as a handler so that @@ -273,16 +274,15 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists * ELSP so that the GPU can inform us of the context status via * interrupts. * + * The context ID is a portion of the context descriptor, so we can + * just extract the required part from the cached descriptor. + * * Return: 20-bits globally unique context ID. */ -u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj) +u32 intel_execlists_ctx_id(struct intel_context *ctx, + struct intel_engine_cs *ring) { - u32 lrca = i915_gem_obj_ggtt_offset(ctx_obj) + - LRC_PPHWSP_PN * PAGE_SIZE; - - /* LRCA is required to be 4K aligned so the more significant 20 bits - * are globally unique */ - return lrca >> 12; + return ctx->engine[ring->id].lrc_desc >> GEN8_CTX_ID_SHIFT; } static bool disable_lite_restore_wa(struct intel_engine_cs *ring) @@ -297,31 +297,7 @@ static bool disable_lite_restore_wa(struct intel_engine_cs *ring) uint64_t intel_lr_context_descriptor(struct intel_context *ctx, struct intel_engine_cs *ring) { - struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state; - uint64_t desc; - uint64_t lrca = i915_gem_obj_ggtt_offset(ctx_obj) + - LRC_PPHWSP_PN * PAGE_SIZE; - - WARN_ON(lrca & 0xFFFFFFFF00000FFFULL); - - desc = GEN8_CTX_VALID; - desc |= GEN8_CTX_ADDRESSING_MODE(dev) << GEN8_CTX_ADDRESSING_MODE_SHIFT; - if (IS_GEN8(ctx_obj->base.dev)) - desc |= GEN8_CTX_L3LLC_COHERENT; - desc |= GEN8_CTX_PRIVILEGE; - desc |= lrca; - desc |= (u64)intel_execlists_ctx_id(ctx_obj) << GEN8_CTX_ID_SHIFT; - - /* TODO: WaDisableLiteRestore when we start using semaphore - * signalling between Command Streamers */ - /* desc |= GEN8_CTX_FORCE_RESTORE; */ - - /* WaEnableForceRestoreInCtxtDescForVCS:skl */ - /* WaEnableForceRestoreInCtxtDescForVCS:bxt */ - if (disable_lite_restore_wa(ring)) - desc |= GEN8_CTX_FORCE_RESTORE; - - return desc; + return ctx->engine[ring->id].lrc_desc; } static void execlists_elsp_write(struct drm_i915_gem_request *rq0, @@ -369,8 +345,6 @@ static int execlists_update_context(struct drm_i915_gem_request *rq) uint32_t *reg_state; BUG_ON(!ctx_obj); - WARN_ON(!i915_gem_obj_is_pinned(ctx_obj)); - WARN_ON(!i915_gem_obj_is_pinned(rb_obj)); page = i915_gem_object_get_dirty_page(ctx_obj, LRC_STATE_PN); reg_state = kmap_atomic(page); @@ -477,9 +451,7 @@ static bool execlists_check_remove_request(struct intel_engine_cs *ring, execlist_link); if (head_req != NULL) { - struct drm_i915_gem_object *ctx_obj = - head_req->ctx->engine[ring->id].state; - if (intel_execlists_ctx_id(ctx_obj) == request_id) { + if (intel_execlists_ctx_id(head_req->ctx, ring) == request_id) { WARN(head_req->elsp_submitted == 0, "Never submitted head request\n"); @@ -556,7 +528,7 @@ void intel_lrc_irq_handler(struct intel_engine_cs *ring) } } - if (disable_lite_restore_wa(ring)) { + if (ring->disable_lite_restore_wa) { /* Prevent a ctx to preempt itself */ if ((status & GEN8_CTX_STATUS_ACTIVE_IDLE) && (submit_contexts != 0)) @@ -1038,15 +1010,51 @@ int logical_ring_flush_all_caches(struct drm_i915_gem_request *req) return 0; } +/** + * intel_lr_context_descriptor_update() - calculate & cache the + * descriptor for a pinned + * context + * @ctx: Context to work on + * @ring: Engine the descriptor will be used with + * + * The context descriptor encodes various attributes of a context, + * including its GTT address and some flags. Because it's fairly + * expensive to calculate, we'll just do it once and cache the result, + * which remains valid until the context is unpinned. + * + * This is what a descriptor looks like, from LSB to MSB: + * bits 0-11: flags, GEN8_CTX_* (cached in ctx_desc_template) + * bits 12-31: LRCA, GTT address of (the HWSP of) this context + * bits 32-51: ctx ID, a globally unique tag (the LRCA again!) + * bits 52-63: reserved, may encode the engine ID (for GuC) + */ +static void +intel_lr_context_descriptor_update(struct intel_context *ctx, + struct intel_engine_cs *ring) +{ + uint64_t lrca, desc; + + lrca = ctx->engine[ring->id].lrc_vma->node.start + + LRC_PPHWSP_PN * PAGE_SIZE; + + desc = ring->ctx_desc_template; /* bits 0-11 */ + desc |= lrca; /* bits 12-31 */ + desc |= (lrca >> PAGE_SHIFT) << GEN8_CTX_ID_SHIFT; /* bits 32-51 */ + + ctx->engine[ring->id].lrc_desc = desc; +} + static int intel_lr_context_do_pin(struct intel_engine_cs *ring, - struct drm_i915_gem_object *ctx_obj, - struct intel_ringbuffer *ringbuf) + struct intel_context *ctx) { struct drm_device *dev = ring->dev; struct drm_i915_private *dev_priv = dev->dev_private; - int ret = 0; + struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state; + struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf; + int ret; WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex)); + ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN, PIN_OFFSET_BIAS | GUC_WOPCM_TOP); if (ret) @@ -1056,6 +1064,8 @@ static int intel_lr_context_do_pin(struct intel_engine_cs *ring, if (ret) goto unpin_ctx_obj; + ctx->engine[ring->id].lrc_vma = i915_gem_obj_to_ggtt(ctx_obj); + intel_lr_context_descriptor_update(ctx, ring); ctx_obj->dirty = true; /* Invalidate GuC TLB. */ @@ -1074,11 +1084,9 @@ static int intel_lr_context_pin(struct drm_i915_gem_request *rq) { int ret = 0; struct intel_engine_cs *ring = rq->ring; - struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state; - struct intel_ringbuffer *ringbuf = rq->ringbuf; if (rq->ctx->engine[ring->id].pin_count++ == 0) { - ret = intel_lr_context_do_pin(ring, ctx_obj, ringbuf); + ret = intel_lr_context_do_pin(ring, rq->ctx); if (ret) goto reset_pin_count; } @@ -1100,6 +1108,8 @@ void intel_lr_context_unpin(struct drm_i915_gem_request *rq) if (--rq->ctx->engine[ring->id].pin_count == 0) { intel_unpin_ringbuffer_obj(ringbuf); i915_gem_object_ggtt_unpin(ctx_obj); + rq->ctx->engine[ring->id].lrc_vma = NULL; + rq->ctx->engine[ring->id].lrc_desc = 0; } } } @@ -1938,6 +1948,9 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *ring) ring->status_page.obj = NULL; } + ring->disable_lite_restore_wa = false; + ring->ctx_desc_template = 0; + lrc_destroy_wa_ctx_obj(ring); ring->dev = NULL; } @@ -1960,6 +1973,24 @@ static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin INIT_LIST_HEAD(&ring->execlist_retired_req_list); spin_lock_init(&ring->execlist_lock); + ring->disable_lite_restore_wa = disable_lite_restore_wa(ring); + + ring->ctx_desc_template = GEN8_CTX_VALID; + ring->ctx_desc_template |= GEN8_CTX_ADDRESSING_MODE(dev) << + GEN8_CTX_ADDRESSING_MODE_SHIFT; + if (IS_GEN8(dev)) + ring->ctx_desc_template |= GEN8_CTX_L3LLC_COHERENT; + ring->ctx_desc_template |= GEN8_CTX_PRIVILEGE; + + /* TODO: WaDisableLiteRestore when we start using semaphore + * signalling between Command Streamers */ + /* ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE; */ + + /* WaEnableForceRestoreInCtxtDescForVCS:skl */ + /* WaEnableForceRestoreInCtxtDescForVCS:bxt */ + if (ring->disable_lite_restore_wa) + ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE; + ret = i915_cmd_parser_init_ring(ring); if (ret) goto error; @@ -1969,10 +2000,7 @@ static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin goto error; /* As this is the default context, always pin it */ - ret = intel_lr_context_do_pin( - ring, - ring->default_context->engine[ring->id].state, - ring->default_context->engine[ring->id].ringbuf); + ret = intel_lr_context_do_pin(ring, ring->default_context); if (ret) { DRM_ERROR( "Failed to pin and map ringbuffer %s: %d\n", diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index de41ad6cd63d..49af638f6213 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -107,13 +107,15 @@ void intel_lr_context_reset(struct drm_device *dev, uint64_t intel_lr_context_descriptor(struct intel_context *ctx, struct intel_engine_cs *ring); +u32 intel_execlists_ctx_id(struct intel_context *ctx, + struct intel_engine_cs *ring); + /* Execlists */ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists); struct i915_execbuffer_params; int intel_execlists_submission(struct i915_execbuffer_params *params, struct drm_i915_gem_execbuffer2 *args, struct list_head *vmas); -u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj); void intel_lrc_irq_handler(struct intel_engine_cs *ring); void intel_execlists_retire_requests(struct intel_engine_cs *ring); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 7349d9258191..85ce2272f92c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -269,6 +269,8 @@ struct intel_engine_cs { struct list_head execlist_queue; struct list_head execlist_retired_req_list; u8 next_context_status_buffer; + bool disable_lite_restore_wa; + u32 ctx_desc_template; u32 irq_keep_mask; /* bitmask for interrupts that should not be masked */ int (*emit_request)(struct drm_i915_gem_request *request); int (*emit_flush)(struct drm_i915_gem_request *request, -- 1.9.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx