add/remove: 1/1 grow/shrink: 5/4 up/down: 391/-578 (-187) function old new delta execlists_submit_ports 262 471 +209 port_assign.isra - 136 +136 capture 6344 6359 +15 reset_common_ring 438 452 +14 execlists_submit_request 228 238 +10 gen8_init_common_ring 334 341 +7 intel_engine_is_idle 106 105 -1 i915_engine_info 2314 2290 -24 __i915_gem_set_wedged_BKL 485 411 -74 intel_lrc_irq_handler 1789 1604 -185 execlists_update_context 294 - -294 The most important change there is the improvement to intel_lrc_irq_handler and execlists_submit_ports (a net improvement, since execlists_update_context is now inlined). Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxx> Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_debugfs.c | 32 ++++--- drivers/gpu/drm/i915/i915_gem.c | 6 +- drivers/gpu/drm/i915/i915_gpu_error.c | 13 ++- drivers/gpu/drm/i915/i915_guc_submission.c | 18 ++-- drivers/gpu/drm/i915/intel_engine_cs.c | 2 +- drivers/gpu/drm/i915/intel_lrc.c | 133 ++++++++++++++++------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 8 +- 7 files changed, 120 insertions(+), 92 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 870c470177b5..0b5d7142d8d9 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3315,6 +3315,7 @@ static int i915_engine_info(struct seq_file *m, void *unused) if (i915.enable_execlists) { u32 ptr, read, write; struct rb_node *rb; + unsigned int idx; seq_printf(m, "\tExeclist status: 0x%08x %08x\n", I915_READ(RING_EXECLIST_STATUS_LO(engine)), @@ -3332,8 +3333,7 @@ static int i915_engine_info(struct seq_file *m, void *unused) if (read > write) write += GEN8_CSB_ENTRIES; while (read < write) { - unsigned int idx = ++read % GEN8_CSB_ENTRIES; - + idx = ++read % GEN8_CSB_ENTRIES; seq_printf(m, "\tExeclist CSB[%d]: 0x%08x, context: %d\n", idx, I915_READ(RING_CONTEXT_STATUS_BUF_LO(engine, idx)), @@ 
-3341,21 +3341,19 @@ static int i915_engine_info(struct seq_file *m, void *unused) } rcu_read_lock(); - rq = READ_ONCE(engine->execlist_port[0].request); - if (rq) { - seq_printf(m, "\t\tELSP[0] count=%d, ", - engine->execlist_port[0].count); - print_request(m, rq, "rq: "); - } else { - seq_printf(m, "\t\tELSP[0] idle\n"); - } - rq = READ_ONCE(engine->execlist_port[1].request); - if (rq) { - seq_printf(m, "\t\tELSP[1] count=%d, ", - engine->execlist_port[1].count); - print_request(m, rq, "rq: "); - } else { - seq_printf(m, "\t\tELSP[1] idle\n"); + for (idx = 0; idx < ARRAY_SIZE(engine->execlist_port); idx++) { + unsigned int count; + + rq = port_unpack(&engine->execlist_port[idx], + &count); + if (rq) { + seq_printf(m, "\t\tELSP[%d] count=%d, ", + idx, count); + print_request(m, rq, "rq: "); + } else { + seq_printf(m, "\t\tELSP[%d] idle\n", + idx); + } } rcu_read_unlock(); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2bc72314cdd1..f6df402a5247 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3039,12 +3039,14 @@ static void engine_set_wedged(struct intel_engine_cs *engine) */ if (i915.enable_execlists) { + struct execlist_port *port = engine->execlist_port; unsigned long flags; + unsigned int n; spin_lock_irqsave(&engine->timeline->lock, flags); - i915_gem_request_put(engine->execlist_port[0].request); - i915_gem_request_put(engine->execlist_port[1].request); + for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) + i915_gem_request_put(port_request(&port[n])); memset(engine->execlist_port, 0, sizeof(engine->execlist_port)); engine->execlist_queue = RB_ROOT; engine->execlist_first = NULL; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 4b247b050dcd..c5cdc6611d7f 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1324,12 +1324,17 @@ static void engine_record_requests(struct intel_engine_cs 
*engine, static void error_record_engine_execlists(struct intel_engine_cs *engine, struct drm_i915_error_engine *ee) { + const struct execlist_port *port = engine->execlist_port; unsigned int n; - for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) - if (engine->execlist_port[n].request) - record_request(engine->execlist_port[n].request, - &ee->execlist[n]); + for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) { + struct drm_i915_gem_request *rq = port_request(&port[n]); + + if (!rq) + break; + + record_request(rq, &ee->execlist[n]); + } } static void record_context(struct drm_i915_error_context *e, diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 1642fff9cf13..370373c97b81 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -658,7 +658,7 @@ static void nested_enable_signaling(struct drm_i915_gem_request *rq) static bool i915_guc_dequeue(struct intel_engine_cs *engine) { struct execlist_port *port = engine->execlist_port; - struct drm_i915_gem_request *last = port[0].request; + struct drm_i915_gem_request *last = port[0].request_count; struct rb_node *rb; bool submit = false; @@ -672,7 +672,7 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine) if (port != engine->execlist_port) break; - i915_gem_request_assign(&port->request, last); + i915_gem_request_assign(&port->request_count, last); nested_enable_signaling(last); port++; } @@ -688,7 +688,7 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine) submit = true; } if (submit) { - i915_gem_request_assign(&port->request, last); + i915_gem_request_assign(&port->request_count, last); nested_enable_signaling(last); engine->execlist_first = rb; } @@ -705,17 +705,19 @@ static void i915_guc_irq_handler(unsigned long data) bool submit; do { - rq = port[0].request; + rq = port[0].request_count; while (rq && i915_gem_request_completed(rq)) { trace_i915_gem_request_out(rq); 
i915_gem_request_put(rq); - port[0].request = port[1].request; - port[1].request = NULL; - rq = port[0].request; + + port[0].request_count = port[1].request_count; + port[1].request_count = NULL; + + rq = port[0].request_count; } submit = false; - if (!port[1].request) + if (!port[1].request_count) submit = i915_guc_dequeue(engine); } while (submit); } diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 402769d9d840..10027d0a09b5 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1148,7 +1148,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) return false; /* Both ports drained, no more ELSP submission? */ - if (engine->execlist_port[0].request) + if (port_request(&engine->execlist_port[0])) return false; /* Ring stopped? */ diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 7df278fe492e..69299fbab4f9 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -342,39 +342,32 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq) static void execlists_submit_ports(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = engine->i915; struct execlist_port *port = engine->execlist_port; u32 __iomem *elsp = - dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine)); - u64 desc[2]; - - GEM_BUG_ON(port[0].count > 1); - if (!port[0].count) - execlists_context_status_change(port[0].request, - INTEL_CONTEXT_SCHEDULE_IN); - desc[0] = execlists_update_context(port[0].request); - GEM_DEBUG_EXEC(port[0].context_id = upper_32_bits(desc[0])); - port[0].count++; - - if (port[1].request) { - GEM_BUG_ON(port[1].count); - execlists_context_status_change(port[1].request, - INTEL_CONTEXT_SCHEDULE_IN); - desc[1] = execlists_update_context(port[1].request); - GEM_DEBUG_EXEC(port[1].context_id = upper_32_bits(desc[1])); - port[1].count = 1; - } else { - desc[1] = 0; - } - 
GEM_BUG_ON(desc[0] == desc[1]); - - /* You must always write both descriptors in the order below. */ - writel(upper_32_bits(desc[1]), elsp); - writel(lower_32_bits(desc[1]), elsp); + engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine)); + unsigned int n; + + for (n = ARRAY_SIZE(engine->execlist_port); n--; ) { + struct drm_i915_gem_request *rq; + unsigned int count; + u64 desc; + + rq = port_unpack(&port[n], &count); + if (rq) { + GEM_BUG_ON(count > !n); + if (!count++) + execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); + port[n].request_count = port_pack(rq, count); + desc = execlists_update_context(rq); + GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc)); + } else { + GEM_BUG_ON(!n); + desc = 0; + } - writel(upper_32_bits(desc[0]), elsp); - /* The context is automatically loaded after the following */ - writel(lower_32_bits(desc[0]), elsp); + writel(upper_32_bits(desc), elsp); + writel(lower_32_bits(desc), elsp); + } } static bool ctx_single_port_submission(const struct i915_gem_context *ctx) @@ -395,6 +388,18 @@ static bool can_merge_ctx(const struct i915_gem_context *prev, return true; } +static void port_assign(struct execlist_port *port, + struct drm_i915_gem_request *rq) +{ + GEM_BUG_ON(rq == port_request(port)); + + if (port->request_count) + i915_gem_request_put(port_request(port)); + + port->request_count = + port_pack(i915_gem_request_get(rq), port_count(port)); +} + static void execlists_dequeue(struct intel_engine_cs *engine) { struct drm_i915_gem_request *last; @@ -402,7 +407,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) struct rb_node *rb; bool submit = false; - last = port->request; + last = port_request(port); if (last) /* WaIdleLiteRestore:bdw,skl * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL @@ -412,7 +417,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) */ last->tail = last->wa_tail; - GEM_BUG_ON(port[1].request); + GEM_BUG_ON(port[1].request_count); /* Hardware 
submission is through 2 ports. Conceptually each port * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is @@ -469,7 +474,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine) GEM_BUG_ON(last->ctx == cursor->ctx); - i915_gem_request_assign(&port->request, last); + if (submit) + port_assign(port, last); port++; } @@ -484,7 +490,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) submit = true; } if (submit) { - i915_gem_request_assign(&port->request, last); + port_assign(port, last); engine->execlist_first = rb; } spin_unlock_irq(&engine->timeline->lock); @@ -495,14 +501,14 @@ static void execlists_dequeue(struct intel_engine_cs *engine) static bool execlists_elsp_idle(struct intel_engine_cs *engine) { - return !engine->execlist_port[0].request; + return !port_count(&engine->execlist_port[0]); } static bool execlists_elsp_ready(const struct intel_engine_cs *engine) { const struct execlist_port *port = engine->execlist_port; - return port[0].count + port[1].count < 2; + return port_count(&port[0]) + port_count(&port[1]) < 2; } /* @@ -552,7 +558,9 @@ static void intel_lrc_irq_handler(unsigned long data) tail = GEN8_CSB_WRITE_PTR(head); head = GEN8_CSB_READ_PTR(head); while (head != tail) { + struct drm_i915_gem_request *rq; unsigned int status; + unsigned int count; if (++head == GEN8_CSB_ENTRIES) head = 0; @@ -582,20 +590,24 @@ static void intel_lrc_irq_handler(unsigned long data) GEM_DEBUG_BUG_ON(readl(buf + 2 * head + 1) != port[0].context_id); - GEM_BUG_ON(port[0].count == 0); - if (--port[0].count == 0) { + rq = port_unpack(&port[0], &count); + GEM_BUG_ON(count == 0); + if (--count == 0) { GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED); - GEM_BUG_ON(!i915_gem_request_completed(port[0].request)); - execlists_context_status_change(port[0].request, - INTEL_CONTEXT_SCHEDULE_OUT); + GEM_BUG_ON(!i915_gem_request_completed(rq)); + execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); + + trace_i915_gem_request_out(rq); + 
i915_gem_request_put(rq); - trace_i915_gem_request_out(port[0].request); - i915_gem_request_put(port[0].request); port[0] = port[1]; memset(&port[1], 0, sizeof(port[1])); + } else { + port[0].request_count = port_pack(rq, count); } - GEM_BUG_ON(port[0].count == 0 && + /* After the final element, the hw should be idle */ + GEM_BUG_ON(port_count(&port[0]) == 0 && !(status & GEN8_CTX_STATUS_ACTIVE_IDLE)); } @@ -1148,11 +1160,6 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) return ret; } -static u32 port_seqno(struct execlist_port *port) -{ - return port->request ? port->request->global_seqno : 0; -} - static int gen8_init_common_ring(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; @@ -1177,12 +1184,22 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine) /* After a GPU reset, we may have requests to replay */ clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); if (!i915.enable_guc_submission && !execlists_elsp_idle(engine)) { - DRM_DEBUG_DRIVER("Restarting %s from requests [0x%x, 0x%x]\n", - engine->name, - port_seqno(&engine->execlist_port[0]), - port_seqno(&engine->execlist_port[1])); - engine->execlist_port[0].count = 0; - engine->execlist_port[1].count = 0; + struct execlist_port *port = engine->execlist_port; + unsigned int n; + + for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) { + if (!port[n].request_count) + break; + + DRM_DEBUG_DRIVER("Restarting %s from 0x%x [%d]\n", + engine->name, + port_request(&port[n])->global_seqno, + n); + + /* Discard the current inflight count */ + port[n].request_count = port_request(&port[n]); + } + execlists_submit_ports(engine); } @@ -1261,13 +1278,13 @@ static void reset_common_ring(struct intel_engine_cs *engine, intel_ring_update_space(request->ring); /* Catch up with any missed context-switch interrupts */ - if (request->ctx != port[0].request->ctx) { - i915_gem_request_put(port[0].request); + if (request->ctx != port_request(&port[0])->ctx) { 
+ i915_gem_request_put(port_request(&port[0])); port[0] = port[1]; memset(&port[1], 0, sizeof(port[1])); } - GEM_BUG_ON(request->ctx != port[0].request->ctx); + GEM_BUG_ON(request->ctx != port_request(&port[0])->ctx); /* Reset WaIdleLiteRestore:bdw,skl as well */ request->tail = diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index d25b88467e5e..39b733e5cfd3 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -377,8 +377,12 @@ struct intel_engine_cs { /* Execlists */ struct tasklet_struct irq_tasklet; struct execlist_port { - struct drm_i915_gem_request *request; - unsigned int count; + struct drm_i915_gem_request *request_count; +#define EXECLIST_COUNT_BITS 2 +#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS) +#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS) +#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS) +#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS) GEM_DEBUG_DECL(u32 context_id); } execlist_port[2]; struct rb_root execlist_queue; -- 2.11.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx