To further enhance port processing, keep track of reserved ports. This
way we can iterate over only the used subset of the port space. Note
that we lift from execlists_submit_request() the responsibility of
inspecting hw availability, and always do the dequeuing. This ensures
that only the irq handler is responsible for keeping track of
available ports.

v2: rebase, comment fix, READ_ONCE only outside of irq handler (Chris)

Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Michał Winiarski <michal.winiarski@xxxxxxxxx>
Signed-off-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_guc_submission.c | 51 +++++++++--------
 drivers/gpu/drm/i915/i915_irq.c            |  2 +-
 drivers/gpu/drm/i915/intel_engine_cs.c     |  7 ++-
 drivers/gpu/drm/i915/intel_lrc.c           | 90 ++++++++++++++++++------------
 drivers/gpu/drm/i915/intel_ringbuffer.h    | 55 +++++++++++++-----
 5 files changed, 129 insertions(+), 76 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 25c9bac94c39..359f57a59cba 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -487,7 +487,7 @@ static void guc_ring_doorbell(struct i915_guc_client *client)
  * @engine: engine associated with the commands
  *
  * The only error here arises if the doorbell hardware isn't functioning
- * as expected, which really shouln't happen.
+ * as expected, which really shouldn't happen.
  */
 static void i915_guc_submit(struct intel_engine_cs *engine)
 {
@@ -495,17 +495,19 @@ static void i915_guc_submit(struct intel_engine_cs *engine)
 	struct intel_guc *guc = &dev_priv->guc;
 	struct i915_guc_client *client = guc->execbuf_client;
 	struct intel_engine_execlist * const el = &engine->execlist;
-	struct execlist_port *port = el->port;
 	const unsigned int engine_id = engine->id;
 	unsigned int n;
 
-	for (n = 0; n < ARRAY_SIZE(el->port); n++) {
+	for (n = 0; n < execlist_active_ports(el); n++) {
+		struct execlist_port *port;
 		struct drm_i915_gem_request *rq;
 		unsigned int count;
 
-		rq = port_unpack(&port[n], &count);
+		port = execlist_port_index(el, n);
+
+		rq = port_unpack(port, &count);
 		if (rq && count == 0) {
-			port_set(&port[n], port_pack(rq, ++count));
+			port_set(port, port_pack(rq, ++count));
 
 			if (i915_vma_is_map_and_fenceable(rq->ring->vma))
 				POSTING_READ_FW(GUC_STATUS);
@@ -560,25 +562,27 @@ static void port_assign(struct execlist_port *port,
 static void i915_guc_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlist * const el = &engine->execlist;
-	struct execlist_port *port = el->port;
+	struct execlist_port *port;
 	struct drm_i915_gem_request *last = NULL;
-	const struct execlist_port * const last_port = execlist_port_tail(el);
 	bool submit = false;
 	struct rb_node *rb;
 
-	if (port_isset(port))
-		port++;
-
 	spin_lock_irq(&engine->timeline->lock);
 	rb = el->first;
 	GEM_BUG_ON(rb_first(&el->queue) != rb);
-	while (rb) {
+
+	if (unlikely(!rb))
+		goto done;
+
+	port = execlist_request_port(el);
+
+	do {
 		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
 		struct drm_i915_gem_request *rq, *rn;
 
 		list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
 			if (last && rq->ctx != last->ctx) {
-				if (port == last_port) {
+				if (!execlist_inactive_ports(el)) {
 					__list_del_many(&p->requests,
 							&rq->priotree.link);
 					goto done;
@@ -587,7 +591,8 @@ static void i915_guc_dequeue(struct intel_engine_cs *engine)
 
 				if (submit)
 					port_assign(port, last);
-				port = execlist_port_next(el, port);
+				port = execlist_request_port(el);
+				GEM_BUG_ON(port_isset(port));
 			}
 			INIT_LIST_HEAD(&rq->priotree.link);
@@ -604,7 +609,7 @@ static void i915_guc_dequeue(struct intel_engine_cs *engine)
 		INIT_LIST_HEAD(&p->requests);
 		if (p->priority != I915_PRIORITY_NORMAL)
 			kmem_cache_free(engine->i915->priorities, p);
-	}
+	} while (rb);
 done:
 	el->first = rb;
 	if (submit) {
@@ -618,21 +623,21 @@ static void i915_guc_irq_handler(unsigned long data)
 {
 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
 	struct intel_engine_execlist * const el = &engine->execlist;
-	struct execlist_port *port = execlist_port_head(el);
-	const struct execlist_port * const last_port = execlist_port_tail(el);
-	struct drm_i915_gem_request *rq;
 
-	rq = port_request(port);
-	while (rq && i915_gem_request_completed(rq)) {
+	while (execlist_active_ports(el)) {
+		struct execlist_port *port = execlist_port_head(el);
+		struct drm_i915_gem_request *rq = port_request(port);
+
+		if (!i915_gem_request_completed(rq))
+			break;
+
 		trace_i915_gem_request_out(rq);
 		i915_gem_request_put(rq);
 
-		port = execlist_port_complete(el, port);
-
-		rq = port_request(port);
+		execlist_release_port(el, port);
 	}
 
-	if (!port_isset(last_port))
+	if (execlist_inactive_ports(el))
 		i915_guc_dequeue(engine);
 }

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index ac5a95439393..a9d888b726c4 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1342,7 +1342,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift)
 	bool tasklet = false;
 
 	if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) {
-		if (port_count(execlist_port_head(el))) {
+		if (READ_ONCE(el->port_count)) {
 			__set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
 			tasklet = true;
 		}
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index b0d702063a50..29b170fdd6ef 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -407,6 +407,9 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine)
 	BUILD_BUG_ON_NOT_POWER_OF_2(execlist_num_ports(&engine->execlist));
 	GEM_BUG_ON(execlist_num_ports(&engine->execlist) > EXECLIST_MAX_PORTS);
 
+	el->port_head = 0;
+	el->port_count = 0;
+
 	el->queue = RB_ROOT;
 	el->first = NULL;
 }
@@ -1501,8 +1504,8 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
 	if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
 		return false;
 
-	/* Both ports drained, no more ELSP submission? */
-	if (port_request(execlist_port_head(&engine->execlist)))
+	/* All ports drained, no more ELSP submission? */
+	if (execlist_active_ports(&engine->execlist))
 		return false;
 
 	/* ELSP is empty, but there are ready requests?
 	 */
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 8550cd6635c9..bea10620bed2 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -399,26 +399,29 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
 		engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine));
 	unsigned int n;
 
-	for (n = execlist_num_ports(el); n--; ) {
-		struct execlist_port *port;
+	for (n = 0; n < execlist_inactive_ports(el); n++) {
+		writel(0, elsp);
+		writel(0, elsp);
+	}
+
+	for (n = execlist_active_ports(el); n--; ) {
 		struct drm_i915_gem_request *rq;
+		struct execlist_port *port;
 		unsigned int count;
 		u64 desc;
 
 		port = execlist_port_index(el, n);
 
 		rq = port_unpack(port, &count);
-		if (rq) {
-			GEM_BUG_ON(count > !n);
-			if (!count++)
-				execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
-			port_set(port, port_pack(rq, count));
-			desc = execlists_update_context(rq);
-			GEM_DEBUG_EXEC(port->context_id = upper_32_bits(desc));
-		} else {
-			GEM_BUG_ON(!n);
-			desc = 0;
-		}
+
+		GEM_BUG_ON(!rq);
+		GEM_BUG_ON(count > !n);
+
+		if (!count++)
+			execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
+
+		port_set(port, port_pack(rq, count));
+		desc = execlists_update_context(rq);
+		GEM_DEBUG_EXEC(port->context_id = upper_32_bits(desc));
 
 		writel(upper_32_bits(desc), elsp);
 		writel(lower_32_bits(desc), elsp);
@@ -456,15 +459,23 @@ static void port_assign(struct execlist_port *port,
 
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
-	struct drm_i915_gem_request *last;
 	struct intel_engine_execlist * const el = &engine->execlist;
-	struct execlist_port *port = execlist_port_head(el);
-	const struct execlist_port * const last_port = execlist_port_tail(el);
+	struct execlist_port *port;
+	struct drm_i915_gem_request *last;
 	struct rb_node *rb;
 	bool submit = false;
 
-	last = port_request(port);
-	if (last)
+	spin_lock_irq(&engine->timeline->lock);
+	rb = el->first;
+	GEM_BUG_ON(rb_first(&el->queue) != rb);
+
+	if (unlikely(!rb))
+		goto done;
+
+	if (execlist_active_ports(el)) {
+		port = execlist_port_tail(el);
+		last = port_request(port);
+
 		/* WaIdleLiteRestore:bdw,skl
 		 * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
 		 * as we resubmit the request. See gen8_emit_breadcrumb()
@@ -472,6 +483,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		 * request.
 		 */
 		last->tail = last->wa_tail;
+	} else {
+		/* Allocate first port to coalesce into */
+		port = execlist_request_port(el);
+		last = NULL;
+	}
 
 	/* Hardware submission is through 2 ports. Conceptually each port
 	 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
@@ -493,11 +509,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	 * sequence of requests as being the most optimal (fewest wake ups
 	 * and context switches) submission.
 	 */
-
-	spin_lock_irq(&engine->timeline->lock);
-	rb = el->first;
-	GEM_BUG_ON(rb_first(&el->queue) != rb);
-	while (rb) {
+	do {
 		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
 		struct drm_i915_gem_request *rq, *rn;
 
@@ -515,11 +527,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			 */
 			if (last && !can_merge_ctx(rq->ctx, last->ctx)) {
 				/*
-				 * If we are on the second port and cannot
+				 * If we are on the last port and cannot
 				 * combine this request with the last, then we
 				 * are done.
 				 */
-				if (port == last_port) {
+				if (!execlist_inactive_ports(el)) {
 					__list_del_many(&p->requests,
 							&rq->priotree.link);
 					goto done;
@@ -544,8 +556,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 
 				if (submit)
 					port_assign(port, last);
-				port = execlist_port_next(el, port);
-
+				port = execlist_request_port(el);
 				GEM_BUG_ON(port_isset(port));
 			}
 
@@ -563,7 +574,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		INIT_LIST_HEAD(&p->requests);
 		if (p->priority != I915_PRIORITY_NORMAL)
 			kmem_cache_free(engine->i915->priorities, p);
-	}
+	} while (rb);
+
 done:
 	el->first = rb;
 	if (submit)
@@ -582,6 +594,9 @@ static void execlist_cancel_port_requests(struct intel_engine_execlist *el)
 		i915_gem_request_put(port_request(&el->port[i]));
 
 	memset(el->port, 0, sizeof(el->port));
+
+	el->port_count = 0;
+	el->port_head = 0;
 }
 
 static void execlists_cancel_requests(struct intel_engine_cs *engine)
@@ -643,10 +658,12 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 
 static bool execlists_elsp_ready(struct intel_engine_execlist * const el)
 {
-	struct execlist_port * const port0 = execlist_port_head(el);
-	struct execlist_port * const port1 = execlist_port_next(el, port0);
+	const unsigned int active = execlist_active_ports(el);
+
+	if (!active)
+		return true;
 
-	return port_count(port0) + port_count(port1) < 2;
+	return port_count(execlist_port_tail(el)) + active < 2;
 }
 
 /*
@@ -657,7 +674,6 @@ static void intel_lrc_irq_handler(unsigned long data)
 {
 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
 	struct intel_engine_execlist * const el = &engine->execlist;
-	struct execlist_port *port = execlist_port_head(el);
 	struct drm_i915_private *dev_priv = engine->i915;
 
 	/* We can skip acquiring intel_runtime_pm_get() here as it was taken
@@ -714,6 +730,7 @@ static void intel_lrc_irq_handler(unsigned long data)
 		}
 
 		while (head != tail) {
+			struct execlist_port *port;
 			struct drm_i915_gem_request *rq;
 			unsigned int status;
 			unsigned int count;
@@ -742,6 +759,7 @@ static void intel_lrc_irq_handler(unsigned long data)
 			if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
 				continue;
 
+			port = execlist_port_head(el);
 			/* Check the context/desc id for this event matches */
 			GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
 
@@ -755,13 +773,13 @@ static void intel_lrc_irq_handler(unsigned long data)
 				trace_i915_gem_request_out(rq);
 				i915_gem_request_put(rq);
 
-				port = execlist_port_complete(el, port);
+				execlist_release_port(el, port);
 			} else {
 				port_set(port, port_pack(rq, count));
 			}
 
 			/* After the final element, the hw should be idle */
-			GEM_BUG_ON(port_count(port) == 0 &&
+			GEM_BUG_ON(execlist_active_ports(el) == 0 &&
 				   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
 		}
 
@@ -786,7 +804,7 @@ static void insert_request(struct intel_engine_cs *engine,
 	struct i915_priolist *p = lookup_priolist(engine, pt, prio);
 
 	list_add_tail(&pt->link, &ptr_mask_bits(p, 1)->requests);
-	if (ptr_unmask_bits(p, 1) && execlists_elsp_ready(el))
+	if (ptr_unmask_bits(p, 1))
 		tasklet_hi_schedule(&el->irq_tasklet);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 991f6c0bd6c2..efa5a8ea1ecb 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -249,6 +249,11 @@ struct intel_engine_execlist {
 	unsigned int port_head;
 
 	/**
+	 * @port_count: reserved ports
+	 */
+	unsigned int port_count;
+
+	/**
 	 * @queue: queue of requests, in priority lists
 	 */
 	struct rb_root queue;
@@ -529,14 +534,20 @@ execlist_num_ports(const struct intel_engine_execlist * const el)
 	return el->port_mask + 1;
 }
 
-#define __port_idx(start, index, mask) (((start) + (index)) & (mask))
+static inline unsigned int
+execlist_active_ports(const struct intel_engine_execlist * const el)
+{
+	return el->port_count;
+}
 
-static inline struct execlist_port *
-execlist_port_head(struct intel_engine_execlist * const el)
+static inline unsigned int
+execlist_inactive_ports(const struct intel_engine_execlist * const el)
 {
-	return &el->port[el->port_head];
+	return execlist_num_ports(el) - execlist_active_ports(el);
 }
 
+#define __port_idx(start, index, mask) (((start) + (index)) & (mask))
+
 /* Index starting from port_head */
 static inline struct execlist_port *
 execlist_port_index(struct intel_engine_execlist * const el,
@@ -546,30 +557,46 @@ execlist_port_index(struct intel_engine_execlist * const el,
 }
 
 static inline struct execlist_port *
-execlist_port_tail(struct intel_engine_execlist * const el)
+execlist_port_head(struct intel_engine_execlist * const el)
 {
-	return &el->port[__port_idx(el->port_head, -1, el->port_mask)];
+	GEM_BUG_ON(!el->port_count);
+
+	return execlist_port_index(el, 0);
 }
 
 static inline struct execlist_port *
-execlist_port_next(struct intel_engine_execlist * const el,
-		   const struct execlist_port * const port)
+execlist_port_tail(struct intel_engine_execlist * const el)
 {
-	const unsigned int i = port_index(port, el);
+	GEM_BUG_ON(!el->port_count);
 
-	return &el->port[__port_idx(i, 1, el->port_mask)];
+	return execlist_port_index(el, el->port_count - 1);
 }
 
 static inline struct execlist_port *
-execlist_port_complete(struct intel_engine_execlist * const el,
-		       struct execlist_port * const port)
+execlist_request_port(struct intel_engine_execlist * const el)
 {
+	GEM_BUG_ON(el->port_count == el->port_mask + 1);
+
+	el->port_count++;
+
+	GEM_BUG_ON(port_isset(execlist_port_tail(el)));
+
+	return execlist_port_tail(el);
+}
+
+static inline void
+execlist_release_port(struct intel_engine_execlist * const el,
+		      struct execlist_port * const port)
+{
+	GEM_BUG_ON(port_index(port, el) != el->port_head);
+	GEM_BUG_ON(!port_isset(port));
+	GEM_BUG_ON(!el->port_count);
 	memset(port, 0, sizeof(struct execlist_port));
 
-	el->port_head = __port_idx(el->port_head, 1, el->port_mask);
-	return execlist_port_head(el);
+	el->port_head = __port_idx(el->port_head, 1, el->port_mask);
+	el->port_count--;
 }
 
 static inline unsigned int
-- 
2.11.0
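
For reference, here is a minimal userspace sketch of the head + count ring
accounting that execlist_request_port()/execlist_release_port() implement
above. The names (struct ports, request_port, release_port) and the fixed
two-port size are illustrative stand-ins rather than the kernel code, and
assert() stands in for GEM_BUG_ON():

/*
 * Standalone model of the port ring: head is the oldest reserved slot
 * (el->port_head), count the number of reserved slots (el->port_count).
 * Not kernel code; names and sizes are illustrative only.
 */
#include <assert.h>
#include <stdio.h>

#define NUM_PORTS 2u			/* power of two, as BUILD_BUG_ON_NOT_POWER_OF_2 enforces */
#define PORT_MASK (NUM_PORTS - 1)

struct ports {
	unsigned int head;		/* index of oldest reserved slot */
	unsigned int count;		/* number of reserved slots */
	int slot[NUM_PORTS];		/* stands in for struct execlist_port */
};

/* wrap-around indexing from head, like __port_idx()/execlist_port_index() */
static unsigned int port_index(const struct ports *p, unsigned int n)
{
	return (p->head + n) & PORT_MASK;
}

/* reserve the next tail slot, like execlist_request_port() */
static unsigned int request_port(struct ports *p)
{
	assert(p->count < NUM_PORTS);
	p->count++;
	return port_index(p, p->count - 1);
}

/* retire the head slot, like execlist_release_port() */
static void release_port(struct ports *p)
{
	assert(p->count > 0);
	p->slot[p->head] = 0;
	p->head = (p->head + 1) & PORT_MASK;
	p->count--;
}

int main(void)
{
	struct ports p = { 0 };

	p.slot[request_port(&p)] = 1;	/* first submission reserves a port */
	p.slot[request_port(&p)] = 2;	/* second is coalesced behind it */
	printf("active=%u inactive=%u\n", p.count, NUM_PORTS - p.count);

	release_port(&p);		/* completion retires from the head only */
	printf("active=%u head=%u\n", p.count, p.head);
	return 0;
}

The invariant is that slots [head, head + count) hold the reserved ports:
dequeue reserves new ports at the tail while only the irq handler retires
them at the head. That is what makes "iterate only the used subset" a plain
loop from 0 to port_count, and why gen8_cs_irq_handler() can sample
el->port_count with a single READ_ONCE.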