To further enhance port processing, keep track of reserved ports.
This way we can iterate over only the used subset of the port space.
Note that we lift the responsibility of inspecting hw availability
out of execlists_submit_request() and always do the dequeuing. This
ensures that only the irq handler is responsible for keeping track
of available ports.

v2: rebase, comment fix, READ_ONCE only outside of irq handler (Chris)
v3: rebase to new naming

Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Michał Winiarski <michal.winiarski@xxxxxxxxx>
Signed-off-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_guc_submission.c |  52 ++++++++------
 drivers/gpu/drm/i915/i915_irq.c            |   2 +-
 drivers/gpu/drm/i915/intel_engine_cs.c     |   7 +-
 drivers/gpu/drm/i915/intel_lrc.c           | 112 ++++++++++++++++++-----------
 drivers/gpu/drm/i915/intel_ringbuffer.h    |  45 +++++++++---
 5 files changed, 141 insertions(+), 77 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index c6cd05a5347c..d6071396da32 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -487,7 +487,7 @@ static void guc_ring_doorbell(struct i915_guc_client *client)
  * @engine: engine associated with the commands
  *
  * The only error here arises if the doorbell hardware isn't functioning
- * as expected, which really shouln't happen.
+ * as expected, which really shouldn't happen.
  */
 static void i915_guc_submit(struct intel_engine_cs *engine)
 {
@@ -495,17 +495,19 @@ static void i915_guc_submit(struct intel_engine_cs *engine)
 	struct intel_guc *guc = &dev_priv->guc;
 	struct i915_guc_client *client = guc->execbuf_client;
 	struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct execlist_port *port = execlists->port;
 	const unsigned int engine_id = engine->id;
 	unsigned int n;
 
-	for (n = 0; n < ARRAY_SIZE(execlists->port); n++) {
+	for (n = 0; n < execlists_active_ports(execlists); n++) {
+		struct execlist_port *port;
 		struct drm_i915_gem_request *rq;
 		unsigned int count;
 
-		rq = port_unpack(&port[n], &count);
+		port = execlists_port(execlists, n);
+		rq = port_unpack(port, &count);
+
 		if (rq && count == 0) {
-			port_set(&port[n], port_pack(rq, ++count));
+			port_set(port, port_pack(rq, ++count));
 
 			if (i915_vma_is_map_and_fenceable(rq->ring->vma))
 				POSTING_READ_FW(GUC_STATUS);
@@ -559,26 +561,29 @@ static void port_assign(struct execlist_port *port,
 
 static void i915_guc_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct execlist_port *port = execlists->port;
+	struct execlist_port *port;
 	struct drm_i915_gem_request *last = NULL;
-	const struct execlist_port * const last_port = execlists_port_tail(execlists);
 	bool submit = false;
 	struct rb_node *rb;
 
-	if (port_isset(port))
-		port++;
-
 	spin_lock_irq(&engine->timeline->lock);
 	rb = execlists->first;
 	GEM_BUG_ON(rb_first(&execlists->queue) != rb);
-	while (rb) {
+
+	if (unlikely(!rb))
+		goto done;
+
+	port = execlists_request_port(execlists);
+
+	do {
 		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
 		struct drm_i915_gem_request *rq, *rn;
 
 		list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
 			if (last && rq->ctx != last->ctx) {
-				if (port == last_port) {
+				if (!execlists_inactive_ports(execlists)) {
 					__list_del_many(&p->requests,
 							&rq->priotree.link);
 					goto done;
@@ -587,7 +592,8 @@ static void i915_guc_dequeue(struct intel_engine_cs *engine)
 
 				if (submit)
 					port_assign(port, last);
-				port = execlists_port_next(execlists, port);
+				port = execlists_request_port(execlists);
+				GEM_BUG_ON(port_isset(port));
 			}
 
 			INIT_LIST_HEAD(&rq->priotree.link);
@@ -604,7 +610,7 @@ static void i915_guc_dequeue(struct intel_engine_cs *engine)
 		INIT_LIST_HEAD(&p->requests);
 		if (p->priority != I915_PRIORITY_NORMAL)
 			kmem_cache_free(engine->i915->priorities, p);
-	}
+	} while (rb);
 done:
 	execlists->first = rb;
 	if (submit) {
@@ -618,21 +624,21 @@ static void i915_guc_irq_handler(unsigned long data)
 {
 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
 	struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct execlist_port *port = execlists_port_head(execlists);
-	const struct execlist_port * const last_port = execlists_port_tail(execlists);
-	struct drm_i915_gem_request *rq;
 
-	rq = port_request(port);
-	while (rq && i915_gem_request_completed(rq)) {
+	while (execlists_active_ports(execlists)) {
+		struct execlist_port *port = execlists_port_head(execlists);
+		struct drm_i915_gem_request *rq = port_request(port);
+
+		if (!i915_gem_request_completed(rq))
+			break;
+
 		trace_i915_gem_request_out(rq);
 		i915_gem_request_put(rq);
 
-		port = execlists_port_complete(execlists, port);
-
-		rq = port_request(port);
+		execlists_release_port(execlists, port);
 	}
 
-	if (!port_isset(last_port))
+	if (execlists_inactive_ports(execlists))
 		i915_guc_dequeue(engine);
 }
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index b9d1f379c5a0..424e960fa115 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1382,7 +1382,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift)
 	bool tasklet = false;
 
 	if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) {
-		if (port_count(execlists_port_head(execlists))) {
+		if (READ_ONCE(execlists->port_count)) {
 			__set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
 			tasklet = true;
 		}
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 3f857786e2ed..65e9c3a66e70 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -409,6 +409,9 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine)
 	BUILD_BUG_ON_NOT_POWER_OF_2(execlists_num_ports(execlists));
 	GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
 
+	execlists->port_head = 0;
+	execlists->port_count = 0;
+
 	execlists->queue = RB_ROOT;
 	execlists->first = NULL;
 }
@@ -1504,8 +1507,8 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
 	if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
 		return false;
 
-	/* Both ports drained, no more ELSP submission? */
-	if (port_request(execlists_port_head(&engine->execlists)))
+	/* All ports drained, no more ELSP submission? */
+	if (execlists_active_ports(&engine->execlists))
 		return false;
 
 	/* ELSP is empty, but there are ready requests? */
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index cb7fb3c651ce..b056bf5ddd53 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -392,6 +392,12 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq)
 	return ce->lrc_desc;
 }
 
+static inline void elsp_write(u64 desc, u32 __iomem *elsp)
+{
+	writel(upper_32_bits(desc), elsp);
+	writel(lower_32_bits(desc), elsp);
+}
+
 static void execlists_submit_ports(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -399,28 +405,29 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
 		engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine));
 	unsigned int n;
 
-	for (n = execlists_num_ports(execlists); n--; ) {
-		struct execlist_port *port;
+	for (n = 0; n < execlists_inactive_ports(execlists); n++)
+		elsp_write(0, elsp);
+
+	for (n = execlists_active_ports(execlists); n--; ) {
 		struct drm_i915_gem_request *rq;
+		struct execlist_port *port;
 		unsigned int count;
 		u64 desc;
 
 		port = execlists_port(execlists, n);
 		rq = port_unpack(port, &count);
-		if (rq) {
-			GEM_BUG_ON(count > !n);
-			if (!count++)
-				execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
-			port_set(port, port_pack(rq, count));
-			desc = execlists_update_context(rq);
-			GEM_DEBUG_EXEC(port->context_id = upper_32_bits(desc));
-		} else {
-			GEM_BUG_ON(!n);
-			desc = 0;
-		}
-		writel(upper_32_bits(desc), elsp);
-		writel(lower_32_bits(desc), elsp);
+		GEM_BUG_ON(!rq);
+		GEM_BUG_ON(count > !n);
+
+		if (!count++)
+			execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
+
+		port_set(port, port_pack(rq, count));
+		desc = execlists_update_context(rq);
+		GEM_DEBUG_EXEC(port->context_id = upper_32_bits(desc));
+
+		elsp_write(desc, elsp);
 	}
 }
@@ -455,15 +462,23 @@ static void port_assign(struct execlist_port *port,
 
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
-	struct drm_i915_gem_request *last;
 	struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct execlist_port *port = execlists_port_head(execlists);
-	const struct execlist_port * const last_port = execlists_port_tail(execlists);
+	struct execlist_port *port;
+	struct drm_i915_gem_request *last;
 	struct rb_node *rb;
 	bool submit = false;
 
-	last = port_request(port);
-	if (last)
+	spin_lock_irq(&engine->timeline->lock);
+	rb = execlists->first;
+	GEM_BUG_ON(rb_first(&execlists->queue) != rb);
+
+	if (unlikely(!rb))
+		goto done;
+
+	if (execlists_active_ports(execlists)) {
+		port = execlists_port_tail(execlists);
+		last = port_request(port);
+
 		/* WaIdleLiteRestore:bdw,skl
 		 * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
 		 * as we resubmit the request. See gen8_emit_breadcrumb()
		 * for where we prepare the padding after the end of the
 		 * request.
 		 */
 		last->tail = last->wa_tail;
+	} else {
+		/* Allocate first port to coalesce into */
+		port = execlists_request_port(execlists);
+		last = NULL;
+	}
 
 	/* Hardware submission is through 2 ports. Conceptually each port
 	 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
@@ -493,10 +513,7 @@
 	 * and context switches) submission.
 	 */
 
-	spin_lock_irq(&engine->timeline->lock);
-	rb = execlists->first;
-	GEM_BUG_ON(rb_first(&execlists->queue) != rb);
-	while (rb) {
+	do {
 		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
 		struct drm_i915_gem_request *rq, *rn;
 
@@ -514,11 +531,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			 */
 			if (last && !can_merge_ctx(rq->ctx, last->ctx)) {
 				/*
-				 * If we are on the second port and cannot
+				 * If we are on the last port and cannot
 				 * combine this request with the last, then we
 				 * are done.
 				 */
-				if (port == last_port) {
+				if (!execlists_inactive_ports(execlists)) {
 					__list_del_many(&p->requests,
 							&rq->priotree.link);
 					goto done;
@@ -543,8 +560,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 
 				if (submit)
 					port_assign(port, last);
-				port = execlists_port_next(execlists, port);
-
+				port = execlists_request_port(execlists);
 				GEM_BUG_ON(port_isset(port));
 			}
 
@@ -562,7 +578,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		INIT_LIST_HEAD(&p->requests);
 		if (p->priority != I915_PRIORITY_NORMAL)
 			kmem_cache_free(engine->i915->priorities, p);
-	}
+	} while (rb);
+
 done:
 	execlists->first = rb;
 	if (submit)
@@ -576,18 +593,26 @@
 static void
 execlists_cancel_port_requests(struct intel_engine_execlists *execlists)
 {
-	struct execlist_port *port = execlists->port;
-	unsigned int num_ports = ARRAY_SIZE(execlists->port);
+	while (execlists_active_ports(execlists)) {
+		struct execlist_port *port;
+		struct drm_i915_gem_request *rq;
 
-	while (num_ports-- && port_isset(port)) {
-		struct drm_i915_gem_request *rq = port_request(port);
+		port = execlists_port_head(execlists);
+		GEM_BUG_ON(!port_isset(port));
+
+		rq = port_request(port);
+		GEM_BUG_ON(!rq);
 
 		execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
 		i915_gem_request_put(rq);
 
-		memset(port, 0, sizeof(*port));
-		port++;
+		execlists_release_port(execlists, port);
 	}
+
+	GEM_BUG_ON(execlists->port_count);
+
+	execlists->port_count = 0;
+	execlists->port_head = 0;
 }
 
 static void execlists_cancel_requests(struct intel_engine_cs *engine)
@@ -649,10 +674,12 @@
 
 static bool execlists_elsp_ready(struct intel_engine_execlists * const execlists)
 {
-	struct execlist_port * const port0 = execlists_port_head(execlists);
-	struct execlist_port * const port1 = execlists_port_next(execlists, port0);
+	const unsigned int active = execlists_active_ports(execlists);
+
+	if (!active)
+		return true;
 
-	return port_count(port0) + port_count(port1) < 2;
+	return port_count(execlists_port_tail(execlists)) + active < 2;
 }
 
 /*
@@ -663,7 +690,6 @@ static void intel_lrc_irq_handler(unsigned long data)
 {
 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
 	struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct execlist_port *port = execlists_port_head(execlists);
 	struct drm_i915_private *dev_priv = engine->i915;
 
 	/* We can skip acquiring intel_runtime_pm_get() here as it was taken
@@ -720,6 +746,7 @@ static void intel_lrc_irq_handler(unsigned long data)
 		}
 
 		while (head != tail) {
+			struct execlist_port *port;
 			struct drm_i915_gem_request *rq;
 			unsigned int status;
 			unsigned int count;
@@ -748,6 +775,7 @@ static void intel_lrc_irq_handler(unsigned long data)
 			if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
 				continue;
 
+			port = execlists_port_head(execlists);
 			/* Check the context/desc id for this event matches */
 			GEM_DEBUG_BUG_ON(buf[2 * head + 1] !=
 					 port->context_id);
@@ -761,13 +789,13 @@ static void intel_lrc_irq_handler(unsigned long data)
 				trace_i915_gem_request_out(rq);
 				i915_gem_request_put(rq);
 
-				port = execlists_port_complete(execlists, port);
+				execlists_release_port(execlists, port);
 			} else {
 				port_set(port, port_pack(rq, count));
 			}
 
 			/* After the final element, the hw should be idle */
-			GEM_BUG_ON(port_count(port) == 0 &&
+			GEM_BUG_ON(execlists_active_ports(execlists) == 0 &&
				   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
 		}
 
@@ -792,7 +820,7 @@ static void insert_request(struct intel_engine_cs *engine,
 	struct i915_priolist *p = lookup_priolist(engine, pt, prio);
 
 	list_add_tail(&pt->link, &ptr_mask_bits(p, 1)->requests);
-	if (ptr_unmask_bits(p, 1) && execlists_elsp_ready(execlists))
+	if (ptr_unmask_bits(p, 1))
 		tasklet_hi_schedule(&execlists->irq_tasklet);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 2e795b44a942..c09d1c93fd15 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -249,6 +249,11 @@ struct intel_engine_execlists {
 	unsigned int port_head;
 
 	/**
+	 * @port_count: reserved ports
+	 */
+	unsigned int port_count;
+
+	/**
 	 * @queue: queue of requests, in priority lists
 	 */
 	struct rb_root queue;
@@ -529,6 +534,18 @@ execlists_num_ports(const struct intel_engine_execlists * const execlists)
 	return execlists->port_mask + 1;
 }
 
+static inline unsigned int
+execlists_active_ports(const struct intel_engine_execlists * const execlists)
+{
+	return execlists->port_count;
+}
+
+static inline unsigned int
+execlists_inactive_ports(const struct intel_engine_execlists * const execlists)
+{
+	return execlists_num_ports(execlists) - execlists_active_ports(execlists);
+}
+
 #define __port_n(start, n, mask) (((start) + (n)) & (mask))
 #define port_n(e, n) __port_n((e)->port_head, n, (e)->port_mask)
 
@@ -543,33 +560,43 @@ execlists_port(struct intel_engine_execlists * const execlists,
 static inline struct execlist_port *
 execlists_port_head(struct intel_engine_execlists * const execlists)
 {
+	GEM_BUG_ON(!execlists->port_count);
+
 	return execlists_port(execlists, 0);
 }
 
 static inline struct execlist_port *
 execlists_port_tail(struct intel_engine_execlists * const execlists)
 {
-	return execlists_port(execlists, -1);
+	GEM_BUG_ON(!execlists->port_count);
+
+	return execlists_port(execlists, execlists->port_count - 1);
 }
 
 static inline struct execlist_port *
-execlists_port_next(struct intel_engine_execlists * const execlists,
-		    const struct execlist_port * const port)
+execlists_request_port(struct intel_engine_execlists * const execlists)
 {
-	const unsigned int n = port_index(port, execlists);
+	GEM_BUG_ON(execlists->port_count == execlists->port_mask + 1);
+
+	execlists->port_count++;
 
-	return execlists_port(execlists, n + 1);
+	GEM_BUG_ON(port_isset(execlists_port_tail(execlists)));
+
+	return execlists_port_tail(execlists);
 }
 
-static inline struct execlist_port *
-execlists_port_complete(struct intel_engine_execlists * const execlists,
-			struct execlist_port * const port)
+static inline void
+execlists_release_port(struct intel_engine_execlists * const execlists,
+		       struct execlist_port * const port)
 {
+	GEM_BUG_ON(port_index(port, execlists) != execlists->port_head);
+	GEM_BUG_ON(!port_isset(port));
+	GEM_BUG_ON(!execlists->port_count);
+
 	memset(port, 0, sizeof(struct execlist_port));
 	execlists->port_head = port_n(execlists, 1);
-
-	return execlists_port_head(execlists);
+	execlists->port_count--;
 }
 
 static inline unsigned int
-- 
2.11.0