To further enhance port processing, keep track of reserved ports. This
way we can iterate over only the used subset of the port space. Note
that we lift from execlists_submit_request() the responsibility of
inspecting hw availability: it now always schedules dequeuing, so that
only the irq handler is responsible for keeping track of available
ports.

Signed-off-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_guc_submission.c | 43 ++++++++-------
 drivers/gpu/drm/i915/i915_irq.c            |  2 +-
 drivers/gpu/drm/i915/intel_engine_cs.c     |  5 +-
 drivers/gpu/drm/i915/intel_lrc.c           | 84 ++++++++++++++++++------------
 drivers/gpu/drm/i915/intel_ringbuffer.h    | 50 +++++++++++++-----
 5 files changed, 117 insertions(+), 67 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 0dfb03a0cee4..fdda3a1835ad 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -662,12 +662,19 @@ static void port_assign(struct execlist_port *port,
 static bool i915_guc_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlist * const el = &engine->execlist;
-	struct execlist_port *port = execlist_port_head(el);
-	const struct execlist_port * const last_port = execlist_port_tail(el);
-	struct drm_i915_gem_request *last = port_request(port);
+	struct execlist_port *port;
+	struct drm_i915_gem_request *last;
 	struct rb_node *rb;
 	bool submit = false;
 
+	if (execlist_active_ports(el)) {
+		port = execlist_port_tail(el);
+		last = port_request(port);
+	} else {
+		port = NULL;
+		last = NULL;
+	}
+
 	spin_lock_irq(&engine->timeline->lock);
 	rb = el->first;
 	GEM_BUG_ON(rb_first(&el->queue) != rb);
@@ -675,9 +682,12 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine)
 		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
 		struct drm_i915_gem_request *rq, *rn;
 
+		if (!port)
+			port = execlist_request_port(el);
+
 		list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
 			if (last && rq->ctx != last->ctx) {
-				if (port == last_port) {
+				if (!execlist_inactive_ports(el)) {
 					__list_del_many(&p->requests,
 							&rq->priotree.link);
 					goto done;
@@ -686,7 +696,8 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine)
 
 				if (submit)
 					port_assign(port, last);
-				port = execlist_port_next(el, port);
+				port = execlist_request_port(el);
+				GEM_BUG_ON(port_isset(port));
 			}
 
 			INIT_LIST_HEAD(&rq->priotree.link);
@@ -717,26 +728,22 @@ static void i915_guc_irq_handler(unsigned long data)
 {
 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
 	struct intel_engine_execlist * const el = &engine->execlist;
-	struct execlist_port *port = execlist_port_head(el);
-	const struct execlist_port * const last_port = execlist_port_tail(el);
-	struct drm_i915_gem_request *rq;
-	bool submit;
 
 	do {
-		rq = port_request(port);
-		while (rq && i915_gem_request_completed(rq)) {
+		while (execlist_active_ports(el)) {
+			struct execlist_port *port = execlist_port_head(el);
+			struct drm_i915_gem_request *rq = port_request(port);
+
+			if (!i915_gem_request_completed(rq))
+				break;
+
 			trace_i915_gem_request_out(rq);
 			i915_gem_request_put(rq);
 
-			port = execlist_port_complete(el, port);
-
-			rq = port_request(port);
+			execlist_release_port(el, port);
 		}
 
-		submit = false;
-		if (!port_count(last_port))
-			submit = i915_guc_dequeue(engine);
-	} while (submit);
+	} while (execlist_inactive_ports(el) && i915_guc_dequeue(engine));
 }
 
 /*
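With port_count tracked alongside port_head, the port array behaves as a
plain head/count ring: the head is the oldest reserved port, head + count - 1
(modulo the ring size) is the tail, and only the reserved subset is ever
visited. A minimal userspace model of that accounting (a sketch only; the
names are hypothetical and plain % stands in for the driver's power-of-two
__port_idx() masking):

#include <assert.h>
#include <stdio.h>

#define NPORTS 2U				/* port_mask + 1 in the driver */

struct ring {
	unsigned int head;			/* oldest reserved slot */
	unsigned int count;			/* number of reserved slots */
};

/* Counterpart of execlist_request_port(): reserve the next tail slot. */
static unsigned int ring_request(struct ring *r)
{
	assert(r->count < NPORTS);		/* caller checked for a free port */
	return (r->head + r->count++) % NPORTS;
}

/* Counterpart of execlist_release_port(): retire the head slot. */
static void ring_release(struct ring *r)
{
	assert(r->count);			/* only the oldest port retires */
	r->head = (r->head + 1) % NPORTS;
	r->count--;
}

int main(void)
{
	struct ring r = { 0, 0 };

	assert(ring_request(&r) == 0);		/* first reservation: slot 0 */
	assert(ring_request(&r) == 1);		/* second: slot 1 */
	ring_release(&r);			/* head request completed */
	assert(r.head == 1 && r.count == 1);	/* slot 1 is now head and tail */
	printf("tail=%u\n", (r.head + r.count - 1) % NPORTS);
	return 0;
}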
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 1919ac0b7b0f..de4e608786a8 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1312,7 +1312,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift)
 	bool tasklet = false;
 
 	if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) {
-		if (port_count(execlist_port_head(el))) {
+		if (execlist_active_ports(el)) {
 			__set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
 			tasklet = true;
 		}
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 4b9eaec50070..b09212bb66b4 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -389,6 +389,7 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine)
 	GEM_BUG_ON(el->port_mask >= EXECLIST_MAX_PORTS);
 
 	el->port_head = 0;
+	el->port_count = 0;
 
 	el->queue = RB_ROOT;
 	el->first = NULL;
@@ -1345,8 +1346,8 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
 	if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
 		return false;
 
-	/* Both ports drained, no more ELSP submission? */
-	if (port_request(execlist_port_head(&engine->execlist)))
+	/* All ports drained, no more ELSP submission? */
+	if (execlist_active_ports(&engine->execlist))
 		return false;
 
 	/* ELSP is empty, but there are ready requests? */
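With the count available, "is there work in flight?" becomes a single read of
port_count, both in gen8_cs_irq_handler() and in intel_engine_is_idle() above.
The irq side, i915_guc_irq_handler() above and intel_lrc_irq_handler() further
below, then follows a retire-from-head, refill-while-free discipline. A
compilable sketch of that shape (all toy_* names and helpers here are
stand-ins, not driver API):

#include <stdbool.h>

struct toy_el { unsigned int head, count, mask; int rq[4]; };

static unsigned int active_ports(const struct toy_el *el) { return el->count; }
static unsigned int inactive_ports(const struct toy_el *el) { return el->mask + 1 - el->count; }
static bool completed(int rq) { return rq < 0; }	/* toy: negative means done */

static void release_head(struct toy_el *el)
{
	el->head = (el->head + 1) & el->mask;
	el->count--;
}

static bool dequeue(struct toy_el *el) { (void)el; return false; }	/* toy: empty queue */

static void irq_handler_shape(struct toy_el *el)
{
	do {
		/* Drain: retire from the head while the oldest request is done. */
		while (active_ports(el)) {
			if (!completed(el->rq[el->head]))
				break;
			release_head(el);
		}
		/* Refill: ask for more work only while a port is free. */
	} while (inactive_ports(el) && dequeue(el));
}

int main(void)
{
	struct toy_el el = { .head = 0, .count = 2, .mask = 3, .rq = { -1, 7 } };

	irq_handler_shape(&el);		/* retires rq[0], stops at busy rq[1] */
	return el.count == 1 ? 0 : 1;
}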
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 1a1c68c53fbd..18a87cf56b81 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -344,23 +344,27 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
 	struct execlist_port *port;
 	unsigned int n;
 
+	for (n = 0; n < execlist_inactive_ports(el); n++) {
+		writel(0, elsp);
+		writel(0, elsp);
+	}
+
 	for_each_execlist_port_reverse(el, port, n) {
 		struct drm_i915_gem_request *rq;
 		unsigned int count;
 		u64 desc;
 
 		rq = port_unpack(port, &count);
-		if (rq) {
-			GEM_BUG_ON(count > !n);
-			if (!count++)
-				execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
-			port_set(port, port_pack(rq, count));
-			desc = execlists_update_context(rq);
-			GEM_DEBUG_EXEC(port->context_id = upper_32_bits(desc));
-		} else {
-			GEM_BUG_ON(!n);
-			desc = 0;
-		}
+
+		GEM_BUG_ON(!rq);
+		GEM_BUG_ON(count > !n);
+
+		if (!count++)
+			execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
+
+		port_set(port, port_pack(rq, count));
+		desc = execlists_update_context(rq);
+		GEM_DEBUG_EXEC(port->context_id = upper_32_bits(desc));
 
 		writel(upper_32_bits(desc), elsp);
 		writel(lower_32_bits(desc), elsp);
@@ -398,15 +402,16 @@ static void port_assign(struct execlist_port *port,
 
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
-	struct drm_i915_gem_request *last;
 	struct intel_engine_execlist * const el = &engine->execlist;
-	struct execlist_port *port = execlist_port_head(el);
-	const struct execlist_port * const last_port = execlist_port_tail(el);
+	struct execlist_port *port;
+	struct drm_i915_gem_request *last;
 	struct rb_node *rb;
 	bool submit = false;
 
-	last = port_request(port);
-	if (last)
+	if (execlist_active_ports(el)) {
+		port = execlist_port_tail(el);
+		last = port_request(port);
+
 		/* WaIdleLiteRestore:bdw,skl
 		 * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
 		 * as we resubmit the request. See gen8_emit_breadcrumb()
@@ -414,6 +419,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		 * request.
 		 */
 		last->tail = last->wa_tail;
+	} else {
+		port = NULL;
+		last = NULL;
+	}
 
 	/* Hardware submission is through 2 ports. Conceptually each port
 	 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
@@ -443,6 +452,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
 		struct drm_i915_gem_request *rq, *rn;
 
+		if (!port)
+			port = execlist_request_port(el);
+
 		list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
 			/*
 			 * Can we combine this request with the current port?
@@ -461,7 +473,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				 * combine this request with the last, then we
 				 * are done.
 				 */
-				if (port == last_port) {
+				if (!execlist_inactive_ports(el)) {
 					__list_del_many(&p->requests,
 							&rq->priotree.link);
 					goto done;
@@ -486,8 +498,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 
 				if (submit)
 					port_assign(port, last);
-				port = execlist_port_next(el, port);
-
+				port = execlist_request_port(el);
 				GEM_BUG_ON(port_isset(port));
 			}
 
@@ -518,9 +529,12 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 
 static bool execlists_elsp_ready(struct intel_engine_execlist * const el)
 {
-	struct execlist_port * const port = execlist_port_head(el);
+	const unsigned int active = execlist_active_ports(el);
+
+	if (!active)
+		return true;
 
-	return port_count(port) + port_count(execlist_port_next(el, port)) < 2;
+	return port_count(execlist_port_tail(el)) + active < 2;
 }
 
 /*
@@ -531,7 +545,6 @@ static void intel_lrc_irq_handler(unsigned long data)
 {
 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
 	struct intel_engine_execlist * const el = &engine->execlist;
-	struct execlist_port *port = execlist_port_head(el);
 	struct drm_i915_private *dev_priv = engine->i915;
 
 	/* We can skip acquiring intel_runtime_pm_get() here as it was taken
@@ -571,6 +584,7 @@ static void intel_lrc_irq_handler(unsigned long data)
 		tail = GEN8_CSB_WRITE_PTR(head);
 		head = GEN8_CSB_READ_PTR(head);
 		while (head != tail) {
+			struct execlist_port *port;
 			struct drm_i915_gem_request *rq;
 			unsigned int status;
 			unsigned int count;
@@ -599,6 +613,7 @@ static void intel_lrc_irq_handler(unsigned long data)
 			if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
 				continue;
 
+			port = execlist_port_head(el);
 			/* Check the context/desc id for this event matches */
 			GEM_DEBUG_BUG_ON(readl(buf + 2 * head + 1) !=
 					 port->context_id);
@@ -613,13 +628,13 @@ static void intel_lrc_irq_handler(unsigned long data)
 				trace_i915_gem_request_out(rq);
 				i915_gem_request_put(rq);
 
-				port = execlist_port_complete(el, port);
+				execlist_release_port(el, port);
 			} else {
 				port_set(port, port_pack(rq, count));
 			}
 
 			/* After the final element, the hw should be idle */
-			GEM_BUG_ON(port_count(port) == 0 &&
+			GEM_BUG_ON(execlist_active_ports(el) == 0 &&
 				   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
 		}
 
@@ -709,10 +724,8 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
 
 	if (insert_request(engine,
 			   &request->priotree,
-			   request->priotree.priority)) {
-		if (execlists_elsp_ready(el))
-			tasklet_hi_schedule(&el->irq_tasklet);
-	}
+			   request->priotree.priority))
+		tasklet_hi_schedule(&el->irq_tasklet);
 
 	GEM_BUG_ON(!el->first);
 	GEM_BUG_ON(list_empty(&request->priotree.link));
@@ -1334,7 +1347,6 @@ static void reset_common_ring(struct intel_engine_cs *engine,
 			      struct drm_i915_gem_request *request)
 {
 	struct intel_engine_execlist * const el = &engine->execlist;
-	struct execlist_port *port = execlist_port_head(el);
 	struct intel_context *ce;
 
 	/*
@@ -1351,12 +1363,16 @@ static void reset_common_ring(struct intel_engine_cs *engine,
 		return;
 	}
 
-	if (request->ctx != port_request(port)->ctx) {
-		i915_gem_request_put(port_request(port));
-		execlist_port_complete(el, port);
-	}
+	if (execlist_active_ports(el)) {
+		struct execlist_port *port = execlist_port_head(el);
 
-	GEM_BUG_ON(request->ctx != port_request(port)->ctx);
+		if (request->ctx != port_request(port)->ctx) {
+			i915_gem_request_put(port_request(port));
+			execlist_release_port(el, port);
+		}
+
+		GEM_BUG_ON(request->ctx != port_request(port)->ctx);
+	}
 
 	/* If the request was innocent, we leave the request in the ELSP
 	 * and will try to replay it on restarting. The context image may
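The execlists_dequeue() changes above make port reservation lazy: a port is
requested only once there is a request to put in it, consecutive requests of
the same context coalesce into the current port, and each context switch takes
a fresh port until execlist_inactive_ports() reaches zero. A userspace toy of
that packing rule (the context ids and NPORTS are invented for illustration):

#include <stdio.h>

#define NPORTS 2

int main(void)
{
	const int ctx[] = { 1, 1, 2, 2, 3 };	/* per-request context ids */
	int used = 0, last = -1;

	for (unsigned int i = 0; i < sizeof(ctx) / sizeof(ctx[0]); i++) {
		if (ctx[i] != last) {		/* context switch? */
			if (used == NPORTS) {	/* no inactive port left */
				printf("stop before request %u\n", i);
				break;
			}
			used++;			/* reserve a fresh port */
			last = ctx[i];
		}
		printf("request %u (ctx %d) -> port %d\n", i, ctx[i], used - 1);
	}
	return 0;
}

Requests 0-1 coalesce into port 0, requests 2-3 into port 1, and the loop
stops in front of request 4, just as the goto done path does above.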
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 13d8e1115f3b..9c377f6eb8fe 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -249,6 +249,11 @@ struct intel_engine_execlist {
 	unsigned int port_head;
 
 	/**
+	 * @port_count: reserved ports
+	 */
+	unsigned int port_count;
+
+	/**
 	 * @queue: queue of requests, in priority lists
 	 */
 	struct rb_root queue;
@@ -510,44 +515,65 @@ struct intel_engine_cs {
 
 #define for_each_execlist_port(el__, port__, n__) \
 	for ((n__) = 0; \
-	     (port__) = &(el__)->port[__port_idx((el__)->port_head, (n__), (el__)->port_mask)], (n__) < (el__)->port_mask + 1; \
+	     (port__) = &(el__)->port[__port_idx((el__)->port_head, (n__), (el__)->port_mask)], (n__) < (el__)->port_count; \
 	     (n__)++)
 
 #define for_each_execlist_port_reverse(el__, port__, n__) \
-	for ((n__) = (el__)->port_mask + 1; \
+	for ((n__) = (el__)->port_count; \
 	     (port__) = &(el__)->port[__port_idx((el__)->port_head - 1, (n__), (el__)->port_mask)], (n__)--;)
 
 static inline struct execlist_port *
 execlist_port_head(struct intel_engine_execlist * const el)
 {
+	GEM_DEBUG_BUG_ON(!el->port_count);
+
 	return &el->port[el->port_head];
 }
 
 static inline struct execlist_port *
 execlist_port_tail(struct intel_engine_execlist * const el)
 {
-	return &el->port[__port_idx(el->port_head, el->port_mask, el->port_mask)];
+	GEM_DEBUG_BUG_ON(!el->port_count);
+
+	return &el->port[__port_idx(el->port_head, el->port_count - 1, el->port_mask)];
 }
 
-static inline struct execlist_port *
-execlist_port_next(struct intel_engine_execlist * const el,
-		   const struct execlist_port * const port)
+static inline unsigned int
+execlist_active_ports(const struct intel_engine_execlist * const el)
 {
-	const unsigned int i = port_index(port, el);
+	return READ_ONCE(el->port_count);
+}
 
-	return &el->port[__port_idx(i, 1, el->port_mask)];
+static inline unsigned int
+execlist_inactive_ports(const struct intel_engine_execlist * const el)
+{
+	return el->port_mask + 1 - READ_ONCE(el->port_count);
 }
 
 static inline struct execlist_port *
-execlist_port_complete(struct intel_engine_execlist * const el,
-		       struct execlist_port * const port)
+execlist_request_port(struct intel_engine_execlist * const el)
+{
+	GEM_DEBUG_BUG_ON(el->port_count == el->port_mask + 1);
+
+	el->port_count++;
+
+	GEM_DEBUG_BUG_ON(port_isset(execlist_port_tail(el)));
+
+	return execlist_port_tail(el);
+}
+
+static inline void
+execlist_release_port(struct intel_engine_execlist * const el,
+		      struct execlist_port * const port)
 {
 	GEM_DEBUG_BUG_ON(port_index(port, el) != el->port_head);
+	GEM_DEBUG_BUG_ON(!port_isset(port));
+	GEM_DEBUG_BUG_ON(!el->port_count);
 
 	memset(port, 0, sizeof(struct execlist_port));
-	el->port_head = __port_idx(el->port_head, 1, el->port_mask);
 
-	return execlist_port_head(el);
+	el->port_head = __port_idx(el->port_head, 1, el->port_mask);
+	el->port_count--;
 }
 
 void execlist_cancel_port_requests(struct intel_engine_execlist * const el);
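Note how both iteration macros above now bound the walk by port_count rather
than the full port_mask + 1, so only the reserved subset is visited, wrapping
through the mask as needed; the reverse variant starts at the tail, which is
what lets execlists_submit_ports() write the ELSP entries last-port-first. A
toy check of those bounds (with % standing in for __port_idx()):

#include <stdio.h>

#define NPORTS 4U

int main(void)
{
	unsigned int head = 3, count = 2;	/* reserved slots: 3 and 0 */

	/* for_each_execlist_port: head towards tail */
	for (unsigned int n = 0; n < count; n++)
		printf("fwd slot %u\n", (head + n) % NPORTS);

	/* for_each_execlist_port_reverse: tail back to head */
	for (unsigned int n = count; n--;)
		printf("rev slot %u\n", (head + n) % NPORTS);

	return 0;
}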
-- 
2.11.0