Quoting Mika Kuoppala (2017-09-12 09:36:18)
> To further enhance port processing, keep track of
> reserved ports. This way we can iterate only the used subset
> of port space. Note that we lift the responsibility of
> execlists_submit_request() to inspect hw availability and
> always do dequeuing. This is to ensure that only the irq
> handler will be responsible for keeping track of available ports.
>
> Signed-off-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxx>
> ---
>  drivers/gpu/drm/i915/i915_guc_submission.c | 43 ++++++++-------
>  drivers/gpu/drm/i915/i915_irq.c            |  2 +-
>  drivers/gpu/drm/i915/intel_engine_cs.c     |  5 +-
>  drivers/gpu/drm/i915/intel_lrc.c           | 84 ++++++++++++++++++------------
>  drivers/gpu/drm/i915/intel_ringbuffer.h    | 50 +++++++++++++-----
>  5 files changed, 117 insertions(+), 67 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index 0dfb03a0cee4..fdda3a1835ad 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -662,12 +662,19 @@ static void port_assign(struct execlist_port *port,
>  static bool i915_guc_dequeue(struct intel_engine_cs *engine)
>  {
>  	struct intel_engine_execlist * const el = &engine->execlist;
> -	struct execlist_port *port = execlist_port_head(el);
> -	const struct execlist_port * const last_port = execlist_port_tail(el);
> -	struct drm_i915_gem_request *last = port_request(port);
> +	struct execlist_port *port;
> +	struct drm_i915_gem_request *last;
>  	struct rb_node *rb;
>  	bool submit = false;
>
> +	if (execlist_active_ports(el)) {
> +		port = execlist_port_tail(el);
> +		last = port_request(port);
> +	} else {
> +		port = NULL;
> +		last = NULL;
> +	}
> +
>  	spin_lock_irq(&engine->timeline->lock);
>  	rb = el->first;
>  	GEM_BUG_ON(rb_first(&el->queue) != rb);
> @@ -675,9 +682,12 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine)
>  		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
>  		struct drm_i915_gem_request *rq, *rn;
>
> +		if (!port)
> +			port = execlist_request_port(el);
> +
>  		list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
>  			if (last && rq->ctx != last->ctx) {
> -				if (port == last_port) {
> +				if (!execlist_inactive_ports(el)) {
>  					__list_del_many(&p->requests,
>  							&rq->priotree.link);
>  					goto done;
> @@ -686,7 +696,8 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine)
>  				if (submit)
>  					port_assign(port, last);
>
> -				port = execlist_port_next(el, port);
> +				port = execlist_request_port(el);
> +				GEM_BUG_ON(port_isset(port));
>  			}
>
>  			INIT_LIST_HEAD(&rq->priotree.link);
> @@ -717,26 +728,22 @@ static void i915_guc_irq_handler(unsigned long data)
>  {
>  	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
>  	struct intel_engine_execlist * const el = &engine->execlist;
> -	struct execlist_port *port = execlist_port_head(el);
> -	const struct execlist_port * const last_port = execlist_port_tail(el);
> -	struct drm_i915_gem_request *rq;
> -	bool submit;
>
>  	do {
> -		rq = port_request(port);
> -		while (rq && i915_gem_request_completed(rq)) {
> +		while (execlist_active_ports(el)) {
> +			struct execlist_port *port = execlist_port_head(el);
> +			struct drm_i915_gem_request *rq = port_request(port);
> +
> +			if (!i915_gem_request_completed(rq))
> +				break;
> +
>  			trace_i915_gem_request_out(rq);
>  			i915_gem_request_put(rq);
>
> -			port = execlist_port_complete(el, port);
> -
> -			rq = port_request(port);
> +			execlist_release_port(el, port);
>  		}
>
> -		submit = false;
> -		if (!port_count(last_port))
> -			submit = i915_guc_dequeue(engine);
> -	} while (submit);
> +	} while (execlist_inactive_ports(el) && i915_guc_dequeue(engine));
>  }
>
>  /*
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 1919ac0b7b0f..de4e608786a8 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1312,7 +1312,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift)
>  	bool tasklet = false;
>
>  	if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) {
> -		if (port_count(execlist_port_head(el))) {
> +		if (execlist_active_ports(el)) {
>  			__set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
>  			tasklet = true;
>  		}
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 4b9eaec50070..b09212bb66b4 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -389,6 +389,7 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine)
>  	GEM_BUG_ON(el->port_mask >= EXECLIST_MAX_PORTS);
>
>  	el->port_head = 0;
> +	el->port_count = 0;
>
>  	el->queue = RB_ROOT;
>  	el->first = NULL;
> @@ -1345,8 +1346,8 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
>  	if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
>  		return false;
>
> -	/* Both ports drained, no more ELSP submission? */
> -	if (port_request(execlist_port_head(&engine->execlist)))
> +	/* All ports drained, no more ELSP submission? */
> +	if (execlist_active_ports(&engine->execlist))
>  		return false;
>
>  	/* ELSP is empty, but there are ready requests? */
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 1a1c68c53fbd..18a87cf56b81 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -344,23 +344,27 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
>  	struct execlist_port *port;
>  	unsigned int n;
>
> +	for (n = 0; n < execlist_inactive_ports(el); n++) {
> +		writel(0, elsp);
> +		writel(0, elsp);
> +	}
> +
>  	for_each_execlist_port_reverse(el, port, n) {
>  		struct drm_i915_gem_request *rq;
>  		unsigned int count;
>  		u64 desc;
>
>  		rq = port_unpack(port, &count);
> -		if (rq) {
> -			GEM_BUG_ON(count > !n);
> -			if (!count++)
> -				execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
> -			port_set(port, port_pack(rq, count));
> -			desc = execlists_update_context(rq);
> -			GEM_DEBUG_EXEC(port->context_id = upper_32_bits(desc));
> -		} else {
> -			GEM_BUG_ON(!n);
> -			desc = 0;
> -		}
> +
> +		GEM_BUG_ON(!rq);
> +		GEM_BUG_ON(count > !n);
> +
> +		if (!count++)
> +			execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
> +
> +		port_set(port, port_pack(rq, count));
> +		desc = execlists_update_context(rq);
> +		GEM_DEBUG_EXEC(port->context_id = upper_32_bits(desc));
>
>  		writel(upper_32_bits(desc), elsp);
>  		writel(lower_32_bits(desc), elsp);
> @@ -398,15 +402,16 @@ static void port_assign(struct execlist_port *port,
>
>  static void execlists_dequeue(struct intel_engine_cs *engine)
>  {
> -	struct drm_i915_gem_request *last;
>  	struct intel_engine_execlist * const el = &engine->execlist;
> -	struct execlist_port *port = execlist_port_head(el);
> -	const struct execlist_port * const last_port = execlist_port_tail(el);
> +	struct execlist_port *port;
> +	struct drm_i915_gem_request *last;
>  	struct rb_node *rb;
>  	bool submit = false;
>
> -	last = port_request(port);
> -	if (last)
> +	if (execlist_active_ports(el)) {
> +		port = execlist_port_tail(el);
> +		last = port_request(port);
> +
>  		/* WaIdleLiteRestore:bdw,skl
>  		 * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
>  		 * as we resubmit the request. See gen8_emit_breadcrumb()
> @@ -414,6 +419,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>  		 * request.
>  		 */
>  		last->tail = last->wa_tail;
> +	} else {
> +		port = NULL;
> +		last = NULL;
> +	}
>
>  	/* Hardware submission is through 2 ports. Conceptually each port
>  	 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
> @@ -443,6 +452,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>  		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
>  		struct drm_i915_gem_request *rq, *rn;
>
> +		if (!port)
> +			port = execlist_request_port(el);
> +
>  		list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
>  			/*
>  			 * Can we combine this request with the current port?
> @@ -461,7 +473,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>  			 * combine this request with the last, then we
>  			 * are done.
>  			 */
> -			if (port == last_port) {
> +			if (!execlist_inactive_ports(el)) {
>  				__list_del_many(&p->requests,
>  						&rq->priotree.link);
>  				goto done;
> @@ -486,8 +498,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>  			if (submit)
>  				port_assign(port, last);
>
> -			port = execlist_port_next(el, port);
> -
> +			port = execlist_request_port(el);
>  			GEM_BUG_ON(port_isset(port));
>  		}
>
> @@ -518,9 +529,12 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>
>  static bool execlists_elsp_ready(struct intel_engine_execlist * const el)
>  {
> -	struct execlist_port * const port = execlist_port_head(el);
> +	const unsigned int active = execlist_active_ports(el);
> +
> +	if (!active)
> +		return true;
>
> -	return port_count(port) + port_count(execlist_port_next(el, port)) < 2;
> +	return port_count(execlist_port_tail(el)) + active < 2;
>  }
>
>  /*
> @@ -531,7 +545,6 @@ static void intel_lrc_irq_handler(unsigned long data)
>  {
>  	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
>  	struct intel_engine_execlist * const el = &engine->execlist;
> -	struct execlist_port *port = execlist_port_head(el);
>  	struct drm_i915_private *dev_priv = engine->i915;
>
>  	/* We can skip acquiring intel_runtime_pm_get() here as it was taken
> @@ -571,6 +584,7 @@ static void intel_lrc_irq_handler(unsigned long data)
>  		tail = GEN8_CSB_WRITE_PTR(head);
>  		head = GEN8_CSB_READ_PTR(head);
>  		while (head != tail) {
> +			struct execlist_port *port;
>  			struct drm_i915_gem_request *rq;
>  			unsigned int status;
>  			unsigned int count;
> @@ -599,6 +613,7 @@ static void intel_lrc_irq_handler(unsigned long data)
>  			if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
>  				continue;
>
> +			port = execlist_port_head(el);
>  			/* Check the context/desc id for this event matches */
>  			GEM_DEBUG_BUG_ON(readl(buf + 2 * head + 1) !=
>  					 port->context_id);
> @@ -613,13 +628,13 @@ static void intel_lrc_irq_handler(unsigned long data)
>  				trace_i915_gem_request_out(rq);
>  				i915_gem_request_put(rq);
>
> -				port = execlist_port_complete(el, port);
> +				execlist_release_port(el, port);
>  			} else {
>  				port_set(port, port_pack(rq, count));
>  			}
>
>  			/* After the final element, the hw should be idle */
> -			GEM_BUG_ON(port_count(port) == 0 &&
> +			GEM_BUG_ON(execlist_active_ports(el) == 0 &&
>  				   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
>  		}
>
> @@ -709,10 +724,8 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
>
>  	if (insert_request(engine,
>  			   &request->priotree,
> -			   request->priotree.priority)) {
> -		if (execlists_elsp_ready(el))
> -			tasklet_hi_schedule(&el->irq_tasklet);
> -	}
> +			   request->priotree.priority))
> +		tasklet_hi_schedule(&el->irq_tasklet);
>
>  	GEM_BUG_ON(!el->first);
>  	GEM_BUG_ON(list_empty(&request->priotree.link));
> @@ -1334,7 +1347,6 @@ static void reset_common_ring(struct intel_engine_cs *engine,
>  			      struct drm_i915_gem_request *request)
>  {
>  	struct intel_engine_execlist * const el = &engine->execlist;
> -	struct execlist_port *port = execlist_port_head(el);
>  	struct intel_context *ce;
>
>  	/*
> @@ -1351,12 +1363,16 @@ static void reset_common_ring(struct intel_engine_cs *engine,
>  		return;
>  	}
>
> -	if (request->ctx != port_request(port)->ctx) {
> -		i915_gem_request_put(port_request(port));
> -		execlist_port_complete(el, port);
> -	}
> +	if (execlist_active_ports(el)) {
> +		struct execlist_port *port = execlist_port_head(el);
>
> -	GEM_BUG_ON(request->ctx != port_request(port)->ctx);
> +		if (request->ctx != port_request(port)->ctx) {
> +			i915_gem_request_put(port_request(port));
> +			execlist_release_port(el, port);
> +		}
> +
> +		GEM_BUG_ON(request->ctx != port_request(port)->ctx);
> +	}
>
>  	/* If the request was innocent, we leave the request in the ELSP
>  	 * and will try to replay it on restarting. The context image may
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 13d8e1115f3b..9c377f6eb8fe 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -249,6 +249,11 @@ struct intel_engine_execlist {
>  	unsigned int port_head;
>
>  	/**
> +	 * @port_count: reserved ports
> +	 */
> +	unsigned int port_count;
> +
> +	/**
>  	 * @queue: queue of requests, in priority lists
>  	 */
>  	struct rb_root queue;
> @@ -510,44 +515,65 @@ struct intel_engine_cs {
>
>  #define for_each_execlist_port(el__, port__, n__) \
>  	for ((n__) = 0; \
> -	     (port__) = &(el__)->port[__port_idx((el__)->port_head, (n__), (el__)->port_mask)], (n__) < (el__)->port_mask + 1; \
> +	     (port__) = &(el__)->port[__port_idx((el__)->port_head, (n__), (el__)->port_mask)], (n__) < (el__)->port_count; \
>  	     (n__)++)
>
>  #define for_each_execlist_port_reverse(el__, port__, n__) \
> -	for ((n__) = (el__)->port_mask + 1; \
> +	for ((n__) = (el__)->port_count; \
>  	     (port__) = &(el__)->port[__port_idx((el__)->port_head - 1, (n__), (el__)->port_mask)], (n__)--;)
>
>  static inline struct execlist_port *
>  execlist_port_head(struct intel_engine_execlist * const el)
>  {
> +	GEM_DEBUG_BUG_ON(!el->port_count);
> +
>  	return &el->port[el->port_head];
>  }
>
>  static inline struct execlist_port *
>  execlist_port_tail(struct intel_engine_execlist * const el)
>  {
> -	return &el->port[__port_idx(el->port_head, el->port_mask, el->port_mask)];
> +	GEM_DEBUG_BUG_ON(!el->port_count);
> +
> +	return &el->port[__port_idx(el->port_head, el->port_count - 1, el->port_mask)];
>  }
>
> -static inline struct execlist_port *
> -execlist_port_next(struct intel_engine_execlist * const el,
> -		   const struct execlist_port * const port)
> +static inline unsigned int
> +execlist_active_ports(const struct intel_engine_execlist * const el)
>  {
> -	const unsigned int i = port_index(port, el);
> +	return READ_ONCE(el->port_count);
> +}
>
> -	return &el->port[__port_idx(i, 1, el->port_mask)];
> +static inline unsigned int
> +execlist_inactive_ports(const struct intel_engine_execlist * const el)
> +{
> +	return el->port_mask + 1 - READ_ONCE(el->port_count);
>  }
>
>  static inline struct execlist_port *
> -execlist_port_complete(struct intel_engine_execlist * const el,
> -		       struct execlist_port * const port)
> +execlist_request_port(struct intel_engine_execlist * const el)
> +{
> +	GEM_DEBUG_BUG_ON(el->port_count == el->port_mask + 1);

The idea of the DEBUG ones is that they match GEM_DEBUG_EXEC, i.e. code
that only exists conditionally under debugging. GEM_BUG_ON() only exists
for debugging (CI) builds.
-Chris
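
[Editor's note: for readers outside i915, the distinction Chris draws can be
sketched in a few lines. This is only an illustration under the assumption of
a single debug switch (DEBUG_GEM below, standing in for
CONFIG_DRM_I915_DEBUG_GEM); the macro bodies are simplified stand-ins, not the
real i915_gem.h definitions.]

    /* Illustrative only: GEM_BUG_ON() is an assert compiled out of
     * non-debug builds, while the GEM_DEBUG_* variants additionally
     * guard code and state that exist only in debug builds.
     */
    #include <assert.h>

    #ifdef DEBUG_GEM
    #define GEM_BUG_ON(expr)       assert(!(expr))   /* debug-only assert */
    #define GEM_DEBUG_EXEC(expr)   expr              /* run a debug-only statement */
    #define GEM_DEBUG_BUG_ON(expr) GEM_BUG_ON(expr)  /* assert on debug-only state */
    #else
    #define GEM_BUG_ON(expr)       ((void)0)         /* compiled out */
    #define GEM_DEBUG_EXEC(expr)   do { } while (0)  /* expr dropped entirely */
    #define GEM_DEBUG_BUG_ON(expr)                   /* compiled out entirely */
    #endif

    struct port_sketch {
            unsigned int count;
    #ifdef DEBUG_GEM
            unsigned int context_id;  /* bookkeeping that exists only when debugging */
    #endif
    };

    int main(void)
    {
            struct port_sketch port = { .count = 1 };

            GEM_BUG_ON(port.count == 0);                   /* ordinary debug assert */
            GEM_DEBUG_EXEC(port.context_id = 0xc0ffee);    /* touches a debug-only field */
            GEM_DEBUG_BUG_ON(port.context_id != 0xc0ffee); /* so the check needs the DEBUG variant too */
            return 0;
    }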
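
[Editor's note: stepping back to the patch itself, the new bookkeeping is a
small ring of ports addressed by a head index plus a reserved count. The
user-space sketch below mirrors the names used in the patch, but its types and
helpers are simplified assumptions rather than the i915 code.]

    #include <assert.h>
    #include <stdio.h>

    #define PORT_MASK 1                      /* two ports, as on current hardware */

    struct sketch_execlist {
            int port[PORT_MASK + 1];         /* stand-in for struct execlist_port */
            unsigned int port_head;          /* oldest reserved port */
            unsigned int port_count;         /* number of reserved ports */
    };

    static unsigned int active_ports(const struct sketch_execlist *el)
    {
            return el->port_count;
    }

    static unsigned int inactive_ports(const struct sketch_execlist *el)
    {
            return PORT_MASK + 1 - el->port_count;
    }

    /* Reserve the slot just past the current tail and hand it to the caller. */
    static int *request_port(struct sketch_execlist *el)
    {
            unsigned int idx = (el->port_head + el->port_count) & PORT_MASK;

            assert(inactive_ports(el));      /* caller must check for space first */
            el->port_count++;
            return &el->port[idx];
    }

    /* Retire the oldest port; the next one becomes the new head. */
    static void release_port(struct sketch_execlist *el)
    {
            assert(active_ports(el));
            el->port[el->port_head] = 0;
            el->port_head = (el->port_head + 1) & PORT_MASK;
            el->port_count--;
    }

    int main(void)
    {
            struct sketch_execlist el = {0};

            *request_port(&el) = 1;          /* submit two requests */
            *request_port(&el) = 2;
            printf("active=%u inactive=%u\n", active_ports(&el), inactive_ports(&el));

            release_port(&el);               /* first one completes */
            printf("head now holds request %d\n", el.port[el.port_head]);
            return 0;
    }

In these terms, execlist_request_port() corresponds to request_port() (grow the
tail), execlist_release_port() to release_port() (retire the head), and
execlists_submit_ports() can pad the unused ELSP slots by looking at the
inactive count instead of checking each port for a NULL request.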