Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx> writes: > To further enhance port processing, keep track of > reserved ports. This way we can iterate only the used subset > of port space. Note that we lift the responsibility of > execlists_submit_request() to inspect hw availability and s/execlists_submit_request/insert_request/. -Mika > always do dequeuing. This is to ensure that only the irq > handler will be responsible for keeping track of available ports. > > v2: rebase, comment fix, READ_ONCE only outside of irq handler (Chris) > > Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > Cc: Michał Winiarski <michal.winiarski@xxxxxxxxx> > Signed-off-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_guc_submission.c | 51 +++++++++-------- > drivers/gpu/drm/i915/i915_irq.c | 2 +- > drivers/gpu/drm/i915/intel_engine_cs.c | 7 ++- > drivers/gpu/drm/i915/intel_lrc.c | 90 ++++++++++++++++++------------ > drivers/gpu/drm/i915/intel_ringbuffer.h | 55 +++++++++++++----- > 5 files changed, 129 insertions(+), 76 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c > index 25c9bac94c39..359f57a59cba 100644 > --- a/drivers/gpu/drm/i915/i915_guc_submission.c > +++ b/drivers/gpu/drm/i915/i915_guc_submission.c > @@ -487,7 +487,7 @@ static void guc_ring_doorbell(struct i915_guc_client *client) > * @engine: engine associated with the commands > * > * The only error here arises if the doorbell hardware isn't functioning > - * as expected, which really shouln't happen. > + * as expected, which really shouldn't happen. 
> */ > static void i915_guc_submit(struct intel_engine_cs *engine) > { > @@ -495,17 +495,19 @@ static void i915_guc_submit(struct intel_engine_cs *engine) > struct intel_guc *guc = &dev_priv->guc; > struct i915_guc_client *client = guc->execbuf_client; > struct intel_engine_execlist * const el = &engine->execlist; > - struct execlist_port *port = el->port; > const unsigned int engine_id = engine->id; > unsigned int n; > > - for (n = 0; n < ARRAY_SIZE(el->port); n++) { > + for (n = 0; n < execlist_active_ports(el); n++) { > + struct execlist_port *port; > struct drm_i915_gem_request *rq; > unsigned int count; > > - rq = port_unpack(&port[n], &count); > + port = execlist_port_index(el, n); > + > + rq = port_unpack(port, &count); > if (rq && count == 0) { > - port_set(&port[n], port_pack(rq, ++count)); > + port_set(port, port_pack(rq, ++count)); > > if (i915_vma_is_map_and_fenceable(rq->ring->vma)) > POSTING_READ_FW(GUC_STATUS); > @@ -560,25 +562,27 @@ static void port_assign(struct execlist_port *port, > static void i915_guc_dequeue(struct intel_engine_cs *engine) > { > struct intel_engine_execlist * const el = &engine->execlist; > - struct execlist_port *port = el->port; > + struct execlist_port *port; > struct drm_i915_gem_request *last = NULL; > - const struct execlist_port * const last_port = execlist_port_tail(el); > bool submit = false; > struct rb_node *rb; > > - if (port_isset(port)) > - port++; > - > spin_lock_irq(&engine->timeline->lock); > rb = el->first; > GEM_BUG_ON(rb_first(&el->queue) != rb); > - while (rb) { > + > + if (unlikely(!rb)) > + goto done; > + > + port = execlist_request_port(el); > + > + do { > struct i915_priolist *p = rb_entry(rb, typeof(*p), node); > struct drm_i915_gem_request *rq, *rn; > > list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) { > if (last && rq->ctx != last->ctx) { > - if (port == last_port) { > + if (!execlist_inactive_ports(el)) { > __list_del_many(&p->requests, > &rq->priotree.link); > goto done; > @@ -587,7 
+591,8 @@ static void i915_guc_dequeue(struct intel_engine_cs *engine) > if (submit) > port_assign(port, last); > > - port = execlist_port_next(el, port); > + port = execlist_request_port(el); > + GEM_BUG_ON(port_isset(port)); > } > > INIT_LIST_HEAD(&rq->priotree.link); > @@ -604,7 +609,7 @@ static void i915_guc_dequeue(struct intel_engine_cs *engine) > INIT_LIST_HEAD(&p->requests); > if (p->priority != I915_PRIORITY_NORMAL) > kmem_cache_free(engine->i915->priorities, p); > - } > + } while (rb); > done: > el->first = rb; > if (submit) { > @@ -618,21 +623,21 @@ static void i915_guc_irq_handler(unsigned long data) > { > struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; > struct intel_engine_execlist * const el = &engine->execlist; > - struct execlist_port *port = execlist_port_head(el); > - const struct execlist_port * const last_port = execlist_port_tail(el); > - struct drm_i915_gem_request *rq; > > - rq = port_request(port); > - while (rq && i915_gem_request_completed(rq)) { > + while (execlist_active_ports(el)) { > + struct execlist_port *port = execlist_port_head(el); > + struct drm_i915_gem_request *rq = port_request(port); > + > + if (!i915_gem_request_completed(rq)) > + break; > + > trace_i915_gem_request_out(rq); > i915_gem_request_put(rq); > > - port = execlist_port_complete(el, port); > - > - rq = port_request(port); > + execlist_release_port(el, port); > } > > - if (!port_isset(last_port)) > + if (execlist_inactive_ports(el)) > i915_guc_dequeue(engine); > } > > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c > index ac5a95439393..a9d888b726c4 100644 > --- a/drivers/gpu/drm/i915/i915_irq.c > +++ b/drivers/gpu/drm/i915/i915_irq.c > @@ -1342,7 +1342,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift) > bool tasklet = false; > > if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) { > - if (port_count(execlist_port_head(el))) { > + if (READ_ONCE(el->port_count)) { > 
__set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); > tasklet = true; > } > diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c > index b0d702063a50..29b170fdd6ef 100644 > --- a/drivers/gpu/drm/i915/intel_engine_cs.c > +++ b/drivers/gpu/drm/i915/intel_engine_cs.c > @@ -407,6 +407,9 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine) > BUILD_BUG_ON_NOT_POWER_OF_2(execlist_num_ports(&engine->execlist)); > GEM_BUG_ON(execlist_num_ports(&engine->execlist) > EXECLIST_MAX_PORTS); > > + el->port_head = 0; > + el->port_count = 0; > + > el->queue = RB_ROOT; > el->first = NULL; > } > @@ -1501,8 +1504,8 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) > if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) > return false; > > - /* Both ports drained, no more ELSP submission? */ > - if (port_request(execlist_port_head(&engine->execlist))) > + /* All ports drained, no more ELSP submission? */ > + if (execlist_active_ports(&engine->execlist)) > return false; > > /* ELSP is empty, but there are ready requests? 
*/ > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c > index 8550cd6635c9..bea10620bed2 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -399,26 +399,29 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) > engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine)); > unsigned int n; > > - for (n = execlist_num_ports(el); n--; ) { > - struct execlist_port *port; > + for (n = 0; n < execlist_inactive_ports(el); n++) { > + writel(0, elsp); > + writel(0, elsp); > + } > + > + for (n = execlist_active_ports(el); n--; ) { > struct drm_i915_gem_request *rq; > + struct execlist_port *port; > unsigned int count; > u64 desc; > > port = execlist_port_index(el, n); > - > rq = port_unpack(port, &count); > - if (rq) { > - GEM_BUG_ON(count > !n); > - if (!count++) > - execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); > - port_set(port, port_pack(rq, count)); > - desc = execlists_update_context(rq); > - GEM_DEBUG_EXEC(port->context_id = upper_32_bits(desc)); > - } else { > - GEM_BUG_ON(!n); > - desc = 0; > - } > + > + GEM_BUG_ON(!rq); > + GEM_BUG_ON(count > !n); > + > + if (!count++) > + execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); > + > + port_set(port, port_pack(rq, count)); > + desc = execlists_update_context(rq); > + GEM_DEBUG_EXEC(port->context_id = upper_32_bits(desc)); > > writel(upper_32_bits(desc), elsp); > writel(lower_32_bits(desc), elsp); > @@ -456,15 +459,23 @@ static void port_assign(struct execlist_port *port, > > static void execlists_dequeue(struct intel_engine_cs *engine) > { > - struct drm_i915_gem_request *last; > struct intel_engine_execlist * const el = &engine->execlist; > - struct execlist_port *port = execlist_port_head(el); > - const struct execlist_port * const last_port = execlist_port_tail(el); > + struct execlist_port *port; > + struct drm_i915_gem_request *last; > struct rb_node *rb; > bool submit = false; > > - last = 
port_request(port); > - if (last) > + spin_lock_irq(&engine->timeline->lock); > + rb = el->first; > + GEM_BUG_ON(rb_first(&el->queue) != rb); > + > + if (unlikely(!rb)) > + goto done; > + > + if (execlist_active_ports(el)) { > + port = execlist_port_tail(el); > + last = port_request(port); > + > /* WaIdleLiteRestore:bdw,skl > * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL > * as we resubmit the request. See gen8_emit_breadcrumb() > @@ -472,6 +483,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine) > * request. > */ > last->tail = last->wa_tail; > + } else { > + /* Allocate first port to coalesce into */ > + port = execlist_request_port(el); > + last = NULL; > + } > > /* Hardware submission is through 2 ports. Conceptually each port > * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is > @@ -493,11 +509,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) > * sequence of requests as being the most optimal (fewest wake ups > * and context switches) submission. > */ > - > - spin_lock_irq(&engine->timeline->lock); > - rb = el->first; > - GEM_BUG_ON(rb_first(&el->queue) != rb); > - while (rb) { > + do { > struct i915_priolist *p = rb_entry(rb, typeof(*p), node); > struct drm_i915_gem_request *rq, *rn; > > @@ -515,11 +527,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine) > */ > if (last && !can_merge_ctx(rq->ctx, last->ctx)) { > /* > - * If we are on the second port and cannot > + * If we are on the last port and cannot > * combine this request with the last, then we > * are done. 
> */ > - if (port == last_port) { > + if (!execlist_inactive_ports(el)) { > __list_del_many(&p->requests, > &rq->priotree.link); > goto done; > @@ -544,8 +556,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) > if (submit) > port_assign(port, last); > > - port = execlist_port_next(el, port); > - > + port = execlist_request_port(el); > GEM_BUG_ON(port_isset(port)); > } > > @@ -563,7 +574,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine) > INIT_LIST_HEAD(&p->requests); > if (p->priority != I915_PRIORITY_NORMAL) > kmem_cache_free(engine->i915->priorities, p); > - } > + } while (rb); > + > done: > el->first = rb; > if (submit) > @@ -582,6 +594,9 @@ static void execlist_cancel_port_requests(struct intel_engine_execlist *el) > i915_gem_request_put(port_request(&el->port[i])); > > memset(el->port, 0, sizeof(el->port)); > + > + el->port_count = 0; > + el->port_head = 0; > } > > static void execlists_cancel_requests(struct intel_engine_cs *engine) > @@ -643,10 +658,12 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) > > static bool execlists_elsp_ready(struct intel_engine_execlist * const el) > { > - struct execlist_port * const port0 = execlist_port_head(el); > - struct execlist_port * const port1 = execlist_port_next(el, port0); > + const unsigned int active = execlist_active_ports(el); > + > + if (!active) > + return true; > > - return port_count(port0) + port_count(port1) < 2; > + return port_count(execlist_port_tail(el)) + active < 2; > } > > /* > @@ -657,7 +674,6 @@ static void intel_lrc_irq_handler(unsigned long data) > { > struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; > struct intel_engine_execlist * const el = &engine->execlist; > - struct execlist_port *port = execlist_port_head(el); > struct drm_i915_private *dev_priv = engine->i915; > > /* We can skip acquiring intel_runtime_pm_get() here as it was taken > @@ -714,6 +730,7 @@ static void intel_lrc_irq_handler(unsigned long data) 
> } > > while (head != tail) { > + struct execlist_port *port; > struct drm_i915_gem_request *rq; > unsigned int status; > unsigned int count; > @@ -742,6 +759,7 @@ static void intel_lrc_irq_handler(unsigned long data) > if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK)) > continue; > > + port = execlist_port_head(el); > /* Check the context/desc id for this event matches */ > GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id); > > @@ -755,13 +773,13 @@ static void intel_lrc_irq_handler(unsigned long data) > trace_i915_gem_request_out(rq); > i915_gem_request_put(rq); > > - port = execlist_port_complete(el, port); > + execlist_release_port(el, port); > } else { > port_set(port, port_pack(rq, count)); > } > > /* After the final element, the hw should be idle */ > - GEM_BUG_ON(port_count(port) == 0 && > + GEM_BUG_ON(execlist_active_ports(el) == 0 && > !(status & GEN8_CTX_STATUS_ACTIVE_IDLE)); > } > > @@ -786,7 +804,7 @@ static void insert_request(struct intel_engine_cs *engine, > struct i915_priolist *p = lookup_priolist(engine, pt, prio); > > list_add_tail(&pt->link, &ptr_mask_bits(p, 1)->requests); > - if (ptr_unmask_bits(p, 1) && execlists_elsp_ready(el)) > + if (ptr_unmask_bits(p, 1)) > tasklet_hi_schedule(&el->irq_tasklet); > } > > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h > index 991f6c0bd6c2..efa5a8ea1ecb 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h > @@ -249,6 +249,11 @@ struct intel_engine_execlist { > unsigned int port_head; > > /** > + * @port_count: reserved ports > + */ > + unsigned int port_count; > + > + /** > * @queue: queue of requests, in priority lists > */ > struct rb_root queue; > @@ -529,14 +534,20 @@ execlist_num_ports(const struct intel_engine_execlist * const el) > return el->port_mask + 1; > } > > -#define __port_idx(start, index, mask) (((start) + (index)) & (mask)) > +static inline unsigned int > +execlist_active_ports(const 
struct intel_engine_execlist * const el) > +{ > + return el->port_count; > +} > > -static inline struct execlist_port * > -execlist_port_head(struct intel_engine_execlist * const el) > +static inline unsigned int > +execlist_inactive_ports(const struct intel_engine_execlist * const el) > { > - return &el->port[el->port_head]; > + return execlist_num_ports(el) - execlist_active_ports(el); > } > > +#define __port_idx(start, index, mask) (((start) + (index)) & (mask)) > + > /* Index starting from port_head */ > static inline struct execlist_port * > execlist_port_index(struct intel_engine_execlist * const el, > @@ -546,30 +557,46 @@ execlist_port_index(struct intel_engine_execlist * const el, > } > > static inline struct execlist_port * > -execlist_port_tail(struct intel_engine_execlist * const el) > +execlist_port_head(struct intel_engine_execlist * const el) > { > - return &el->port[__port_idx(el->port_head, -1, el->port_mask)]; > + GEM_BUG_ON(!el->port_count); > + > + return execlist_port_index(el, 0); > } > > static inline struct execlist_port * > -execlist_port_next(struct intel_engine_execlist * const el, > - const struct execlist_port * const port) > +execlist_port_tail(struct intel_engine_execlist * const el) > { > - const unsigned int i = port_index(port, el); > + GEM_BUG_ON(!el->port_count); > > - return &el->port[__port_idx(i, 1, el->port_mask)]; > + return execlist_port_index(el, el->port_count - 1); > } > > static inline struct execlist_port * > -execlist_port_complete(struct intel_engine_execlist * const el, > - struct execlist_port * const port) > +execlist_request_port(struct intel_engine_execlist * const el) > { > + GEM_BUG_ON(el->port_count == el->port_mask + 1); > + > + el->port_count++; > + > + GEM_BUG_ON(port_isset(execlist_port_tail(el))); > + > + return execlist_port_tail(el); > +} > + > +static inline void > +execlist_release_port(struct intel_engine_execlist * const el, > + struct execlist_port * const port) > +{ > + > 
GEM_BUG_ON(port_index(port, el) != el->port_head); > + GEM_BUG_ON(!port_isset(port)); > + GEM_BUG_ON(!el->port_count); > > memset(port, 0, sizeof(struct execlist_port)); > - el->port_head = __port_idx(el->port_head, 1, el->port_mask); > > - return execlist_port_head(el); > + el->port_head = __port_idx(el->port_head, 1, el->port_mask); > + el->port_count--; > } > > static inline unsigned int > -- > 2.11.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx