Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> writes:

> Emulate HW to track and manage ELSP queue. A set of SW ports are defined
> and requests are assigned to these ports before submitting them to HW. This
> makes cleaning up incomplete requests during reset recovery easier,
> especially after engine reset, by decoupling elsp queue management. This
> will become clearer in the next patch.
>
> In the engine reset case we want to resume where we left off after skipping
> the incomplete batch, which requires checking the elsp queue, removing
> elements and fixing elsp_submitted counts in some cases. Instead of directly
> manipulating the elsp queue from the reset path we can examine these ports,
> fix up ringbuffer pointers using the incomplete request and restart
> submissions again after reset.
>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
> Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxx>
> Cc: Arun Siluvery <arun.siluvery@xxxxxxxxxxxxxxx>
> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> Link: http://patchwork.freedesktop.org/patch/msgid/1470414607-32453-3-git-send-email-arun.siluvery@xxxxxxxxxxxxxxx
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c     |   2 +-
>  drivers/gpu/drm/i915/i915_gem.c         |   6 +-
>  drivers/gpu/drm/i915/i915_gem_request.c |  11 +-
>  drivers/gpu/drm/i915/i915_gem_request.h |  24 +-
>  drivers/gpu/drm/i915/intel_lrc.c        | 417 +++++++++++---------------------
>  drivers/gpu/drm/i915/intel_lrc.h        |   2 -
>  drivers/gpu/drm/i915/intel_ringbuffer.h |   7 +-
>  7 files changed, 160 insertions(+), 309 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 9bd41581b592..f1f937a64c27 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2200,7 +2200,7 @@ static int i915_execlists(struct seq_file *m, void *data)
>         status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(engine));
>         seq_printf(m, "\tStatus pointer: 0x%08X\n", status_pointer);
>
> -       read_pointer = engine->next_context_status_buffer;
> +       read_pointer = GEN8_CSB_READ_PTR(status_pointer);
>         write_pointer = GEN8_CSB_WRITE_PTR(status_pointer);
>         if (read_pointer > write_pointer)
>                 write_pointer += GEN8_CSB_ENTRIES;
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index f4f8eaa90f2a..19715f5e698f 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2443,7 +2443,11 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
>         /* Ensure irq handler finishes or is cancelled. */
>         tasklet_kill(&engine->irq_tasklet);
>
> -       intel_execlists_cancel_requests(engine);
> +       INIT_LIST_HEAD(&engine->execlist_queue);
> +       i915_gem_request_assign(&engine->execlist_port[0].request,
> +                               NULL);
> +       i915_gem_request_assign(&engine->execlist_port[1].request,
> +                               NULL);

How about an engine->reset() vfunc which would contain these?
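Roughly like this, as a sketch only (the ->reset() vfunc name and its hookup
are hypothetical, not something this patch adds):

        /* intel_lrc.c: hypothetical per-engine reset hook */
        static void execlists_reset(struct intel_engine_cs *engine)
        {
                /* Drop the SW queue and release the requests still held
                 * by both ELSP ports. */
                INIT_LIST_HEAD(&engine->execlist_queue);
                i915_gem_request_assign(&engine->execlist_port[0].request, NULL);
                i915_gem_request_assign(&engine->execlist_port[1].request, NULL);
        }

        /* i915_gem_reset_engine_cleanup() would then reduce to: */
        tasklet_kill(&engine->irq_tasklet);
        if (engine->reset)
                engine->reset(engine);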
>  }
>
>  /*
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 6c2553715263..49cca4233a8e 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -736,16 +736,18 @@ complete:
>         return ret;
>  }
>
> -static void engine_retire_requests(struct intel_engine_cs *engine)
> +static bool engine_retire_requests(struct intel_engine_cs *engine)
>  {
>         struct drm_i915_gem_request *request, *next;
>
>         list_for_each_entry_safe(request, next, &engine->request_list, link) {
>                 if (!i915_gem_request_completed(request))
> -                       break;
> +                       return false;
>
>                 i915_gem_request_retire(request);
>         }
> +
> +       return true;
>  }
>
>  void i915_gem_retire_requests(struct drm_i915_private *dev_priv)
> @@ -759,9 +761,8 @@ void i915_gem_retire_requests(struct drm_i915_private *dev_priv)
>
>         GEM_BUG_ON(!dev_priv->gt.awake);
>
> -       for_each_engine(engine, dev_priv) {
> -               engine_retire_requests(engine);
> -               if (!intel_engine_is_active(engine))
> +       for_each_engine_masked(engine, dev_priv, dev_priv->gt.active_engines) {
> +               if (engine_retire_requests(engine))
>                         dev_priv->gt.active_engines &= ~intel_engine_flag(engine);
>         }
>

These were already split out into a separate patch. Good.

> diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
> index 3496e28785e7..32123e38ef4b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.h
> +++ b/drivers/gpu/drm/i915/i915_gem_request.h
> @@ -94,6 +94,9 @@ struct drm_i915_gem_request {
>         /** Position in the ringbuffer of the end of the whole request */
>         u32 tail;
>
> +       /** Position in the ringbuffer after the end of the whole request */
> +       u32 wa_tail;
> +
>         /** Preallocate space in the ringbuffer for the emitting the request */
>         u32 reserved_space;
>
> @@ -130,27 +133,8 @@ struct drm_i915_gem_request {
>         /** process identifier submitting this request */
>         struct pid *pid;
>
> -       /**
> -        * The ELSP only accepts two elements at a time, so we queue
> -        * context/tail pairs on a given queue (ring->execlist_queue) until the
> -        * hardware is available. The queue serves a double purpose: we also use
> -        * it to keep track of the up to 2 contexts currently in the hardware
> -        * (usually one in execution and the other queued up by the GPU): We
> -        * only remove elements from the head of the queue when the hardware
> -        * informs us that an element has been completed.
> -        *
> -        * All accesses to the queue are mediated by a spinlock
> -        * (ring->execlist_lock).
> -        */
> -
> -       /** Execlist link in the submission queue.*/
> +       /** Link in the execlist submission queue, guarded by execlist_lock. */
>         struct list_head execlist_link;
> -
> -       /** Execlists no. of times this request has been sent to the ELSP */
> -       int elsp_submitted;
> -
> -       /** Execlists context hardware id. */
> -       unsigned int ctx_hw_id;
>  };
>
>  extern const struct fence_ops i915_fence_ops;
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 309c5d9b1c57..56b5aa8a35c4 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -156,6 +156,11 @@
>  #define GEN8_CTX_STATUS_COMPLETE       (1 << 4)
>  #define GEN8_CTX_STATUS_LITE_RESTORE   (1 << 15)
>
> +#define GEN8_CTX_STATUS_COMPLETED_MASK \
> +        (GEN8_CTX_STATUS_ACTIVE_IDLE | \
> +         GEN8_CTX_STATUS_PREEMPTED | \
> +         GEN8_CTX_STATUS_ELEMENT_SWITCH)
> +
>  #define CTX_LRI_HEADER_0               0x01
>  #define CTX_CONTEXT_CONTROL            0x02
>  #define CTX_RING_HEAD                  0x04
> @@ -263,12 +268,10 @@ logical_ring_init_platform_invariants(struct intel_engine_cs *engine)
>  {
>         struct drm_i915_private *dev_priv = engine->i915;
>
> -       if (IS_GEN8(dev_priv) || IS_GEN9(dev_priv))
> -               engine->idle_lite_restore_wa = ~0;
> -
> -       engine->disable_lite_restore_wa = (IS_SKL_REVID(dev_priv, 0, SKL_REVID_B0) ||
> -                                       IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) &&
> -                                       (engine->id == VCS || engine->id == VCS2);
> +       engine->disable_lite_restore_wa =
> +               (IS_SKL_REVID(dev_priv, 0, SKL_REVID_B0) ||
> +                IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) &&
> +               (engine->id == VCS || engine->id == VCS2);

Candidate for another cleanup patch afterwards, as both revid checks smell
obsolete.

>
>         engine->ctx_desc_template = GEN8_CTX_VALID;
>         if (IS_GEN8(dev_priv))
> @@ -328,36 +331,6 @@ uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx,
>         return ctx->engine[engine->id].lrc_desc;
>  }
>
> -static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
> -                                struct drm_i915_gem_request *rq1)
> -{
> -
> -       struct intel_engine_cs *engine = rq0->engine;
> -       struct drm_i915_private *dev_priv = rq0->i915;
> -       uint64_t desc[2];
> -
> -       if (rq1) {
> -               desc[1] = intel_lr_context_descriptor(rq1->ctx, rq1->engine);
> -               rq1->elsp_submitted++;
> -       } else {
> -               desc[1] = 0;
> -       }
> -
> -       desc[0] = intel_lr_context_descriptor(rq0->ctx, rq0->engine);
> -       rq0->elsp_submitted++;
> -
> -       /* You must always write both descriptors in the order below. */
> -       I915_WRITE_FW(RING_ELSP(engine), upper_32_bits(desc[1]));
> -       I915_WRITE_FW(RING_ELSP(engine), lower_32_bits(desc[1]));
> -
> -       I915_WRITE_FW(RING_ELSP(engine), upper_32_bits(desc[0]));
> -       /* The context is automatically loaded after the following */
> -       I915_WRITE_FW(RING_ELSP(engine), lower_32_bits(desc[0]));
> -
> -       /* ELSP is a wo register, use another nearby reg for posting */
> -       POSTING_READ_FW(RING_EXECLIST_STATUS_LO(engine));
> -}
> -
>  static void
>  execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state)
>  {
> @@ -367,13 +340,12 @@ execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state)
>         ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
>  }
>
> -static void execlists_update_context(struct drm_i915_gem_request *rq)
> +static u64 execlists_update_context(struct drm_i915_gem_request *rq)
>  {
> -       struct intel_engine_cs *engine = rq->engine;
> +       struct intel_context *ce = &rq->ctx->engine[rq->engine->id];
>         struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
> -       uint32_t *reg_state = rq->ctx->engine[engine->id].lrc_reg_state;
>
> -       reg_state[CTX_RING_TAIL+1] = intel_ring_offset(rq->ring, rq->tail);
> +       ce->lrc_reg_state[CTX_RING_TAIL+1] = intel_ring_offset(rq->ring, rq->tail);
>
>         /* True 32b PPGTT with dynamic page allocation: update PDP
>          * registers and point the unallocated PDPs to scratch page.
> @@ -381,32 +353,14 @@ static void execlists_update_context(struct drm_i915_gem_request *rq)
>          * in 48-bit mode.
>          */
>         if (ppgtt && !USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
> -               execlists_update_context_pdps(ppgtt, reg_state);
> -}
> -
> -static void execlists_elsp_submit_contexts(struct drm_i915_gem_request *rq0,
> -                                          struct drm_i915_gem_request *rq1)
> -{
> -       struct drm_i915_private *dev_priv = rq0->i915;
> -       unsigned int fw_domains = rq0->engine->fw_domains;
> -
> -       execlists_update_context(rq0);
> -
> -       if (rq1)
> -               execlists_update_context(rq1);
> +               execlists_update_context_pdps(ppgtt, ce->lrc_reg_state);
>
> -       spin_lock_irq(&dev_priv->uncore.lock);
> -       intel_uncore_forcewake_get__locked(dev_priv, fw_domains);
> -
> -       execlists_elsp_write(rq0, rq1);
> -
> -       intel_uncore_forcewake_put__locked(dev_priv, fw_domains);
> -       spin_unlock_irq(&dev_priv->uncore.lock);
> +       return ce->lrc_desc;
>  }
>
> -static inline void execlists_context_status_change(
> -               struct drm_i915_gem_request *rq,
> -               unsigned long status)
> +static inline void
> +execlists_context_status_change(struct drm_i915_gem_request *rq,
> +                               unsigned long status)
>  {
>         /*
>          * Only used when GVT-g is enabled now. When GVT-g is disabled,
> @@ -418,122 +372,104 @@ static inline void execlists_context_status_change(
>         atomic_notifier_call_chain(&rq->ctx->status_notifier, status, rq);
>  }
>
> -static void execlists_unqueue(struct intel_engine_cs *engine)
> +static void execlists_submit_ports(struct intel_engine_cs *engine)
>  {
> -       struct drm_i915_gem_request *req0 = NULL, *req1 = NULL;
> -       struct drm_i915_gem_request *cursor, *tmp;
> -
> -       assert_spin_locked(&engine->execlist_lock);
> -
> -       /*
> -        * If irqs are not active generate a warning as batches that finish
> -        * without the irqs may get lost and a GPU Hang may occur.
> -        */
> -       WARN_ON(!intel_irqs_enabled(engine->i915));
> -
> -       /* Try to read in pairs */
> -       list_for_each_entry_safe(cursor, tmp, &engine->execlist_queue,
> -                                execlist_link) {
> -               if (!req0) {
> -                       req0 = cursor;
> -               } else if (req0->ctx == cursor->ctx) {
> -                       /* Same ctx: ignore first request, as second request
> -                        * will update tail past first request's workload */
> -                       cursor->elsp_submitted = req0->elsp_submitted;
> -                       list_del(&req0->execlist_link);
> -                       i915_gem_request_put(req0);
> -                       req0 = cursor;
> -               } else {
> -                       if (IS_ENABLED(CONFIG_DRM_I915_GVT)) {
> -                               /*
> -                                * req0 (after merged) ctx requires single
> -                                * submission, stop picking
> -                                */
> -                               if (req0->ctx->execlists_force_single_submission)
> -                                       break;
> -                               /*
> -                                * req0 ctx doesn't require single submission,
> -                                * but next req ctx requires, stop picking
> -                                */
> -                               if (cursor->ctx->execlists_force_single_submission)
> -                                       break;
> -                       }
> -                       req1 = cursor;
> -                       WARN_ON(req1->elsp_submitted);
> -                       break;
> -               }
> -       }
> -
> -       if (unlikely(!req0))
> -               return;
> -
> -       execlists_context_status_change(req0, INTEL_CONTEXT_SCHEDULE_IN);
> +       struct drm_i915_private *dev_priv = engine->i915;
> +       u32 *elsp = dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine));
> +       u64 desc[2];
>
> -       if (req1)
> -               execlists_context_status_change(req1,
> +       if (!engine->execlist_port[0].count)
> +               execlists_context_status_change(engine->execlist_port[0].request,
>                                                 INTEL_CONTEXT_SCHEDULE_IN);
> +       desc[0] = execlists_update_context(engine->execlist_port[0].request);
> +       engine->preempt_wa = engine->execlist_port[0].count++;
>
> -       if (req0->elsp_submitted & engine->idle_lite_restore_wa) {
> -               /*
> -                * WaIdleLiteRestore: make sure we never cause a lite restore
> -                * with HEAD==TAIL.
> -                *
> -                * Apply the wa NOOPS to prevent ring:HEAD == req:TAIL as we
> -                * resubmit the request. See gen8_emit_request() for where we
> -                * prepare the padding after the end of the request.
> -                */
> -               req0->tail += 8;
> -               req0->tail &= req0->ring->size - 1;
> +       if (engine->execlist_port[1].request) {
> +               GEM_BUG_ON(engine->execlist_port[1].count);
> +               execlists_context_status_change(engine->execlist_port[1].request,
> +                                               INTEL_CONTEXT_SCHEDULE_IN);
> +               desc[1] = execlists_update_context(engine->execlist_port[1].request);
> +               engine->execlist_port[1].count = 1;
> +       } else {
> +               desc[1] = 0;
>         }
>
> -       execlists_elsp_submit_contexts(req0, req1);
> +       /* You must always write both descriptors in the order below. */
> +       writel(upper_32_bits(desc[1]), elsp);
> +       writel(lower_32_bits(desc[1]), elsp);
> +
> +       writel(upper_32_bits(desc[0]), elsp);
> +       /* The context is automatically loaded after the following */
> +       writel(lower_32_bits(desc[0]), elsp);
>  }
>
> -static unsigned int
> -execlists_check_remove_request(struct intel_engine_cs *engine, u32 ctx_id)
> +static bool merge_ctx(struct i915_gem_context *prev,
> +                     struct i915_gem_context *next)
>  {
> -       struct drm_i915_gem_request *head_req;
> +       if (prev != next)
> +               return false;
>
> -       assert_spin_locked(&engine->execlist_lock);
> +       if (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
> +           prev->execlists_force_single_submission)
> +               return false;
>
> -       head_req = list_first_entry_or_null(&engine->execlist_queue,
> -                                           struct drm_i915_gem_request,
> -                                           execlist_link);
> -
> -       if (WARN_ON(!head_req || (head_req->ctx_hw_id != ctx_id)))
> -               return 0;
> +       return true;
> +}
>
> -       WARN(head_req->elsp_submitted == 0, "Never submitted head request\n");
> +static void execlists_context_unqueue(struct intel_engine_cs *engine)
> +{
> +       struct drm_i915_gem_request *cursor, *last;
> +       struct execlist_port *port = engine->execlist_port;
> +       bool submit = false;
> +
> +       last = port->request;
> +       if (last)
> +               /* WaIdleLiteRestore:bdw,skl
> +                * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
> +                * as we resubmit the request. See gen8_emit_request()
> +                * for where we prepare the padding after the end of the
> +                * request.
> +                */
> +               last->tail = last->wa_tail;
>
> -       if (--head_req->elsp_submitted > 0)
> -               return 0;
> +       /* Try to read in pairs and fill both submission ports */
> +       spin_lock(&engine->execlist_lock);
> +       list_for_each_entry(cursor, &engine->execlist_queue, execlist_link) {
> +               if (last && !merge_ctx(cursor->ctx, last->ctx)) {
> +                       if (port != engine->execlist_port)
> +                               break;

Here you will merge contexts also for the second port? The previous version
of the code was careful to only merge requests for the first port.

>
> -       execlists_context_status_change(head_req, INTEL_CONTEXT_SCHEDULE_OUT);
> +                       if (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
> +                           cursor->ctx->execlists_force_single_submission)
> +                               break;
>
> -       list_del(&head_req->execlist_link);
> -       i915_gem_request_put(head_req);
> +                       i915_gem_request_assign(&port->request, last);
> +                       port++;
> +               }
> +               last = cursor;
> +               submit = true;
> +       }
> +       if (submit) {
> +               i915_gem_request_assign(&port->request, last);
> +               engine->execlist_queue.next = &cursor->execlist_link;
> +               cursor->execlist_link.prev = &engine->execlist_queue;

So we keep the cursor pointing at the request that will be in port[0].request?
Would a while (!list_empty(&engine->execlist_queue)) loop, removing entries
from the list as they are consumed, have made the fill loop more complex?
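For comparison, the consuming variant I had in mind, as a sketch only (same
names as in the patch, execlist_lock held as above):

        while (!list_empty(&engine->execlist_queue)) {
                cursor = list_first_entry(&engine->execlist_queue,
                                          struct drm_i915_gem_request,
                                          execlist_link);

                if (last && !merge_ctx(cursor->ctx, last->ctx)) {
                        /* Both ports claimed: leave the rest queued. */
                        if (port != engine->execlist_port)
                                break;

                        if (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
                            cursor->ctx->execlists_force_single_submission)
                                break;

                        i915_gem_request_assign(&port->request, last);
                        port++;
                }

                /* Consumed entries come off the queue here, so no list
                 * head fixup is needed after the loop. */
                list_del(&cursor->execlist_link);
                last = cursor;
                submit = true;
        }
        if (submit)
                i915_gem_request_assign(&port->request, last);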
> +       }
> +       spin_unlock(&engine->execlist_lock);
>
> -       return 1;
> +       if (submit)
> +               execlists_submit_ports(engine);
>  }
>
> -static u32
> -get_context_status(struct intel_engine_cs *engine, unsigned int read_pointer,
> -                  u32 *context_id)
> +static bool execlists_elsp_idle(struct intel_engine_cs *engine)
>  {
> -       struct drm_i915_private *dev_priv = engine->i915;
> -       u32 status;
> -
> -       read_pointer %= GEN8_CSB_ENTRIES;
> -
> -       status = I915_READ_FW(RING_CONTEXT_STATUS_BUF_LO(engine, read_pointer));
> -
> -       if (status & GEN8_CTX_STATUS_IDLE_ACTIVE)
> -               return 0;
> +       return !engine->execlist_port[0].request;
> +}
>
> -       *context_id = I915_READ_FW(RING_CONTEXT_STATUS_BUF_HI(engine,
> -                                                             read_pointer));
> +static bool execlists_elsp_ready(struct intel_engine_cs *engine)
> +{
> +       int idx = !engine->disable_lite_restore_wa && !engine->preempt_wa;
>
> -       return status;
> +       return !engine->execlist_port[idx].request;
>  }
>
>  /*
> @@ -544,100 +480,63 @@ static void intel_lrc_irq_handler(unsigned long data)
>  {
>         struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
>         struct drm_i915_private *dev_priv = engine->i915;
> -       u32 status_pointer;
> -       unsigned int read_pointer, write_pointer;
> -       u32 csb[GEN8_CSB_ENTRIES][2];
> -       unsigned int csb_read = 0, i;
> -       unsigned int submit_contexts = 0;
>
>         intel_uncore_forcewake_get(dev_priv, engine->fw_domains);
>
> -       status_pointer = I915_READ_FW(RING_CONTEXT_STATUS_PTR(engine));
> -
> -       read_pointer = engine->next_context_status_buffer;
> -       write_pointer = GEN8_CSB_WRITE_PTR(status_pointer);
> -       if (read_pointer > write_pointer)
> -               write_pointer += GEN8_CSB_ENTRIES;
> -
> -       while (read_pointer < write_pointer) {
> -               if (WARN_ON_ONCE(csb_read == GEN8_CSB_ENTRIES))
> -                       break;
> -               csb[csb_read][0] = get_context_status(engine, ++read_pointer,
> -                                                     &csb[csb_read][1]);
> -               csb_read++;
> -       }
> -
> -       engine->next_context_status_buffer = write_pointer % GEN8_CSB_ENTRIES;
> -
> -       /* Update the read pointer to the old write pointer. Manual ringbuffer
> -        * management ftw </sarcasm> */
> -       I915_WRITE_FW(RING_CONTEXT_STATUS_PTR(engine),
> -                     _MASKED_FIELD(GEN8_CSB_READ_PTR_MASK,
> -                                   engine->next_context_status_buffer << 8));
> -
> -       intel_uncore_forcewake_put(dev_priv, engine->fw_domains);
> -
> -       spin_lock(&engine->execlist_lock);
> -
> -       for (i = 0; i < csb_read; i++) {
> -               if (unlikely(csb[i][0] & GEN8_CTX_STATUS_PREEMPTED)) {
> -                       if (csb[i][0] & GEN8_CTX_STATUS_LITE_RESTORE) {
> -                               if (execlists_check_remove_request(engine, csb[i][1]))
> -                                       WARN(1, "Lite Restored request removed from queue\n");
> -                       } else
> -                               WARN(1, "Preemption without Lite Restore\n");
> +       if (!execlists_elsp_idle(engine)) {
> +               u32 *ring_mmio =
> +                       dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine));
> +               u32 *csb_mmio =
> +                       dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0));
> +               unsigned ring, head, tail;
> +
> +               ring = readl(ring_mmio);
> +               head = GEN8_CSB_READ_PTR(ring);
> +               tail = GEN8_CSB_WRITE_PTR(ring);
> +               if (tail < head)
> +                       tail += GEN8_CSB_ENTRIES;
> +
> +               while (head < tail) {
> +                       unsigned idx = ++head % GEN8_CSB_ENTRIES;
> +                       unsigned status = readl(&csb_mmio[2*idx]);
> +
> +                       if (status & GEN8_CTX_STATUS_COMPLETED_MASK) {
> +                               GEM_BUG_ON(engine->execlist_port[0].count == 0);

Would this be the safety valve if we go out of sync wrt submit vs head?
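My reading of the count bookkeeping, as a worked trace (my understanding,
not from the patch text; assumes the CSB events arrive in order):

        submit ctx A                     port[0].count = 1, preempt_wa = false
        append to A, lite restore        port[0].count = 2, preempt_wa = true
        CSB: PREEMPTED | LITE_RESTORE    count 2 -> 1, port[0] kept
        CSB: ACTIVE_IDLE (A completes)   count 1 -> 0, SCHEDULE_OUT,
                                         port[1] shifted into port[0]

A completion event arriving with the count already at zero would mean the SW
ports and the HW CSB stream have diverged, which is what this GEM_BUG_ON
would catch.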
> +                               if (--engine->execlist_port[0].count == 0) {
> +                                       GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
> +                                       execlists_context_status_change(engine->execlist_port[0].request,
> +                                                                       INTEL_CONTEXT_SCHEDULE_OUT);
> +                                       i915_gem_request_put(engine->execlist_port[0].request);
> +                                       engine->execlist_port[0] = engine->execlist_port[1];

Is the intention here to always retire through port zero, so that the
refcounts match the submitted ones?

> +                                       memset(&engine->execlist_port[1], 0,
> +                                              sizeof(engine->execlist_port[1]));
> +                                       engine->preempt_wa = false;
> +                               }
> +                       }
> +                       GEM_BUG_ON(engine->execlist_port[0].count == 0 &&
> +                                  !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
>                 }
>
> -               if (csb[i][0] & (GEN8_CTX_STATUS_ACTIVE_IDLE |
> -                   GEN8_CTX_STATUS_ELEMENT_SWITCH))
> -                       submit_contexts +=
> -                               execlists_check_remove_request(engine, csb[i][1]);
> +               writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, GEN8_CSB_WRITE_PTR(ring) << 8),
> +                      ring_mmio);
>         }
>
> -       if (submit_contexts) {
> -               if (!engine->disable_lite_restore_wa ||
> -                   (csb[i][0] & GEN8_CTX_STATUS_ACTIVE_IDLE))
> -                       execlists_unqueue(engine);
> -       }
> +       if (execlists_elsp_ready(engine))
> +               execlists_context_unqueue(engine);
>
> -       spin_unlock(&engine->execlist_lock);
> -
> -       if (unlikely(submit_contexts > 2))
> -               DRM_ERROR("More than two context complete events?\n");
> +       intel_uncore_forcewake_put(dev_priv, engine->fw_domains);
>  }
>
>  static void execlists_submit_request(struct drm_i915_gem_request *request)
>  {
>         struct intel_engine_cs *engine = request->engine;
> -       struct drm_i915_gem_request *cursor;
> -       int num_elements = 0;
>
>         spin_lock_bh(&engine->execlist_lock);
>
> -       list_for_each_entry(cursor, &engine->execlist_queue, execlist_link)
> -               if (++num_elements > 2)
> -                       break;
> -
> -       if (num_elements > 2) {
> -               struct drm_i915_gem_request *tail_req;
> -
> -               tail_req = list_last_entry(&engine->execlist_queue,
> -                                          struct drm_i915_gem_request,
> -                                          execlist_link);
> -
> -               if (request->ctx == tail_req->ctx) {
> -                       WARN(tail_req->elsp_submitted != 0,
> -                            "More than 2 already-submitted reqs queued\n");
> -                       list_del(&tail_req->execlist_link);
> -                       i915_gem_request_put(tail_req);
> -               }
> -       }
> -
> -       i915_gem_request_get(request);
>         list_add_tail(&request->execlist_link, &engine->execlist_queue);
> -       request->ctx_hw_id = request->ctx->hw_id;
> -       if (num_elements == 0)
> -               execlists_unqueue(engine);
> +
> +       if (execlists_elsp_idle(engine))
> +               tasklet_hi_schedule(&engine->irq_tasklet);
>
>         spin_unlock_bh(&engine->execlist_lock);
>  }
> @@ -731,6 +630,7 @@ intel_logical_ring_advance(struct drm_i915_gem_request *request)
>         intel_ring_emit(ring, MI_NOOP);
>         intel_ring_emit(ring, MI_NOOP);
>         intel_ring_advance(ring);
> +       request->wa_tail = request->ring->tail;
>
>         /* We keep the previous context alive until we retire the following
>          * request. This ensures that any the context object is still pinned
> @@ -743,23 +643,6 @@ intel_logical_ring_advance(struct drm_i915_gem_request *request)
>         return 0;
>  }
>
> -void intel_execlists_cancel_requests(struct intel_engine_cs *engine)
> -{
> -       struct drm_i915_gem_request *req, *tmp;
> -       LIST_HEAD(cancel_list);
> -
> -       WARN_ON(!mutex_is_locked(&engine->i915->drm.struct_mutex));
> -
> -       spin_lock_bh(&engine->execlist_lock);
> -       list_replace_init(&engine->execlist_queue, &cancel_list);
> -       spin_unlock_bh(&engine->execlist_lock);
> -
> -       list_for_each_entry_safe(req, tmp, &cancel_list, execlist_link) {
> -               list_del(&req->execlist_link);
> -               i915_gem_request_put(req);
> -       }
> -}
> -
>  static int intel_lr_context_pin(struct i915_gem_context *ctx,
>                                 struct intel_engine_cs *engine)
>  {
> @@ -1285,7 +1168,6 @@ static void lrc_init_hws(struct intel_engine_cs *engine)
>  static int gen8_init_common_ring(struct intel_engine_cs *engine)
>  {
>         struct drm_i915_private *dev_priv = engine->i915;
> -       unsigned int next_context_status_buffer_hw;
>
>         lrc_init_hws(engine);
>
> @@ -1296,32 +1178,12 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
>         I915_WRITE(RING_MODE_GEN7(engine),
>                    _MASKED_BIT_DISABLE(GFX_REPLAY_MODE) |
>                    _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE));
> -       POSTING_READ(RING_MODE_GEN7(engine));
>
> -       /*
> -        * Instead of resetting the Context Status Buffer (CSB) read pointer to
> -        * zero, we need to read the write pointer from hardware and use its
> -        * value because "this register is power context save restored".
> -        * Effectively, these states have been observed:
> -        *
> -        *      | Suspend-to-idle (freeze) | Suspend-to-RAM (mem) |
> -        * BDW  | CSB regs not reset       | CSB regs reset       |
> -        * CHT  | CSB regs not reset       | CSB regs not reset   |
> -        * SKL  |         ?                |         ?            |
> -        * BXT  |         ?                |         ?            |
> -        */
> -       next_context_status_buffer_hw =
> -               GEN8_CSB_WRITE_PTR(I915_READ(RING_CONTEXT_STATUS_PTR(engine)));
> -
> -       /*
> -        * When the CSB registers are reset (also after power-up / gpu reset),
> -        * CSB write pointer is set to all 1's, which is not valid, use '5' in
> -        * this special case, so the first element read is CSB[0].
> -        */
> -       if (next_context_status_buffer_hw == GEN8_CSB_PTR_MASK)
> -               next_context_status_buffer_hw = (GEN8_CSB_ENTRIES - 1);
> +       I915_WRITE(RING_CONTEXT_STATUS_PTR(engine),
> +                  _MASKED_FIELD(GEN8_CSB_READ_PTR_MASK |
> +                                GEN8_CSB_WRITE_PTR_MASK,
> +                                0));
>
> -       engine->next_context_status_buffer = next_context_status_buffer_hw;
>         DRM_DEBUG_DRIVER("Execlists enabled for %s\n", engine->name);
>
>         intel_engine_init_hangcheck(engine);
> @@ -1706,7 +1568,6 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
>         }
>         intel_lr_context_unpin(dev_priv->kernel_context, engine);
>
> -       engine->idle_lite_restore_wa = 0;
>         engine->disable_lite_restore_wa = false;
>         engine->ctx_desc_template = 0;
>
> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
> index a52cf57dbd40..4d70346500c2 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/intel_lrc.h
> @@ -97,6 +97,4 @@ int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv,
>                                     int enable_execlists);
>  void intel_execlists_enable_submission(struct drm_i915_private *dev_priv);
>
> -void intel_execlists_cancel_requests(struct intel_engine_cs *engine);
> -
>  #endif /* _INTEL_LRC_H_ */
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 43e545e44352..ed8a8482a7fb 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -288,11 +288,14 @@ struct intel_engine_cs {
>         /* Execlists */
>         struct tasklet_struct irq_tasklet;
>         spinlock_t execlist_lock; /* used inside tasklet, use spin_lock_bh */
> +       struct execlist_port {
> +               struct drm_i915_gem_request *request;
> +               unsigned int count;
> +       } execlist_port[2];
>         struct list_head execlist_queue;
>         unsigned int fw_domains;
> -       unsigned int next_context_status_buffer;
> -       unsigned int idle_lite_restore_wa;
>         bool disable_lite_restore_wa;
> +       bool preempt_wa;

This needs to be reset also in logical_ring_cleanup().

Overall this patch really makes the submission/retirement path much more
compact and simpler. The exception is the unqueuing part, which is difficult
to follow due to the merging logic.

Some bikesheds I made during review can be found in:
https://cgit.freedesktop.org/~miku/drm-intel/commit/?h=review&id=3c806a9f6665db3a08f949224560bb039af1be1a

-Mika

>         u32 ctx_desc_template;
>
>         /**
> --
> 2.8.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx