Quoting Daniele Ceraolo Spurio (2019-12-11 21:12:44) > Split out all the code related to the execlists submission flow to its > own file to keep it separate from the general context management, > because the latter will be re-used by the GuC submission flow. > > Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@xxxxxxxxx> > Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxxxxxxxx> > Cc: Matthew Brost <matthew.brost@xxxxxxxxx> > --- > drivers/gpu/drm/i915/Makefile | 1 + > drivers/gpu/drm/i915/gt/intel_engine_cs.c | 1 + > .../drm/i915/gt/intel_execlists_submission.c | 2485 ++++++++++++++++ > .../drm/i915/gt/intel_execlists_submission.h | 58 + > drivers/gpu/drm/i915/gt/intel_lrc.c | 2511 +---------------- > drivers/gpu/drm/i915/gt/intel_lrc.h | 34 +- > .../gpu/drm/i915/gt/intel_virtual_engine.c | 1 + > drivers/gpu/drm/i915/gt/selftest_execlists.c | 2 +- > drivers/gpu/drm/i915/gt/selftest_lrc.c | 2 +- > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 1 + > drivers/gpu/drm/i915/gvt/scheduler.c | 1 + > drivers/gpu/drm/i915/i915_perf.c | 1 + > 12 files changed, 2584 insertions(+), 2514 deletions(-) > create mode 100644 drivers/gpu/drm/i915/gt/intel_execlists_submission.c > create mode 100644 drivers/gpu/drm/i915/gt/intel_execlists_submission.h > > diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile > index 79f5ef5acd4c..3640e0436c97 100644 > --- a/drivers/gpu/drm/i915/Makefile > +++ b/drivers/gpu/drm/i915/Makefile > @@ -82,6 +82,7 @@ gt-y += \ > gt/intel_engine_pm.o \ > gt/intel_engine_pool.o \ > gt/intel_engine_user.o \ > + gt/intel_execlists_submission.o \ > gt/intel_gt.o \ > gt/intel_gt_irq.o \ > gt/intel_gt_pm.o \ > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c > index 49473c25916c..0a23d01b7589 100644 > --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c > +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c > @@ -33,6 +33,7 @@ > #include "intel_engine_pm.h" > #include "intel_engine_pool.h" > #include "intel_engine_user.h" > +#include "intel_execlists_submission.h" > #include "intel_gt.h" > #include "intel_gt_requests.h" > #include "intel_lrc.h" > diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > new file mode 100644 > index 000000000000..76b878bf15ad > --- /dev/null > +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > @@ -0,0 +1,2485 @@ > +/* > + * SPDX-License-Identifier: MIT > + * > + * Copyright © 2019 Intel Corporation > + */ > + > +#include <linux/interrupt.h> > + > +#include "gem/i915_gem_context.h" > + > +#include "i915_drv.h" > +#include "i915_perf.h" > +#include "i915_trace.h" > +#include "i915_vgpu.h" > +#include "intel_engine_pm.h" > +#include "intel_gt.h" > +#include "intel_gt_pm.h" > +#include "intel_gt_requests.h" > +#include "intel_lrc_reg.h" > +#include "intel_mocs.h" > +#include "intel_reset.h" > +#include "intel_ring.h" > +#include "intel_virtual_engine.h" > +#include "intel_workarounds.h" > +#include "intel_execlists_submission.h" > + > +#define RING_EXECLIST_QFULL (1 << 0x2) > +#define RING_EXECLIST1_VALID (1 << 0x3) > +#define RING_EXECLIST0_VALID (1 << 0x4) > +#define RING_EXECLIST_ACTIVE_STATUS (3 << 0xE) > +#define RING_EXECLIST1_ACTIVE (1 << 0x11) > +#define RING_EXECLIST0_ACTIVE (1 << 0x12) > + > +#define GEN8_CTX_STATUS_IDLE_ACTIVE (1 << 0) > +#define GEN8_CTX_STATUS_PREEMPTED (1 << 1) > +#define GEN8_CTX_STATUS_ELEMENT_SWITCH (1 << 2) > +#define 
GEN8_CTX_STATUS_ACTIVE_IDLE (1 << 3) > +#define GEN8_CTX_STATUS_COMPLETE (1 << 4) > +#define GEN8_CTX_STATUS_LITE_RESTORE (1 << 15) > + > +#define GEN8_CTX_STATUS_COMPLETED_MASK \ > + (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED) > + > +#define CTX_DESC_FORCE_RESTORE BIT_ULL(2) > + > +#define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE (0x1) /* lower csb dword */ > +#define GEN12_CTX_SWITCH_DETAIL(csb_dw) ((csb_dw) & 0xF) /* upper csb dword */ > +#define GEN12_CSB_SW_CTX_ID_MASK GENMASK(25, 15) > +#define GEN12_IDLE_CTX_ID 0x7FF > +#define GEN12_CSB_CTX_VALID(csb_dw) \ > + (FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID) > + > +/* Typical size of the average request (2 pipecontrols and a MI_BB) */ > +#define EXECLISTS_REQUEST_SIZE 64 /* bytes */ > + > +static void mark_eio(struct i915_request *rq) > +{ > + if (i915_request_completed(rq)) > + return; > + > + GEM_BUG_ON(i915_request_signaled(rq)); > + > + dma_fence_set_error(&rq->fence, -EIO); > + i915_request_mark_complete(rq); > +} > + > +static struct i915_request * > +active_request(const struct intel_timeline * const tl, struct i915_request *rq) > +{ > + struct i915_request *active = rq; > + > + rcu_read_lock(); > + list_for_each_entry_continue_reverse(rq, &tl->requests, link) { > + if (i915_request_completed(rq)) > + break; > + > + active = rq; > + } > + rcu_read_unlock(); > + > + return active; > +} > + > +static inline void > +ring_set_paused(const struct intel_engine_cs *engine, int state) > +{ > + /* > + * We inspect HWS_PREEMPT with a semaphore inside > + * engine->emit_fini_breadcrumb. If the dword is true, > + * the ring is paused as the semaphore will busywait > + * until the dword is false. > + */ > + engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state; > + if (state) > + wmb(); > +} > + > +static inline struct i915_priolist *to_priolist(struct rb_node *rb) > +{ > + return rb_entry(rb, struct i915_priolist, node); > +} > + > +static inline int rq_prio(const struct i915_request *rq) > +{ > + return rq->sched.attr.priority; > +} > + > +static int effective_prio(const struct i915_request *rq) > +{ > + int prio = rq_prio(rq); > + > + /* > + * If this request is special and must not be interrupted at any > + * cost, so be it. Note we are only checking the most recent request > + * in the context and so may be masking an earlier vip request. It > + * is hoped that under the conditions where nopreempt is used, this > + * will not matter (i.e. all requests to that context will be > + * nopreempt for as long as desired). > + */ > + if (i915_request_has_nopreempt(rq)) > + prio = I915_PRIORITY_UNPREEMPTABLE; > + > + /* > + * On unwinding the active request, we give it a priority bump > + * if it has completed waiting on any semaphore. If we know that > + * the request has already started, we can prevent an unwanted > + * preempt-to-idle cycle by taking that into account now. > + */ > + if (__i915_request_has_started(rq)) > + prio |= I915_PRIORITY_NOSEMAPHORE; > + > + /* Restrict mere WAIT boosts from triggering preemption */ > + BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); /* only internal */ > + return prio | __NO_PREEMPTION; > +} > + > +static int queue_prio(const struct intel_engine_execlists *execlists) > +{ > + struct i915_priolist *p; > + struct rb_node *rb; > + > + rb = rb_first_cached(&execlists->queue); > + if (!rb) > + return INT_MIN; > + > + /* > + * As the priolist[] are inverted, with the highest priority in [0], > + * we have to flip the index value to become priority. 
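The flip above is easier to see in isolation; a stand-alone sketch of the same arithmetic, with a made-up PRIO_SHIFT standing in for I915_USER_PRIORITY_SHIFT (the shift value and the numbers below are illustrative only, not the driver's constants):

	/*
	 * Sketch of the priolist "index flip": each bucket holds one user
	 * priority level plus a bitmask of internal sub-levels, bit 0 being
	 * the highest.  ffs() is 1-based, so sub-level 0 maps back to the
	 * top value of the bucket: ((prio + 1) << SHIFT) - 1.
	 */
	#include <stdio.h>
	#include <strings.h>	/* ffs() */

	#define PRIO_SHIFT	3	/* hypothetical, for illustration */

	static int effective_priority(int bucket_prio, unsigned int used)
	{
		return ((bucket_prio + 1) << PRIO_SHIFT) - ffs(used);
	}

	int main(void)
	{
		printf("%d\n", effective_priority(0, 0x1));	/* 7: top of bucket 0 */
		printf("%d\n", effective_priority(0, 0x4));	/* 5: two sub-levels down */
		printf("%d\n", effective_priority(1, 0x1));	/* 15: top of bucket 1 */
		return 0;
	}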
> + */ > + p = to_priolist(rb); > + return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(p->used); > +} > + > +static inline bool need_preempt(const struct intel_engine_cs *engine, > + const struct i915_request *rq, > + struct rb_node *rb) > +{ > + int last_prio; > + > + if (!intel_engine_has_semaphores(engine)) > + return false; > + > + /* > + * Check if the current priority hint merits a preemption attempt. > + * > + * We record the highest value priority we saw during rescheduling > + * prior to this dequeue, therefore we know that if it is strictly > + * less than the current tail of ESLP[0], we do not need to force > + * a preempt-to-idle cycle. > + * > + * However, the priority hint is a mere hint that we may need to > + * preempt. If that hint is stale or we may be trying to preempt > + * ourselves, ignore the request. > + * > + * More naturally we would write > + * prio >= max(0, last); > + * except that we wish to prevent triggering preemption at the same > + * priority level: the task that is running should remain running > + * to preserve FIFO ordering of dependencies. > + */ > + last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1); > + if (engine->execlists.queue_priority_hint <= last_prio) > + return false; > + > + /* > + * Check against the first request in ELSP[1], it will, thanks to the > + * power of PI, be the highest priority of that context. > + */ > + if (!list_is_last(&rq->sched.link, &engine->active.requests) && > + rq_prio(list_next_entry(rq, sched.link)) > last_prio) > + return true; > + > + if (rb) { > + struct intel_virtual_engine *ve = > + rb_entry(rb, typeof(*ve), nodes[engine->id].rb); > + bool preempt = false; > + > + if (engine == ve->siblings[0]) { /* only preempt one sibling */ > + struct i915_request *next; > + > + rcu_read_lock(); > + next = READ_ONCE(ve->request); > + if (next) > + preempt = rq_prio(next) > last_prio; > + rcu_read_unlock(); > + } > + > + if (preempt) > + return preempt; > + } > + > + /* > + * If the inflight context did not trigger the preemption, then maybe > + * it was the set of queued requests? Pick the highest priority in > + * the queue (the first active priolist) and see if it deserves to be > + * running instead of ELSP[0]. > + * > + * The highest priority request in the queue can not be either > + * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same > + * context, it's priority would not exceed ELSP[0] aka last_prio. > + */ > + return queue_prio(&engine->execlists) > last_prio; > +} > + > +__maybe_unused static inline bool > +assert_priority_queue(const struct i915_request *prev, > + const struct i915_request *next) > +{ > + /* > + * Without preemption, the prev may refer to the still active element > + * which we refuse to let go. > + * > + * Even with preemption, there are times when we think it is better not > + * to preempt and leave an ostensibly lower priority request in flight. 
> + */ > + if (i915_request_is_active(prev)) > + return true; > + > + return rq_prio(prev) >= rq_prio(next); > +} > + > +static struct i915_request * > +__unwind_incomplete_requests(struct intel_engine_cs *engine) > +{ > + struct i915_request *rq, *rn, *active = NULL; > + struct list_head *uninitialized_var(pl); > + int prio = I915_PRIORITY_INVALID; > + > + lockdep_assert_held(&engine->active.lock); > + > + list_for_each_entry_safe_reverse(rq, rn, > + &engine->active.requests, > + sched.link) { > + if (i915_request_completed(rq)) > + continue; /* XXX */ > + > + __i915_request_unsubmit(rq); > + > + /* > + * Push the request back into the queue for later resubmission. > + * If this request is not native to this physical engine (i.e. > + * it came from a virtual source), push it back onto the virtual > + * engine so that it can be moved across onto another physical > + * engine as load dictates. > + */ > + if (likely(rq->execution_mask == engine->mask)) { > + GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); > + if (rq_prio(rq) != prio) { > + prio = rq_prio(rq); > + pl = i915_sched_lookup_priolist(engine, prio); > + } > + GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); > + > + list_move(&rq->sched.link, pl); > + active = rq; > + } else { > + struct intel_engine_cs *owner = rq->hw_context->engine; > + > + /* > + * Decouple the virtual breadcrumb before moving it > + * back to the virtual engine -- we don't want the > + * request to complete in the background and try > + * and cancel the breadcrumb on the virtual engine > + * (instead of the old engine where it is linked)! > + */ > + if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, > + &rq->fence.flags)) { > + spin_lock_nested(&rq->lock, > + SINGLE_DEPTH_NESTING); > + i915_request_cancel_breadcrumb(rq); > + spin_unlock(&rq->lock); > + } > + rq->engine = owner; > + owner->submit_request(rq); > + active = NULL; > + } > + } > + > + return active; > +} > + > +struct i915_request * > +execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists) There should be no exports from this file... Did you not also make guc_submission standalone? > +{ > + struct intel_engine_cs *engine = > + container_of(execlists, typeof(*engine), execlists); > + > + return __unwind_incomplete_requests(engine); > +} > + > +static inline void > +execlists_context_status_change(struct i915_request *rq, unsigned long status) > +{ > + /* > + * Only used when GVT-g is enabled now. When GVT-g is disabled, > + * The compiler should eliminate this function as dead-code. 
> + */ > + if (!IS_ENABLED(CONFIG_DRM_I915_GVT)) > + return; > + > + atomic_notifier_call_chain(&rq->engine->context_status_notifier, > + status, rq); > +} > + > +static void intel_engine_context_in(struct intel_engine_cs *engine) > +{ > + unsigned long flags; > + > + if (READ_ONCE(engine->stats.enabled) == 0) > + return; > + > + write_seqlock_irqsave(&engine->stats.lock, flags); > + > + if (engine->stats.enabled > 0) { > + if (engine->stats.active++ == 0) > + engine->stats.start = ktime_get(); > + GEM_BUG_ON(engine->stats.active == 0); > + } > + > + write_sequnlock_irqrestore(&engine->stats.lock, flags); > +} > + > +static void intel_engine_context_out(struct intel_engine_cs *engine) > +{ > + unsigned long flags; > + > + if (READ_ONCE(engine->stats.enabled) == 0) > + return; > + > + write_seqlock_irqsave(&engine->stats.lock, flags); > + > + if (engine->stats.enabled > 0) { > + ktime_t last; > + > + if (engine->stats.active && --engine->stats.active == 0) { > + /* > + * Decrement the active context count and in case GPU > + * is now idle add up to the running total. > + */ > + last = ktime_sub(ktime_get(), engine->stats.start); > + > + engine->stats.total = ktime_add(engine->stats.total, > + last); > + } else if (engine->stats.active == 0) { > + /* > + * After turning on engine stats, context out might be > + * the first event in which case we account from the > + * time stats gathering was turned on. > + */ > + last = ktime_sub(ktime_get(), engine->stats.enabled_at); > + > + engine->stats.total = ktime_add(engine->stats.total, > + last); > + } > + } > + > + write_sequnlock_irqrestore(&engine->stats.lock, flags); > +} > + > +static void > +execlists_check_context(const struct intel_context *ce, > + const struct intel_engine_cs *engine) > +{ > + const struct intel_ring *ring = ce->ring; > + u32 *regs = ce->lrc_reg_state; > + bool valid = true; > + int x; > + > + if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) { > + pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n", > + engine->name, > + regs[CTX_RING_START], > + i915_ggtt_offset(ring->vma)); > + regs[CTX_RING_START] = i915_ggtt_offset(ring->vma); > + valid = false; > + } > + > + if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) != > + (RING_CTL_SIZE(ring->size) | RING_VALID)) { > + pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n", > + engine->name, > + regs[CTX_RING_CTL], > + (u32)(RING_CTL_SIZE(ring->size) | RING_VALID)); > + regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID; > + valid = false; > + } > + > + x = intel_lrc_ring_mi_mode(engine); > + if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) { > + pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n", > + engine->name, regs[x + 1]); > + regs[x + 1] &= ~STOP_RING; > + regs[x + 1] |= STOP_RING << 16; > + valid = false; > + } > + > + WARN_ONCE(!valid, "Invalid lrc state found before submission\n"); > +} > + > +static void reset_active(struct i915_request *rq, > + struct intel_engine_cs *engine) > +{ > + struct intel_context * const ce = rq->hw_context; > + u32 head; > + > + /* > + * The executing context has been cancelled. We want to prevent > + * further execution along this context and propagate the error on > + * to anything depending on its results. > + * > + * In __i915_request_submit(), we apply the -EIO and remove the > + * requests' payloads for any banned requests. 
But first, we must > + * rewind the context back to the start of the incomplete request so > + * that we do not jump back into the middle of the batch. > + * > + * We preserve the breadcrumbs and semaphores of the incomplete > + * requests so that inter-timeline dependencies (i.e other timelines) > + * remain correctly ordered. And we defer to __i915_request_submit() > + * so that all asynchronous waits are correctly handled. > + */ > + GEM_TRACE("%s(%s): { rq=%llx:%lld }\n", > + __func__, engine->name, rq->fence.context, rq->fence.seqno); > + > + /* On resubmission of the active request, payload will be scrubbed */ > + if (i915_request_completed(rq)) > + head = rq->tail; > + else > + head = active_request(ce->timeline, rq)->head; > + ce->ring->head = intel_ring_wrap(ce->ring, head); > + intel_ring_update_space(ce->ring); > + > + /* Scrub the context image to prevent replaying the previous batch */ > + intel_lr_context_restore_default_state(ce, engine); > + intel_lr_context_update_reg_state(ce, engine); > + > + /* We've switched away, so this should be a no-op, but intent matters */ > + ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; > +} > + > +static inline struct intel_engine_cs * > +__execlists_schedule_in(struct i915_request *rq) > +{ > + struct intel_engine_cs * const engine = rq->engine; > + struct intel_context * const ce = rq->hw_context; > + > + intel_context_get(ce); > + > + if (unlikely(i915_gem_context_is_banned(ce->gem_context))) > + reset_active(rq, engine); > + > + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) > + execlists_check_context(ce, engine); > + > + if (ce->tag) { > + /* Use a fixed tag for OA and friends */ > + ce->lrc_desc |= (u64)ce->tag << 32; > + } else { > + /* We don't need a strict matching tag, just different values */ > + ce->lrc_desc &= ~GENMASK_ULL(47, 37); > + ce->lrc_desc |= > + (u64)(engine->context_tag++ % NUM_CONTEXT_TAG) << > + GEN11_SW_CTX_ID_SHIFT; > + BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID); > + } > + > + __intel_gt_pm_get(engine->gt); > + execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); > + intel_engine_context_in(engine); > + > + return engine; > +} > + > +static inline struct i915_request * > +execlists_schedule_in(struct i915_request *rq, int idx) > +{ > + struct intel_context * const ce = rq->hw_context; > + struct intel_engine_cs *old; > + > + GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine)); > + trace_i915_request_in(rq, idx); > + > + old = READ_ONCE(ce->inflight); > + do { > + if (!old) { > + WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq)); > + break; > + } > + } while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old))); > + > + GEM_BUG_ON(intel_context_inflight(ce) != rq->engine); > + return i915_request_get(rq); > +} > + > +static void kick_siblings(struct i915_request *rq, struct intel_context *ce) > +{ > + struct intel_virtual_engine *ve = > + container_of(ce, typeof(*ve), context); > + struct i915_request *next = READ_ONCE(ve->request); > + > + if (next && next->execution_mask & ~rq->execution_mask) > + tasklet_schedule(&ve->base.execlists.tasklet); > +} > + > +static inline void > +__execlists_schedule_out(struct i915_request *rq, > + struct intel_engine_cs * const engine) > +{ > + struct intel_context * const ce = rq->hw_context; > + > + /* > + * NB process_csb() is not under the engine->active.lock and hence > + * schedule_out can race with schedule_in meaning that we should > + * refrain from doing non-trivial work here. 
> + */ > + > + /* > + * If we have just completed this context, the engine may now be > + * idle and we want to re-enter powersaving. > + */ > + if (list_is_last(&rq->link, &ce->timeline->requests) && > + i915_request_completed(rq)) > + intel_engine_add_retire(engine, ce->timeline); > + > + intel_engine_context_out(engine); > + execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); > + intel_gt_pm_put_async(engine->gt); > + > + /* > + * If this is part of a virtual engine, its next request may > + * have been blocked waiting for access to the active context. > + * We have to kick all the siblings again in case we need to > + * switch (e.g. the next request is not runnable on this > + * engine). Hopefully, we will already have submitted the next > + * request before the tasklet runs and do not need to rebuild > + * each virtual tree and kick everyone again. > + */ > + if (ce->engine != engine) > + kick_siblings(rq, ce); > + > + intel_context_put(ce); > +} > + > +static inline void > +execlists_schedule_out(struct i915_request *rq) > +{ > + struct intel_context * const ce = rq->hw_context; > + struct intel_engine_cs *cur, *old; > + > + trace_i915_request_out(rq); > + > + old = READ_ONCE(ce->inflight); > + do > + cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL; > + while (!try_cmpxchg(&ce->inflight, &old, cur)); > + if (!cur) > + __execlists_schedule_out(rq, old); > + > + i915_request_put(rq); > +} > + > +static u64 execlists_update_context(struct i915_request *rq) > +{ > + struct intel_context *ce = rq->hw_context; > + u64 desc = ce->lrc_desc; > + u32 tail; > + > + /* > + * WaIdleLiteRestore:bdw,skl > + * > + * We should never submit the context with the same RING_TAIL twice > + * just in case we submit an empty ring, which confuses the HW. > + * > + * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of > + * the normal request to be able to always advance the RING_TAIL on > + * subsequent resubmissions (for lite restore). Should that fail us, > + * and we try and submit the same tail again, force the context > + * reload. > + */ > + tail = intel_ring_set_tail(rq->ring, rq->tail); > + if (unlikely(ce->lrc_reg_state[CTX_RING_TAIL] == tail)) > + desc |= CTX_DESC_FORCE_RESTORE; > + ce->lrc_reg_state[CTX_RING_TAIL] = tail; > + rq->tail = rq->wa_tail; > + > + /* > + * Make sure the context image is complete before we submit it to HW. > + * > + * Ostensibly, writes (including the WCB) should be flushed prior to > + * an uncached write such as our mmio register access, the empirical > + * evidence (esp. on Braswell) suggests that the WC write into memory > + * may not be visible to the HW prior to the completion of the UC > + * register write and that we may begin execution from the context > + * before its image is complete leading to invalid PD chasing. 
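The ce->inflight bookkeeping in execlists_schedule_in()/execlists_schedule_out() above packs a small submission count into the low (alignment) bits of the engine pointer and updates it with a cmpxchg loop. A stand-alone user-space model of that pattern, with invented names and C11 atomics standing in for try_cmpxchg()/ptr_inc()/ptr_dec():

	#include <stdatomic.h>
	#include <stdint.h>
	#include <stdio.h>

	#define COUNT_BITS	2
	#define COUNT_MASK	((1UL << COUNT_BITS) - 1)

	/* alignment guarantees the low COUNT_BITS of the pointer are free */
	struct engine { int id; } __attribute__((aligned(1 << COUNT_BITS)));

	static _Atomic uintptr_t inflight;	/* engine pointer | count */

	static void schedule_in(struct engine *e)
	{
		uintptr_t old = atomic_load(&inflight);

		for (;;) {
			/* first submission installs the pointer (count 0 == one in flight);
			 * the driver does the full __execlists_schedule_in() on that path */
			uintptr_t next = old ? old + 1 : (uintptr_t)e;

			if (atomic_compare_exchange_strong(&inflight, &old, next))
				break;
		}
	}

	static struct engine *schedule_out(void)
	{
		uintptr_t old = atomic_load(&inflight), next;

		do {
			/* drop one packed count; clear the slot on the last out */
			next = (old & COUNT_MASK) ? old - 1 : 0;
		} while (!atomic_compare_exchange_strong(&inflight, &old, next));

		return next ? NULL : (struct engine *)(old & ~COUNT_MASK);
	}

	int main(void)
	{
		struct engine rcs = { .id = 0 };

		schedule_in(&rcs);
		schedule_in(&rcs);			/* lite-restore: count -> 1 */
		printf("%p\n", (void *)schedule_out());	/* NULL: context still inflight */
		printf("%p\n", (void *)schedule_out());	/* &rcs: last reference gone */
		return 0;
	}

The last schedule_out() to drop the count is the one that gets the engine pointer back and so does the real tear-down work, mirroring the __execlists_schedule_out() call above.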
> + */ > + wmb(); > + > + /* Wa_1607138340:tgl */ > + if (IS_TGL_REVID(rq->i915, TGL_REVID_A0, TGL_REVID_A0)) > + desc |= CTX_DESC_FORCE_RESTORE; > + > + ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE; > + return desc; > +} > + > +static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port) > +{ > + if (execlists->ctrl_reg) { > + writel(lower_32_bits(desc), execlists->submit_reg + port * 2); > + writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1); > + } else { > + writel(upper_32_bits(desc), execlists->submit_reg); > + writel(lower_32_bits(desc), execlists->submit_reg); > + } > +} > + > +static __maybe_unused void > +trace_ports(const struct intel_engine_execlists *execlists, > + const char *msg, > + struct i915_request * const *ports) > +{ > + const struct intel_engine_cs *engine = > + container_of(execlists, typeof(*engine), execlists); > + > + if (!ports[0]) > + return; > + > + GEM_TRACE("%s: %s { %llx:%lld%s, %llx:%lld }\n", > + engine->name, msg, > + ports[0]->fence.context, > + ports[0]->fence.seqno, > + i915_request_completed(ports[0]) ? "!" : > + i915_request_started(ports[0]) ? "*" : > + "", > + ports[1] ? ports[1]->fence.context : 0, > + ports[1] ? ports[1]->fence.seqno : 0); > +} > + > +static __maybe_unused bool > +assert_pending_valid(const struct intel_engine_execlists *execlists, > + const char *msg) > +{ > + struct i915_request * const *port, *rq; > + struct intel_context *ce = NULL; > + > + trace_ports(execlists, msg, execlists->pending); > + > + if (!execlists->pending[0]) { > + GEM_TRACE_ERR("Nothing pending for promotion!\n"); > + return false; > + } > + > + if (execlists->pending[execlists_num_ports(execlists)]) { > + GEM_TRACE_ERR("Excess pending[%d] for promotion!\n", > + execlists_num_ports(execlists)); > + return false; > + } > + > + for (port = execlists->pending; (rq = *port); port++) { > + unsigned long flags; > + bool ok = true; > + > + GEM_BUG_ON(!kref_read(&rq->fence.refcount)); > + GEM_BUG_ON(!i915_request_is_active(rq)); > + > + if (ce == rq->hw_context) { > + GEM_TRACE_ERR("Dup context:%llx in pending[%zd]\n", > + ce->timeline->fence_context, > + port - execlists->pending); > + return false; > + } > + ce = rq->hw_context; > + > + /* Hold tightly onto the lock to prevent concurrent retires! 
*/ > + if (!spin_trylock_irqsave(&rq->lock, flags)) > + continue; > + > + if (i915_request_completed(rq)) > + goto unlock; > + > + if (i915_active_is_idle(&ce->active) && > + !i915_gem_context_is_kernel(ce->gem_context)) { > + GEM_TRACE_ERR("Inactive context:%llx in pending[%zd]\n", > + ce->timeline->fence_context, > + port - execlists->pending); > + ok = false; > + goto unlock; > + } > + > + if (!i915_vma_is_pinned(ce->state)) { > + GEM_TRACE_ERR("Unpinned context:%llx in pending[%zd]\n", > + ce->timeline->fence_context, > + port - execlists->pending); > + ok = false; > + goto unlock; > + } > + > + if (!i915_vma_is_pinned(ce->ring->vma)) { > + GEM_TRACE_ERR("Unpinned ring:%llx in pending[%zd]\n", > + ce->timeline->fence_context, > + port - execlists->pending); > + ok = false; > + goto unlock; > + } > + > +unlock: > + spin_unlock_irqrestore(&rq->lock, flags); > + if (!ok) > + return false; > + } > + > + return ce; > +} > + > +static void execlists_submit_ports(struct intel_engine_cs *engine) > +{ > + struct intel_engine_execlists *execlists = &engine->execlists; > + unsigned int n; > + > + GEM_BUG_ON(!assert_pending_valid(execlists, "submit")); > + > + /* > + * We can skip acquiring intel_runtime_pm_get() here as it was taken > + * on our behalf by the request (see i915_gem_mark_busy()) and it will > + * not be relinquished until the device is idle (see > + * i915_gem_idle_work_handler()). As a precaution, we make sure > + * that all ELSP are drained i.e. we have processed the CSB, > + * before allowing ourselves to idle and calling intel_runtime_pm_put(). > + */ > + GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); > + > + /* > + * ELSQ note: the submit queue is not cleared after being submitted > + * to the HW so we need to make sure we always clean it up. This is > + * currently ensured by the fact that we always write the same number > + * of elsq entries, keep this in mind before changing the loop below. > + */ > + for (n = execlists_num_ports(execlists); n--; ) { > + struct i915_request *rq = execlists->pending[n]; > + > + write_desc(execlists, > + rq ? execlists_update_context(rq) : 0, > + n); > + } > + > + /* we need to manually load the submit queue */ > + if (execlists->ctrl_reg) > + writel(EL_CTRL_LOAD, execlists->ctrl_reg); > +} > + > +static bool ctx_single_port_submission(const struct intel_context *ce) > +{ > + return (IS_ENABLED(CONFIG_DRM_I915_GVT) && > + i915_gem_context_force_single_submission(ce->gem_context)); > +} > + > +static bool can_merge_ctx(const struct intel_context *prev, > + const struct intel_context *next) > +{ > + if (prev != next) > + return false; > + > + if (ctx_single_port_submission(prev)) > + return false; > + > + return true; > +} > + > +static bool can_merge_rq(const struct i915_request *prev, > + const struct i915_request *next) > +{ > + GEM_BUG_ON(prev == next); > + GEM_BUG_ON(!assert_priority_queue(prev, next)); > + > + /* > + * We do not submit known completed requests. Therefore if the next > + * request is already completed, we can pretend to merge it in > + * with the previous context (and we will skip updating the ELSP > + * and tracking). Thus hopefully keeping the ELSP full with active > + * contexts, despite the best efforts of preempt-to-busy to confuse > + * us. 
> + */ > + if (i915_request_completed(next)) > + return true; > + > + if (unlikely((prev->flags ^ next->flags) & > + (I915_REQUEST_NOPREEMPT | I915_REQUEST_SENTINEL))) > + return false; > + > + if (!can_merge_ctx(prev->hw_context, next->hw_context)) > + return false; > + > + return true; > +} > + > +static bool virtual_matches(const struct intel_virtual_engine *ve, > + const struct i915_request *rq, > + const struct intel_engine_cs *engine) > +{ > + const struct intel_engine_cs *inflight; > + > + if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */ > + return false; > + > + /* > + * We track when the HW has completed saving the context image > + * (i.e. when we have seen the final CS event switching out of > + * the context) and must not overwrite the context image before > + * then. This restricts us to only using the active engine > + * while the previous virtualized request is inflight (so > + * we reuse the register offsets). This is a very small > + * hystersis on the greedy seelction algorithm. > + */ > + inflight = intel_context_inflight(&ve->context); > + if (inflight && inflight != engine) > + return false; > + > + return true; > +} > + > +static void virtual_xfer_breadcrumbs(struct intel_virtual_engine *ve, > + struct intel_engine_cs *engine) > +{ > + struct intel_engine_cs *old = ve->siblings[0]; > + > + /* All unattached (rq->engine == old) must already be completed */ > + > + spin_lock(&old->breadcrumbs.irq_lock); > + if (!list_empty(&ve->context.signal_link)) { > + list_move_tail(&ve->context.signal_link, > + &engine->breadcrumbs.signalers); > + intel_engine_queue_breadcrumbs(engine); > + } > + spin_unlock(&old->breadcrumbs.irq_lock); > +} > + > +static struct i915_request * > +last_active(const struct intel_engine_execlists *execlists) > +{ > + struct i915_request * const *last = READ_ONCE(execlists->active); > + > + while (*last && i915_request_completed(*last)) > + last++; > + > + return *last; > +} > + > +static void defer_request(struct i915_request *rq, struct list_head * const pl) > +{ > + LIST_HEAD(list); > + > + /* > + * We want to move the interrupted request to the back of > + * the round-robin list (i.e. its priority level), but > + * in doing so, we must then move all requests that were in > + * flight and were waiting for the interrupted request to > + * be run after it again. 
> + */ > + do { > + struct i915_dependency *p; > + > + GEM_BUG_ON(i915_request_is_active(rq)); > + list_move_tail(&rq->sched.link, pl); > + > + list_for_each_entry(p, &rq->sched.waiters_list, wait_link) { > + struct i915_request *w = > + container_of(p->waiter, typeof(*w), sched); > + > + /* Leave semaphores spinning on the other engines */ > + if (w->engine != rq->engine) > + continue; > + > + /* No waiter should start before its signaler */ > + GEM_BUG_ON(i915_request_started(w) && > + !i915_request_completed(rq)); > + > + GEM_BUG_ON(i915_request_is_active(w)); > + if (list_empty(&w->sched.link)) > + continue; /* Not yet submitted; unready */ > + > + if (rq_prio(w) < rq_prio(rq)) > + continue; > + > + GEM_BUG_ON(rq_prio(w) > rq_prio(rq)); > + list_move_tail(&w->sched.link, &list); > + } > + > + rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); > + } while (rq); > +} > + > +static void defer_active(struct intel_engine_cs *engine) > +{ > + struct i915_request *rq; > + > + rq = __unwind_incomplete_requests(engine); > + if (!rq) > + return; > + > + defer_request(rq, i915_sched_lookup_priolist(engine, rq_prio(rq))); > +} > + > +static bool > +need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq) > +{ > + int hint; > + > + if (!intel_engine_has_timeslices(engine)) > + return false; > + > + if (list_is_last(&rq->sched.link, &engine->active.requests)) > + return false; > + > + hint = max(rq_prio(list_next_entry(rq, sched.link)), > + engine->execlists.queue_priority_hint); > + > + return hint >= effective_prio(rq); > +} > + > +static int > +switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq) > +{ > + if (list_is_last(&rq->sched.link, &engine->active.requests)) > + return INT_MIN; > + > + return rq_prio(list_next_entry(rq, sched.link)); > +} > + > +static inline unsigned long > +timeslice(const struct intel_engine_cs *engine) > +{ > + return READ_ONCE(engine->props.timeslice_duration_ms); > +} > + > +static unsigned long > +active_timeslice(const struct intel_engine_cs *engine) > +{ > + const struct i915_request *rq = *engine->execlists.active; > + > + if (i915_request_completed(rq)) > + return 0; > + > + if (engine->execlists.switch_priority_hint < effective_prio(rq)) > + return 0; > + > + return timeslice(engine); > +} > + > +static void set_timeslice(struct intel_engine_cs *engine) > +{ > + if (!intel_engine_has_timeslices(engine)) > + return; > + > + set_timer_ms(&engine->execlists.timer, active_timeslice(engine)); > +} > + > +static void record_preemption(struct intel_engine_execlists *execlists) > +{ > + (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++); > +} > + > +static unsigned long active_preempt_timeout(struct intel_engine_cs *engine) > +{ > + struct i915_request *rq; > + > + rq = last_active(&engine->execlists); > + if (!rq) > + return 0; > + > + /* Force a fast reset for terminated contexts (ignoring sysfs!) 
*/ > + if (unlikely(i915_gem_context_is_banned(rq->gem_context))) > + return 1; > + > + return READ_ONCE(engine->props.preempt_timeout_ms); > +} > + > +static void set_preempt_timeout(struct intel_engine_cs *engine) > +{ > + if (!intel_engine_has_preempt_reset(engine)) > + return; > + > + set_timer_ms(&engine->execlists.preempt, > + active_preempt_timeout(engine)); > +} > + > +static void execlists_dequeue(struct intel_engine_cs *engine) > +{ > + struct intel_engine_execlists * const execlists = &engine->execlists; > + struct i915_request **port = execlists->pending; > + struct i915_request ** const last_port = port + execlists->port_mask; > + struct i915_request *last; > + struct rb_node *rb; > + bool submit = false; > + > + /* > + * Hardware submission is through 2 ports. Conceptually each port > + * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is > + * static for a context, and unique to each, so we only execute > + * requests belonging to a single context from each ring. RING_HEAD > + * is maintained by the CS in the context image, it marks the place > + * where it got up to last time, and through RING_TAIL we tell the CS > + * where we want to execute up to this time. > + * > + * In this list the requests are in order of execution. Consecutive > + * requests from the same context are adjacent in the ringbuffer. We > + * can combine these requests into a single RING_TAIL update: > + * > + * RING_HEAD...req1...req2 > + * ^- RING_TAIL > + * since to execute req2 the CS must first execute req1. > + * > + * Our goal then is to point each port to the end of a consecutive > + * sequence of requests as being the most optimal (fewest wake ups > + * and context switches) submission. > + */ > + > + for (rb = rb_first_cached(&execlists->virtual); rb; ) { > + struct intel_virtual_engine *ve = > + rb_entry(rb, typeof(*ve), nodes[engine->id].rb); > + struct i915_request *rq = READ_ONCE(ve->request); > + > + if (!rq) { /* lazily cleanup after another engine handled rq */ > + rb_erase_cached(rb, &execlists->virtual); > + RB_CLEAR_NODE(rb); > + rb = rb_first_cached(&execlists->virtual); > + continue; > + } > + > + if (!virtual_matches(ve, rq, engine)) { > + rb = rb_next(rb); > + continue; > + } > + > + break; > + } > + > + /* > + * If the queue is higher priority than the last > + * request in the currently active context, submit afresh. > + * We will resubmit again afterwards in case we need to split > + * the active context to interject the preemption request, > + * i.e. we will retrigger preemption following the ack in case > + * of trouble. > + */ > + last = last_active(execlists); > + if (last) { > + if (need_preempt(engine, last, rb)) { > + GEM_TRACE("%s: preempting last=%llx:%lld, prio=%d, hint=%d\n", > + engine->name, > + last->fence.context, > + last->fence.seqno, > + last->sched.attr.priority, > + execlists->queue_priority_hint); > + record_preemption(execlists); > + > + /* > + * Don't let the RING_HEAD advance past the breadcrumb > + * as we unwind (and until we resubmit) so that we do > + * not accidentally tell it to go backwards. > + */ > + ring_set_paused(engine, 1); > + > + /* > + * Note that we have not stopped the GPU at this point, > + * so we are unwinding the incomplete requests as they > + * remain inflight and so by the time we do complete > + * the preemption, some of the unwound requests may > + * complete! 
> + */ > + __unwind_incomplete_requests(engine); > + > + /* > + * If we need to return to the preempted context, we > + * need to skip the lite-restore and force it to > + * reload the RING_TAIL. Otherwise, the HW has a > + * tendency to ignore us rewinding the TAIL to the > + * end of an earlier request. > + */ > + last->hw_context->lrc_desc |= CTX_DESC_FORCE_RESTORE; > + last = NULL; > + } else if (need_timeslice(engine, last) && > + timer_expired(&engine->execlists.timer)) { > + GEM_TRACE("%s: expired last=%llx:%lld, prio=%d, hint=%d\n", > + engine->name, > + last->fence.context, > + last->fence.seqno, > + last->sched.attr.priority, > + execlists->queue_priority_hint); > + > + ring_set_paused(engine, 1); > + defer_active(engine); > + > + /* > + * Unlike for preemption, if we rewind and continue > + * executing the same context as previously active, > + * the order of execution will remain the same and > + * the tail will only advance. We do not need to > + * force a full context restore, as a lite-restore > + * is sufficient to resample the monotonic TAIL. > + * > + * If we switch to any other context, similarly we > + * will not rewind TAIL of current context, and > + * normal save/restore will preserve state and allow > + * us to later continue executing the same request. > + */ > + last = NULL; > + } else { > + /* > + * Otherwise if we already have a request pending > + * for execution after the current one, we can > + * just wait until the next CS event before > + * queuing more. In either case we will force a > + * lite-restore preemption event, but if we wait > + * we hopefully coalesce several updates into a single > + * submission. > + */ > + if (!list_is_last(&last->sched.link, > + &engine->active.requests)) { > + /* > + * Even if ELSP[1] is occupied and not worthy > + * of timeslices, our queue might be. > + */ > + if (!execlists->timer.expires && > + need_timeslice(engine, last)) > + set_timer_ms(&execlists->timer, > + timeslice(engine)); > + > + return; > + } > + } > + } > + > + while (rb) { /* XXX virtual is always taking precedence */ > + struct intel_virtual_engine *ve = > + rb_entry(rb, typeof(*ve), nodes[engine->id].rb); > + struct i915_request *rq; > + > + spin_lock(&ve->base.active.lock); > + > + rq = ve->request; > + if (unlikely(!rq)) { /* lost the race to a sibling */ > + spin_unlock(&ve->base.active.lock); > + rb_erase_cached(rb, &execlists->virtual); > + RB_CLEAR_NODE(rb); > + rb = rb_first_cached(&execlists->virtual); > + continue; > + } > + > + GEM_BUG_ON(rq != ve->request); > + GEM_BUG_ON(rq->engine != &ve->base); > + GEM_BUG_ON(rq->hw_context != &ve->context); > + > + if (rq_prio(rq) >= queue_prio(execlists)) { > + if (!virtual_matches(ve, rq, engine)) { > + spin_unlock(&ve->base.active.lock); > + rb = rb_next(rb); > + continue; > + } > + > + if (last && !can_merge_rq(last, rq)) { > + spin_unlock(&ve->base.active.lock); > + return; /* leave this for another */ > + } > + > + GEM_TRACE("%s: virtual rq=%llx:%lld%s, new engine? %s\n", > + engine->name, > + rq->fence.context, > + rq->fence.seqno, > + i915_request_completed(rq) ? "!" : > + i915_request_started(rq) ? 
"*" : > + "", > + yesno(engine != ve->siblings[0])); > + > + ve->request = NULL; > + ve->base.execlists.queue_priority_hint = INT_MIN; > + rb_erase_cached(rb, &execlists->virtual); > + RB_CLEAR_NODE(rb); > + > + GEM_BUG_ON(!(rq->execution_mask & engine->mask)); > + rq->engine = engine; > + > + if (engine != ve->siblings[0]) { > + u32 *regs = ve->context.lrc_reg_state; > + unsigned int n; > + > + GEM_BUG_ON(READ_ONCE(ve->context.inflight)); > + > + if (!intel_engine_has_relative_mmio(engine)) > + intel_lr_context_set_register_offsets(regs, > + engine); > + > + if (!list_empty(&ve->context.signals)) > + virtual_xfer_breadcrumbs(ve, engine); > + > + /* > + * Move the bound engine to the top of the list > + * for future execution. We then kick this > + * tasklet first before checking others, so that > + * we preferentially reuse this set of bound > + * registers. > + */ > + for (n = 1; n < ve->num_siblings; n++) { > + if (ve->siblings[n] == engine) { > + swap(ve->siblings[n], > + ve->siblings[0]); > + break; > + } > + } > + > + GEM_BUG_ON(ve->siblings[0] != engine); > + } > + > + if (__i915_request_submit(rq)) { > + submit = true; > + last = rq; > + } > + i915_request_put(rq); > + > + /* > + * Hmm, we have a bunch of virtual engine requests, > + * but the first one was already completed (thanks > + * preempt-to-busy!). Keep looking at the veng queue > + * until we have no more relevant requests (i.e. > + * the normal submit queue has higher priority). > + */ > + if (!submit) { > + spin_unlock(&ve->base.active.lock); > + rb = rb_first_cached(&execlists->virtual); > + continue; > + } > + } > + > + spin_unlock(&ve->base.active.lock); > + break; > + } > + > + while ((rb = rb_first_cached(&execlists->queue))) { > + struct i915_priolist *p = to_priolist(rb); > + struct i915_request *rq, *rn; > + int i; > + > + priolist_for_each_request_consume(rq, rn, p, i) { > + bool merge = true; > + > + /* > + * Can we combine this request with the current port? > + * It has to be the same context/ringbuffer and not > + * have any exceptions (e.g. GVT saying never to > + * combine contexts). > + * > + * If we can combine the requests, we can execute both > + * by updating the RING_TAIL to point to the end of the > + * second request, and so we never need to tell the > + * hardware about the first. > + */ > + if (last && !can_merge_rq(last, rq)) { > + /* > + * If we are on the second port and cannot > + * combine this request with the last, then we > + * are done. > + */ > + if (port == last_port) > + goto done; > + > + /* > + * We must not populate both ELSP[] with the > + * same LRCA, i.e. we must submit 2 different > + * contexts if we submit 2 ELSP. > + */ > + if (last->hw_context == rq->hw_context) > + goto done; > + > + if (i915_request_has_sentinel(last)) > + goto done; > + > + /* > + * If GVT overrides us we only ever submit > + * port[0], leaving port[1] empty. Note that we > + * also have to be careful that we don't queue > + * the same context (even though a different > + * request) to the second port. 
> + */ > + if (ctx_single_port_submission(last->hw_context) || > + ctx_single_port_submission(rq->hw_context)) > + goto done; > + > + merge = false; > + } > + > + if (__i915_request_submit(rq)) { > + if (!merge) { > + *port = execlists_schedule_in(last, port - execlists->pending); > + port++; > + last = NULL; > + } > + > + GEM_BUG_ON(last && > + !can_merge_ctx(last->hw_context, > + rq->hw_context)); > + > + submit = true; > + last = rq; > + } > + } > + > + rb_erase_cached(&p->node, &execlists->queue); > + i915_priolist_free(p); > + } > + > +done: > + /* > + * Here be a bit of magic! Or sleight-of-hand, whichever you prefer. > + * > + * We choose the priority hint such that if we add a request of greater > + * priority than this, we kick the submission tasklet to decide on > + * the right order of submitting the requests to hardware. We must > + * also be prepared to reorder requests as they are in-flight on the > + * HW. We derive the priority hint then as the first "hole" in > + * the HW submission ports and if there are no available slots, > + * the priority of the lowest executing request, i.e. last. > + * > + * When we do receive a higher priority request ready to run from the > + * user, see queue_request(), the priority hint is bumped to that > + * request triggering preemption on the next dequeue (or subsequent > + * interrupt for secondary ports). > + */ > + execlists->queue_priority_hint = queue_prio(execlists); > + GEM_TRACE("%s: queue_priority_hint:%d, submit:%s\n", > + engine->name, execlists->queue_priority_hint, > + yesno(submit)); > + > + if (submit) { > + *port = execlists_schedule_in(last, port - execlists->pending); > + execlists->switch_priority_hint = > + switch_prio(engine, *execlists->pending); > + > + /* > + * Skip if we ended up with exactly the same set of requests, > + * e.g. 
trying to timeslice a pair of ordered contexts > + */ > + if (!memcmp(execlists->active, execlists->pending, > + (port - execlists->pending + 1) * sizeof(*port))) { > + do > + execlists_schedule_out(fetch_and_zero(port)); > + while (port-- != execlists->pending); > + > + goto skip_submit; > + } > + > + memset(port + 1, 0, (last_port - port) * sizeof(*port)); > + execlists_submit_ports(engine); > + > + set_preempt_timeout(engine); > + } else { > +skip_submit: > + ring_set_paused(engine, 0); > + } > +} > + > +static void > +cancel_port_requests(struct intel_engine_execlists * const execlists) > +{ > + struct i915_request * const *port; > + > + for (port = execlists->pending; *port; port++) > + execlists_schedule_out(*port); > + memset(execlists->pending, 0, sizeof(execlists->pending)); > + > + /* Mark the end of active before we overwrite *active */ > + for (port = xchg(&execlists->active, execlists->pending); *port; port++) > + execlists_schedule_out(*port); > + WRITE_ONCE(execlists->active, > + memset(execlists->inflight, 0, sizeof(execlists->inflight))); > +} > + > +static inline void > +invalidate_csb_entries(const u32 *first, const u32 *last) > +{ > + clflush((void *)first); > + clflush((void *)last); > +} > + > +static inline bool > +reset_in_progress(const struct intel_engine_execlists *execlists) > +{ > + return unlikely(!__tasklet_is_enabled(&execlists->tasklet)); > +} > + > +/* > + * Starting with Gen12, the status has a new format: > + * > + * bit 0: switched to new queue > + * bit 1: reserved > + * bit 2: semaphore wait mode (poll or signal), only valid when > + * switch detail is set to "wait on semaphore" > + * bits 3-5: engine class > + * bits 6-11: engine instance > + * bits 12-14: reserved > + * bits 15-25: sw context id of the lrc the GT switched to > + * bits 26-31: sw counter of the lrc the GT switched to > + * bits 32-35: context switch detail > + * - 0: ctx complete > + * - 1: wait on sync flip > + * - 2: wait on vblank > + * - 3: wait on scanline > + * - 4: wait on semaphore > + * - 5: context preempted (not on SEMAPHORE_WAIT or > + * WAIT_FOR_EVENT) > + * bit 36: reserved > + * bits 37-43: wait detail (for switch detail 1 to 4) > + * bits 44-46: reserved > + * bits 47-57: sw context id of the lrc the GT switched away from > + * bits 58-63: sw counter of the lrc the GT switched away from > + */ > +static inline bool > +gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb) > +{ > + u32 lower_dw = csb[0]; > + u32 upper_dw = csb[1]; > + bool ctx_to_valid = GEN12_CSB_CTX_VALID(lower_dw); > + bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_dw); > + bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE; > + > + /* > + * The context switch detail is not guaranteed to be 5 when a preemption > + * occurs, so we can't just check for that. The check below works for > + * all the cases we care about, including preemptions of WAIT > + * instructions and lite-restore. Preempt-to-idle via the CTRL register > + * would require some extra handling, but we don't support that. > + */ > + if (!ctx_away_valid || new_queue) { > + GEM_BUG_ON(!ctx_to_valid); > + return true; > + } > + > + /* > + * switch detail = 5 is covered by the case above and we do not expect a > + * context switch on an unsuccessful wait instruction since we always > + * use polling mode. 
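The gen12_csb_parse() logic here is just bitfield tests against the layout documented above; a stand-alone decoder using the same fields (the constants mirror the GEN12_* macros in this patch, everything else is invented for illustration):

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	#define CSB_SW_CTX_ID_MASK	(0x7ffu << 15)	/* bits 25:15 of each dword */
	#define CSB_IDLE_CTX_ID		0x7ffu
	#define CSB_NEW_QUEUE		(1u << 0)	/* lower dword only */
	#define CSB_SWITCH_DETAIL(dw)	((dw) & 0xfu)	/* upper dword only */

	static bool csb_ctx_valid(uint32_t dw)
	{
		return ((dw & CSB_SW_CTX_ID_MASK) >> 15) != CSB_IDLE_CTX_ID;
	}

	/* returns true when the event promotes pending[] to active[] */
	static bool csb_parse(uint32_t lower, uint32_t upper)
	{
		bool to_valid = csb_ctx_valid(lower);
		bool away_valid = csb_ctx_valid(upper);
		bool new_queue = lower & CSB_NEW_QUEUE;

		if (!away_valid || new_queue)
			return to_valid;	/* always true in practice; the driver asserts it */

		/* pure completion / switch out, nothing promoted */
		return false;
	}

	int main(void)
	{
		/* switched to ctx 5 while nothing was running: promote */
		printf("%d\n", csb_parse(5u << 15, CSB_IDLE_CTX_ID << 15));
		/* ctx 5 completed and the engine went idle: no promotion */
		printf("%d\n", csb_parse(CSB_IDLE_CTX_ID << 15, 5u << 15));
		return 0;
	}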
> + */ > + GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw)); > + return false; > +} > + > +static inline bool > +gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb) > +{ > + return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED); > +} > + > +static void process_csb(struct intel_engine_cs *engine) > +{ > + struct intel_engine_execlists * const execlists = &engine->execlists; > + const u32 * const buf = execlists->csb_status; > + const u8 num_entries = execlists->csb_size; > + u8 head, tail; > + > + /* > + * As we modify our execlists state tracking we require exclusive > + * access. Either we are inside the tasklet, or the tasklet is disabled > + * and we assume that is only inside the reset paths and so serialised. > + */ > + GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) && > + !reset_in_progress(execlists)); > + GEM_BUG_ON(!intel_engine_in_execlists_submission_mode(engine)); > + > + /* > + * Note that csb_write, csb_status may be either in HWSP or mmio. > + * When reading from the csb_write mmio register, we have to be > + * careful to only use the GEN8_CSB_WRITE_PTR portion, which is > + * the low 4bits. As it happens we know the next 4bits are always > + * zero and so we can simply masked off the low u8 of the register > + * and treat it identically to reading from the HWSP (without having > + * to use explicit shifting and masking, and probably bifurcating > + * the code to handle the legacy mmio read). > + */ > + head = execlists->csb_head; > + tail = READ_ONCE(*execlists->csb_write); > + GEM_TRACE("%s cs-irq head=%d, tail=%d\n", engine->name, head, tail); > + if (unlikely(head == tail)) > + return; > + > + /* > + * Hopefully paired with a wmb() in HW! > + * > + * We must complete the read of the write pointer before any reads > + * from the CSB, so that we do not see stale values. Without an rmb > + * (lfence) the HW may speculatively perform the CSB[] reads *before* > + * we perform the READ_ONCE(*csb_write). > + */ > + rmb(); > + > + do { > + bool promote; > + > + if (++head == num_entries) > + head = 0; > + > + /* > + * We are flying near dragons again. > + * > + * We hold a reference to the request in execlist_port[] > + * but no more than that. We are operating in softirq > + * context and so cannot hold any mutex or sleep. That > + * prevents us stopping the requests we are processing > + * in port[] from being retired simultaneously (the > + * breadcrumb will be complete before we see the > + * context-switch). As we only hold the reference to the > + * request, any pointer chasing underneath the request > + * is subject to a potential use-after-free. Thus we > + * store all of the bookkeeping within port[] as > + * required, and avoid using unguarded pointers beneath > + * request itself. The same applies to the atomic > + * status notifier. 
> + */ > + > + GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x\n", > + engine->name, head, > + buf[2 * head + 0], buf[2 * head + 1]); > + > + if (INTEL_GEN(engine->i915) >= 12) > + promote = gen12_csb_parse(execlists, buf + 2 * head); > + else > + promote = gen8_csb_parse(execlists, buf + 2 * head); > + if (promote) { > + struct i915_request * const *old = execlists->active; > + > + /* Point active to the new ELSP; prevent overwriting */ > + WRITE_ONCE(execlists->active, execlists->pending); > + set_timeslice(engine); > + > + if (!inject_preempt_hang(execlists)) > + ring_set_paused(engine, 0); > + > + /* cancel old inflight, prepare for switch */ > + trace_ports(execlists, "preempted", old); > + while (*old) > + execlists_schedule_out(*old++); > + > + /* switch pending to inflight */ > + GEM_BUG_ON(!assert_pending_valid(execlists, "promote")); > + WRITE_ONCE(execlists->active, > + memcpy(execlists->inflight, > + execlists->pending, > + execlists_num_ports(execlists) * > + sizeof(*execlists->pending))); > + > + WRITE_ONCE(execlists->pending[0], NULL); > + } else { > + GEM_BUG_ON(!*execlists->active); > + > + /* port0 completed, advanced to port1 */ > + trace_ports(execlists, "completed", execlists->active); > + > + /* > + * We rely on the hardware being strongly > + * ordered, that the breadcrumb write is > + * coherent (visible from the CPU) before the > + * user interrupt and CSB is processed. > + */ > + GEM_BUG_ON(!i915_request_completed(*execlists->active) && > + !reset_in_progress(execlists)); > + execlists_schedule_out(*execlists->active++); > + > + GEM_BUG_ON(execlists->active - execlists->inflight > > + execlists_num_ports(execlists)); > + } > + } while (head != tail); > + > + execlists->csb_head = head; > + > + /* > + * Gen11 has proven to fail wrt global observation point between > + * entry and tail update, failing on the ordering and thus > + * we see an old entry in the context status buffer. > + * > + * Forcibly evict out entries for the next gpu csb update, > + * to increase the odds that we get a fresh entries with non > + * working hardware. The cost for doing so comes out mostly with > + * the wash as hardware, working or not, will need to do the > + * invalidation before. 
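The read loop in process_csb() above is a plain single-consumer walk of a circular buffer: snapshot the HW write pointer once, then pre-increment-and-wrap the cached head until it catches up. A user-space model (an acquire load stands in for the rmb(); names and sizes are invented), which also shows why reset_csb_pointers() further down parks the head at csb_size - 1 so the first entry examined after a reset is index 0:

	#include <stdatomic.h>
	#include <stdint.h>
	#include <stdio.h>

	#define NUM_ENTRIES	12	/* csb_size; value is illustrative */

	struct csb {
		uint32_t status[NUM_ENTRIES][2];	/* written by the "HW" */
		_Atomic uint8_t write;			/* producer's pointer */
		uint8_t head;				/* consumer's cursor */
	};

	static void process_csb(struct csb *csb)
	{
		uint8_t head = csb->head;
		/* acquire pairs with the producer's release of the entries */
		uint8_t tail = atomic_load_explicit(&csb->write,
						    memory_order_acquire);

		if (head == tail)
			return;

		do {
			if (++head == NUM_ENTRIES)
				head = 0;

			printf("csb[%u]: %08x %08x\n", (unsigned)head,
			       csb->status[head][0], csb->status[head][1]);
		} while (head != tail);

		csb->head = head;
	}

	int main(void)
	{
		static struct csb csb = { .head = NUM_ENTRIES - 1 };	/* post-reset */

		csb.status[0][0] = 0x1;		/* pretend the HW wrote entry 0 */
		atomic_store_explicit(&csb.write, 0, memory_order_release);

		process_csb(&csb);	/* walks entry 0 first, as intended */
		return 0;
	}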
> + */ > + invalidate_csb_entries(&buf[0], &buf[num_entries - 1]); > +} > + > +static void __execlists_submission_tasklet(struct intel_engine_cs *const engine) > +{ > + lockdep_assert_held(&engine->active.lock); > + if (!engine->execlists.pending[0]) { > + rcu_read_lock(); /* protect peeking at execlists->active */ > + execlists_dequeue(engine); > + rcu_read_unlock(); > + } > +} > + > +static noinline void preempt_reset(struct intel_engine_cs *engine) > +{ > + const unsigned int bit = I915_RESET_ENGINE + engine->id; > + unsigned long *lock = &engine->gt->reset.flags; > + > + if (i915_modparams.reset < 3) > + return; > + > + if (test_and_set_bit(bit, lock)) > + return; > + > + /* Mark this tasklet as disabled to avoid waiting for it to complete */ > + tasklet_disable_nosync(&engine->execlists.tasklet); > + > + GEM_TRACE("%s: preempt timeout %lu+%ums\n", > + engine->name, > + READ_ONCE(engine->props.preempt_timeout_ms), > + jiffies_to_msecs(jiffies - engine->execlists.preempt.expires)); > + intel_engine_reset(engine, "preemption time out"); > + > + tasklet_enable(&engine->execlists.tasklet); > + clear_and_wake_up_bit(bit, lock); > +} > + > +static bool preempt_timeout(const struct intel_engine_cs *const engine) > +{ > + const struct timer_list *t = &engine->execlists.preempt; > + > + if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT) > + return false; > + > + if (!timer_expired(t)) > + return false; > + > + return READ_ONCE(engine->execlists.pending[0]); > +} > + > +/* > + * Check the unread Context Status Buffers and manage the submission of new > + * contexts to the ELSP accordingly. > + */ > +static void execlists_submission_tasklet(unsigned long data) > +{ > + struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; > + bool timeout = preempt_timeout(engine); > + > + process_csb(engine); > + if (!READ_ONCE(engine->execlists.pending[0]) || timeout) { > + unsigned long flags; > + > + spin_lock_irqsave(&engine->active.lock, flags); > + __execlists_submission_tasklet(engine); > + spin_unlock_irqrestore(&engine->active.lock, flags); > + > + /* Recheck after serialising with direct-submission */ > + if (timeout && preempt_timeout(engine)) > + preempt_reset(engine); > + } > +} > + > +static void __execlists_kick(struct intel_engine_execlists *execlists) > +{ > + /* Kick the tasklet for some interrupt coalescing and reset handling */ > + tasklet_hi_schedule(&execlists->tasklet); > +} > + > +#define execlists_kick(t, member) \ > + __execlists_kick(container_of(t, struct intel_engine_execlists, member)) > + > +static void execlists_timeslice(struct timer_list *timer) > +{ > + execlists_kick(timer, timer); > +} > + > +static void execlists_preempt(struct timer_list *timer) > +{ > + execlists_kick(timer, preempt); > +} > + > +static void queue_request(struct intel_engine_cs *engine, > + struct i915_sched_node *node, > + int prio) > +{ > + GEM_BUG_ON(!list_empty(&node->link)); > + list_add_tail(&node->link, i915_sched_lookup_priolist(engine, prio)); > +} > + > +static void __submit_queue_imm(struct intel_engine_cs *engine) > +{ > + struct intel_engine_execlists * const execlists = &engine->execlists; > + > + if (reset_in_progress(execlists)) > + return; /* defer until we restart the engine following reset */ > + > + if (execlists->tasklet.func == execlists_submission_tasklet) > + __execlists_submission_tasklet(engine); > + else > + tasklet_hi_schedule(&execlists->tasklet); > +} > + > +static void submit_queue(struct intel_engine_cs *engine, > + const struct i915_request *rq) > +{ > + struct 
intel_engine_execlists *execlists = &engine->execlists; > + > + if (rq_prio(rq) <= execlists->queue_priority_hint) > + return; > + > + execlists->queue_priority_hint = rq_prio(rq); > + __submit_queue_imm(engine); > +} > + > +static void execlists_submit_request(struct i915_request *request) > +{ > + struct intel_engine_cs *engine = request->engine; > + unsigned long flags; > + > + /* Will be called from irq-context when using foreign fences. */ > + spin_lock_irqsave(&engine->active.lock, flags); > + > + queue_request(engine, &request->sched, rq_prio(request)); > + > + GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); > + GEM_BUG_ON(list_empty(&request->sched.link)); > + > + submit_queue(engine, request); > + > + spin_unlock_irqrestore(&engine->active.lock, flags); > +} > + > +static void execlists_context_destroy(struct kref *kref) > +{ > + struct intel_context *ce = container_of(kref, typeof(*ce), ref); > + > + GEM_BUG_ON(!i915_active_is_idle(&ce->active)); > + GEM_BUG_ON(intel_context_is_pinned(ce)); > + > + if (ce->state) > + intel_lr_context_fini(ce); > + > + intel_context_fini(ce); > + intel_context_free(ce); > +} > + > +static int execlists_context_pin(struct intel_context *ce) > +{ > + return intel_lr_context_pin(ce, ce->engine); > +} > + > +static int execlists_context_alloc(struct intel_context *ce) > +{ > + return intel_lr_context_alloc(ce, ce->engine); > +} > + > +static void execlists_context_reset(struct intel_context *ce) > +{ > + /* > + * Because we emit WA_TAIL_DWORDS there may be a disparity > + * between our bookkeeping in ce->ring->head and ce->ring->tail and > + * that stored in context. As we only write new commands from > + * ce->ring->tail onwards, everything before that is junk. If the GPU > + * starts reading from its RING_HEAD from the context, it may try to > + * execute that junk and die. > + * > + * The contexts that are stilled pinned on resume belong to the > + * kernel, and are local to each engine. All other contexts will > + * have their head/tail sanitized upon pinning before use, so they > + * will never see garbage, > + * > + * So to avoid that we reset the context images upon resume. For > + * simplicity, we just zero everything out. > + */ > + intel_ring_reset(ce->ring, 0); > + intel_lr_context_update_reg_state(ce, ce->engine); > +} > + > +static const struct intel_context_ops execlists_context_ops = { > + .alloc = execlists_context_alloc, > + > + .pin = execlists_context_pin, > + .unpin = intel_lr_context_unpin, > + > + .enter = intel_context_enter_engine, > + .exit = intel_context_exit_engine, > + > + .reset = execlists_context_reset, > + .destroy = execlists_context_destroy, > +}; > + > +static int execlists_request_alloc(struct i915_request *request) > +{ > + int ret; > + > + GEM_BUG_ON(!intel_context_is_pinned(request->hw_context)); > + > + /* > + * Flush enough space to reduce the likelihood of waiting after > + * we start building the request - in which case we will just > + * have to repeat work. > + */ > + request->reserved_space += EXECLISTS_REQUEST_SIZE; > + > + /* > + * Note that after this point, we have committed to using > + * this request as it is being used to both track the > + * state of engine initialisation and liveness of the > + * golden renderstate above. Think twice before you try > + * to cancel/unwind this request now. > + */ > + > + /* Unconditionally invalidate GPU caches and TLBs. 
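The reserved_space adjustment here in execlists_request_alloc() is the usual "demand extra headroom while the request is still being set up, hand it back once committed" trick, so any wait for ring space happens before work is emitted rather than halfway through building the request. A toy model with invented sizes and names:

	#include <stdbool.h>
	#include <stdio.h>

	#define RING_SIZE	4096
	#define REQUEST_RESERVE	64	/* stand-in for EXECLISTS_REQUEST_SIZE */

	struct ring {
		unsigned int used;
		unsigned int reserved;	/* space ring_begin() must not touch */
	};

	static bool ring_begin(struct ring *r, unsigned int bytes)
	{
		if (r->used + bytes + r->reserved > RING_SIZE)
			return false;	/* the driver would wait/retire here */
		r->used += bytes;
		return true;
	}

	static bool request_alloc(struct ring *r)
	{
		bool ok;

		r->reserved += REQUEST_RESERVE;	/* demand extra headroom early */
		ok = ring_begin(r, 32);		/* e.g. the invalidating flush */
		r->reserved -= REQUEST_RESERVE;	/* normal accounting from here on */

		return ok;
	}

	int main(void)
	{
		struct ring r = { .used = 0, .reserved = 64 /* breadcrumb */ };

		printf("request_alloc: %s\n",
		       request_alloc(&r) ? "ok" : "must wait");
		printf("used=%u reserved=%u\n", r.used, r.reserved);
		return 0;
	}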
*/ > + ret = request->engine->emit_flush(request, EMIT_INVALIDATE); > + if (ret) > + return ret; > + > + request->reserved_space -= EXECLISTS_REQUEST_SIZE; > + return 0; > +} > + > +static void execlists_reset_prepare(struct intel_engine_cs *engine) > +{ > + struct intel_engine_execlists * const execlists = &engine->execlists; > + unsigned long flags; > + > + GEM_TRACE("%s: depth<-%d\n", engine->name, > + atomic_read(&execlists->tasklet.count)); > + > + /* > + * Prevent request submission to the hardware until we have > + * completed the reset in i915_gem_reset_finish(). If a request > + * is completed by one engine, it may then queue a request > + * to a second via its execlists->tasklet *just* as we are > + * calling engine->resume() and also writing the ELSP. > + * Turning off the execlists->tasklet until the reset is over > + * prevents the race. > + */ > + __tasklet_disable_sync_once(&execlists->tasklet); > + GEM_BUG_ON(!reset_in_progress(execlists)); > + > + /* And flush any current direct submission. */ > + spin_lock_irqsave(&engine->active.lock, flags); > + spin_unlock_irqrestore(&engine->active.lock, flags); > + > + /* > + * We stop engines, otherwise we might get failed reset and a > + * dead gpu (on elk). Also as modern gpu as kbl can suffer > + * from system hang if batchbuffer is progressing when > + * the reset is issued, regardless of READY_TO_RESET ack. > + * Thus assume it is best to stop engines on all gens > + * where we have a gpu reset. > + * > + * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES) > + * > + * FIXME: Wa for more modern gens needs to be validated > + */ > + intel_engine_stop_cs(engine); > +} > + > +static void reset_csb_pointers(struct intel_engine_cs *engine) > +{ > + struct intel_engine_execlists * const execlists = &engine->execlists; > + const unsigned int reset_value = execlists->csb_size - 1; > + > + ring_set_paused(engine, 0); > + > + /* > + * After a reset, the HW starts writing into CSB entry [0]. We > + * therefore have to set our HEAD pointer back one entry so that > + * the *first* entry we check is entry 0. To complicate this further, > + * as we don't wait for the first interrupt after reset, we have to > + * fake the HW write to point back to the last entry so that our > + * inline comparison of our cached head position against the last HW > + * write works even before the first interrupt. > + */ > + execlists->csb_head = reset_value; > + WRITE_ONCE(*execlists->csb_write, reset_value); > + wmb(); /* Make sure this is visible to HW (paranoia?) */ > + > + /* > + * Sometimes Icelake forgets to reset its pointers on a GPU reset. > + * Bludgeon them with a mmio update to be sure. 
> + */ > + ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR, > + reset_value << 8 | reset_value); > + ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR); > + > + invalidate_csb_entries(&execlists->csb_status[0], > + &execlists->csb_status[reset_value]); > +} > + > +static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) > +{ > + struct intel_engine_execlists * const execlists = &engine->execlists; > + struct intel_context *ce; > + struct i915_request *rq; > + > + mb(); /* paranoia: read the CSB pointers from after the reset */ > + clflush(execlists->csb_write); > + mb(); > + > + process_csb(engine); /* drain preemption events */ > + > + /* Following the reset, we need to reload the CSB read/write pointers */ > + reset_csb_pointers(engine); > + > + /* > + * Save the currently executing context, even if we completed > + * its request, it was still running at the time of the > + * reset and will have been clobbered. > + */ > + rq = execlists_active(execlists); > + if (!rq) > + goto unwind; > + > + /* We still have requests in-flight; the engine should be active */ > + GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); > + > + ce = rq->hw_context; > + GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); > + > + if (i915_request_completed(rq)) { > + /* Idle context; tidy up the ring so we can restart afresh */ > + ce->ring->head = intel_ring_wrap(ce->ring, rq->tail); > + goto out_replay; > + } > + > + /* Context has requests still in-flight; it should not be idle! */ > + GEM_BUG_ON(i915_active_is_idle(&ce->active)); > + rq = active_request(ce->timeline, rq); > + ce->ring->head = intel_ring_wrap(ce->ring, rq->head); > + GEM_BUG_ON(ce->ring->head == ce->ring->tail); > + > + /* > + * If this request hasn't started yet, e.g. it is waiting on a > + * semaphore, we need to avoid skipping the request or else we > + * break the signaling chain. However, if the context is corrupt > + * the request will not restart and we will be stuck with a wedged > + * device. It is quite often the case that if we issue a reset > + * while the GPU is loading the context image, that the context > + * image becomes corrupt. > + * > + * Otherwise, if we have not started yet, the request should replay > + * perfectly and we do not need to flag the result as being erroneous. > + */ > + if (!i915_request_started(rq)) > + goto out_replay; > + > + /* > + * If the request was innocent, we leave the request in the ELSP > + * and will try to replay it on restarting. The context image may > + * have been corrupted by the reset, in which case we may have > + * to service a new GPU hang, but more likely we can continue on > + * without impact. > + * > + * If the request was guilty, we presume the context is corrupt > + * and have to at least restore the RING register in the context > + * image back to the expected values to skip over the guilty request. > + */ > + __i915_request_reset(rq, stalled); > + if (!stalled) > + goto out_replay; > + > + /* > + * We want a simple context + ring to execute the breadcrumb update. > + * We cannot rely on the context being intact across the GPU hang, > + * so clear it and rebuild just what we need for the breadcrumb. > + * All pending requests for this context will be zapped, and any > + * future request will be after userspace has had the opportunity > + * to recreate its own state. 
> + */ > + GEM_BUG_ON(!intel_context_is_pinned(ce)); > + intel_lr_context_restore_default_state(ce, engine); > + > +out_replay: > + GEM_TRACE("%s replay {head:%04x, tail:%04x}\n", > + engine->name, ce->ring->head, ce->ring->tail); > + intel_ring_update_space(ce->ring); > + intel_lr_context_reset_reg_state(ce, engine); > + intel_lr_context_update_reg_state(ce, engine); > + ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */ > + > +unwind: > + /* Push back any incomplete requests for replay after the reset. */ > + cancel_port_requests(execlists); > + __unwind_incomplete_requests(engine); > +} > + > +static void execlists_reset(struct intel_engine_cs *engine, bool stalled) > +{ > + unsigned long flags; > + > + GEM_TRACE("%s\n", engine->name); > + > + spin_lock_irqsave(&engine->active.lock, flags); > + > + __execlists_reset(engine, stalled); > + > + spin_unlock_irqrestore(&engine->active.lock, flags); > +} > + > +static void nop_submission_tasklet(unsigned long data) > +{ > + /* The driver is wedged; don't process any more events. */ > +} > + > +static void execlists_cancel_requests(struct intel_engine_cs *engine) > +{ > + struct intel_engine_execlists * const execlists = &engine->execlists; > + struct i915_request *rq, *rn; > + struct rb_node *rb; > + unsigned long flags; > + > + GEM_TRACE("%s\n", engine->name); > + > + /* > + * Before we call engine->cancel_requests(), we should have exclusive > + * access to the submission state. This is arranged for us by the > + * caller disabling the interrupt generation, the tasklet and other > + * threads that may then access the same state, giving us a free hand > + * to reset state. However, we still need to let lockdep be aware that > + * we know this state may be accessed in hardirq context, so we > + * disable the irq around this manipulation and we want to keep > + * the spinlock focused on its duties and not accidentally conflate > + * coverage to the submission's irq state. (Similarly, although we > + * shouldn't need to disable irq around the manipulation of the > + * submission's irq state, we also wish to remind ourselves that > + * it is irq state.) > + */ > + spin_lock_irqsave(&engine->active.lock, flags); > + > + __execlists_reset(engine, true); > + > + /* Mark all executing requests as skipped. */ > + list_for_each_entry(rq, &engine->active.requests, sched.link) > + mark_eio(rq); > + > + /* Flush the queued requests to the timeline list (for retiring). 
*/ > + while ((rb = rb_first_cached(&execlists->queue))) { > + struct i915_priolist *p = to_priolist(rb); > + int i; > + > + priolist_for_each_request_consume(rq, rn, p, i) { > + mark_eio(rq); > + __i915_request_submit(rq); > + } > + > + rb_erase_cached(&p->node, &execlists->queue); > + i915_priolist_free(p); > + } > + > + /* Cancel all attached virtual engines */ > + while ((rb = rb_first_cached(&execlists->virtual))) { > + struct intel_virtual_engine *ve = > + rb_entry(rb, typeof(*ve), nodes[engine->id].rb); > + > + rb_erase_cached(rb, &execlists->virtual); > + RB_CLEAR_NODE(rb); > + > + spin_lock(&ve->base.active.lock); > + rq = fetch_and_zero(&ve->request); > + if (rq) { > + mark_eio(rq); > + > + rq->engine = engine; > + __i915_request_submit(rq); > + i915_request_put(rq); > + > + ve->base.execlists.queue_priority_hint = INT_MIN; > + } > + spin_unlock(&ve->base.active.lock); > + } > + > + /* Remaining _unready_ requests will be nop'ed when submitted */ > + > + execlists->queue_priority_hint = INT_MIN; > + execlists->queue = RB_ROOT_CACHED; > + > + GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet)); > + execlists->tasklet.func = nop_submission_tasklet; > + > + spin_unlock_irqrestore(&engine->active.lock, flags); > +} > + > +static void execlists_reset_finish(struct intel_engine_cs *engine) > +{ > + struct intel_engine_execlists * const execlists = &engine->execlists; > + > + /* > + * After a GPU reset, we may have requests to replay. Do so now while > + * we still have the forcewake to be sure that the GPU is not allowed > + * to sleep before we restart and reload a context. > + */ > + GEM_BUG_ON(!reset_in_progress(execlists)); > + if (!RB_EMPTY_ROOT(&execlists->queue.rb_root)) > + execlists->tasklet.func(execlists->tasklet.data); > + > + if (__tasklet_enable(&execlists->tasklet)) > + /* And kick in case we missed a new request submission. 
*/ > + tasklet_hi_schedule(&execlists->tasklet); > + GEM_TRACE("%s: depth->%d\n", engine->name, > + atomic_read(&execlists->tasklet.count)); > +} > + > +static void execlists_park(struct intel_engine_cs *engine) > +{ > + cancel_timer(&engine->execlists.timer); > + cancel_timer(&engine->execlists.preempt); > +} > + > +static void execlists_destroy(struct intel_engine_cs *engine) > +{ > + /* Synchronise with residual timers and any softirq they raise */ > + del_timer_sync(&engine->execlists.timer); > + del_timer_sync(&engine->execlists.preempt); > + tasklet_kill(&engine->execlists.tasklet); > + > + intel_logical_ring_destroy(engine); > +} > + > +void intel_execlists_set_default_submission(struct intel_engine_cs *engine) > +{ > + engine->request_alloc = execlists_request_alloc; > + engine->submit_request = execlists_submit_request; > + engine->cancel_requests = execlists_cancel_requests; > + engine->schedule = i915_schedule; > + engine->execlists.tasklet.func = execlists_submission_tasklet; > + > + engine->reset.prepare = execlists_reset_prepare; > + engine->reset.reset = execlists_reset; > + engine->reset.finish = execlists_reset_finish; > + > + engine->destroy = execlists_destroy; > + engine->park = execlists_park; > + engine->unpark = NULL; > + > + engine->flags |= I915_ENGINE_SUPPORTS_STATS; > + if (!intel_vgpu_active(engine->i915)) { > + engine->flags |= I915_ENGINE_HAS_SEMAPHORES; > + if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) > + engine->flags |= I915_ENGINE_HAS_PREEMPTION; > + } > + > + if (INTEL_GEN(engine->i915) >= 12) > + engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO; > +} > + > +int intel_execlists_submission_setup(struct intel_engine_cs *engine) > +{ > + tasklet_init(&engine->execlists.tasklet, > + execlists_submission_tasklet, (unsigned long)engine); > + timer_setup(&engine->execlists.timer, execlists_timeslice, 0); > + timer_setup(&engine->execlists.preempt, execlists_preempt, 0); > + > + intel_logical_ring_setup(engine); > + > + engine->set_default_submission = intel_execlists_set_default_submission; > + engine->cops = &execlists_context_ops; > + > + return 0; > +} > + > +int intel_execlists_submission_init(struct intel_engine_cs *engine) > +{ > + struct intel_engine_execlists * const execlists = &engine->execlists; > + struct drm_i915_private *i915 = engine->i915; > + struct intel_uncore *uncore = engine->uncore; > + u32 base = engine->mmio_base; > + int ret; > + > + ret = intel_logical_ring_init(engine); > + if (ret) > + return ret; > + > + if (HAS_LOGICAL_RING_ELSQ(i915)) { > + execlists->submit_reg = uncore->regs + > + i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base)); > + execlists->ctrl_reg = uncore->regs + > + i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base)); > + } else { > + execlists->submit_reg = uncore->regs + > + i915_mmio_reg_offset(RING_ELSP(base)); > + } > + > + execlists->csb_status = > + &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX]; > + > + execlists->csb_write = > + &engine->status_page.addr[intel_hws_csb_write_index(i915)]; > + > + if (INTEL_GEN(i915) < 11) > + execlists->csb_size = GEN8_CSB_ENTRIES; > + else > + execlists->csb_size = GEN11_CSB_ENTRIES; > + > + reset_csb_pointers(engine); > + > + return 0; > +} > + > +static intel_engine_mask_t > +virtual_submission_mask(struct intel_virtual_engine *ve) > +{ > + struct i915_request *rq; > + intel_engine_mask_t mask; > + > + rq = READ_ONCE(ve->request); > + if (!rq) > + return 0; > + > + /* The rq is ready for submission; rq->execution_mask is now stable. 
*/ > + mask = rq->execution_mask; > + if (unlikely(!mask)) { > + /* Invalid selection, submit to a random engine in error */ > + i915_request_skip(rq, -ENODEV); > + mask = ve->siblings[0]->mask; > + } > + > + GEM_TRACE("%s: rq=%llx:%lld, mask=%x, prio=%d\n", > + ve->base.name, > + rq->fence.context, rq->fence.seqno, > + mask, ve->base.execlists.queue_priority_hint); > + > + return mask; > +} > + > +static void virtual_submission_tasklet(unsigned long data) > +{ > + struct intel_virtual_engine * const ve = > + (struct intel_virtual_engine *)data; > + const int prio = ve->base.execlists.queue_priority_hint; > + intel_engine_mask_t mask; > + unsigned int n; > + > + rcu_read_lock(); > + mask = virtual_submission_mask(ve); > + rcu_read_unlock(); > + if (unlikely(!mask)) > + return; > + > + local_irq_disable(); > + for (n = 0; READ_ONCE(ve->request) && n < ve->num_siblings; n++) { > + struct intel_engine_cs *sibling = ve->siblings[n]; > + struct ve_node * const node = &ve->nodes[sibling->id]; > + struct rb_node **parent, *rb; > + bool first; > + > + if (unlikely(!(mask & sibling->mask))) { > + if (!RB_EMPTY_NODE(&node->rb)) { > + spin_lock(&sibling->active.lock); > + rb_erase_cached(&node->rb, > + &sibling->execlists.virtual); > + RB_CLEAR_NODE(&node->rb); > + spin_unlock(&sibling->active.lock); > + } > + continue; > + } > + > + spin_lock(&sibling->active.lock); > + > + if (!RB_EMPTY_NODE(&node->rb)) { > + /* > + * Cheat and avoid rebalancing the tree if we can > + * reuse this node in situ. > + */ > + first = rb_first_cached(&sibling->execlists.virtual) == > + &node->rb; > + if (prio == node->prio || (prio > node->prio && first)) > + goto submit_engine; > + > + rb_erase_cached(&node->rb, &sibling->execlists.virtual); > + } > + > + rb = NULL; > + first = true; > + parent = &sibling->execlists.virtual.rb_root.rb_node; > + while (*parent) { > + struct ve_node *other; > + > + rb = *parent; > + other = rb_entry(rb, typeof(*other), rb); > + if (prio > other->prio) { > + parent = &rb->rb_left; > + } else { > + parent = &rb->rb_right; > + first = false; > + } > + } > + > + rb_link_node(&node->rb, rb, parent); > + rb_insert_color_cached(&node->rb, > + &sibling->execlists.virtual, > + first); > + > +submit_engine: > + GEM_BUG_ON(RB_EMPTY_NODE(&node->rb)); > + node->prio = prio; > + if (first && prio > sibling->execlists.queue_priority_hint) { > + sibling->execlists.queue_priority_hint = prio; > + tasklet_hi_schedule(&sibling->execlists.tasklet); > + } > + > + spin_unlock(&sibling->active.lock); > + } > + local_irq_enable(); > +} > + > +static void virtual_submit_request(struct i915_request *rq) > +{ > + struct intel_virtual_engine *ve = to_virtual_engine(rq->engine); > + struct i915_request *old; > + unsigned long flags; > + > + GEM_TRACE("%s: rq=%llx:%lld\n", > + ve->base.name, > + rq->fence.context, > + rq->fence.seqno); > + > + GEM_BUG_ON(ve->base.submit_request != virtual_submit_request); > + > + spin_lock_irqsave(&ve->base.active.lock, flags); > + > + old = ve->request; > + if (old) { /* background completion event from preempt-to-busy */ > + GEM_BUG_ON(!i915_request_completed(old)); > + __i915_request_submit(old); > + i915_request_put(old); > + } > + > + if (i915_request_completed(rq)) { > + __i915_request_submit(rq); > + > + ve->base.execlists.queue_priority_hint = INT_MIN; > + ve->request = NULL; > + } else { > + ve->base.execlists.queue_priority_hint = rq_prio(rq); > + ve->request = i915_request_get(rq); > + > + GEM_BUG_ON(!list_empty(intel_virtual_engine_queue(ve))); > + 
list_move_tail(&rq->sched.link, intel_virtual_engine_queue(ve)); > + > + tasklet_schedule(&ve->base.execlists.tasklet); > + } > + > + spin_unlock_irqrestore(&ve->base.active.lock, flags); > +} > + > +static void > +virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal) > +{ > + struct intel_virtual_engine *ve = to_virtual_engine(rq->engine); > + intel_engine_mask_t allowed, exec; > + struct ve_bond *bond; > + > + allowed = ~to_request(signal)->engine->mask; > + > + bond = intel_virtual_engine_find_bond(ve, to_request(signal)->engine); > + if (bond) > + allowed &= bond->sibling_mask; > + > + /* Restrict the bonded request to run on only the available engines */ > + exec = READ_ONCE(rq->execution_mask); > + while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed)) > + ; > + > + /* Prevent the master from being re-run on the bonded engines */ > + to_request(signal)->execution_mask &= ~allowed; > +} > + > +void intel_execlists_virtual_submission_init(struct intel_virtual_engine *ve) > +{ > + ve->base.request_alloc = execlists_request_alloc; > + ve->base.submit_request = virtual_submit_request; > + ve->base.bond_execute = virtual_bond_execute; > + tasklet_init(&ve->base.execlists.tasklet, > + virtual_submission_tasklet, > + (unsigned long)ve); > +} > + > +void intel_execlists_show_requests(struct intel_engine_cs *engine, > + struct drm_printer *m, > + void (*show_request)(struct drm_printer *m, > + struct i915_request *rq, > + const char *prefix), > + unsigned int max) > +{ > + const struct intel_engine_execlists *execlists = &engine->execlists; > + struct i915_request *rq, *last; > + unsigned long flags; > + unsigned int count; > + struct rb_node *rb; > + > + spin_lock_irqsave(&engine->active.lock, flags); > + > + last = NULL; > + count = 0; > + list_for_each_entry(rq, &engine->active.requests, sched.link) { > + if (count++ < max - 1) > + show_request(m, rq, "\t\tE "); > + else > + last = rq; > + } > + if (last) { > + if (count > max) { > + drm_printf(m, > + "\t\t...skipping %d executing requests...\n", > + count - max); > + } > + show_request(m, last, "\t\tE "); > + } > + > + last = NULL; > + count = 0; > + if (execlists->queue_priority_hint != INT_MIN) > + drm_printf(m, "\t\tQueue priority hint: %d\n", > + execlists->queue_priority_hint); > + for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) { > + struct i915_priolist *p = rb_entry(rb, typeof(*p), node); > + int i; > + > + priolist_for_each_request(rq, p, i) { > + if (count++ < max - 1) > + show_request(m, rq, "\t\tQ "); > + else > + last = rq; > + } > + } > + if (last) { > + if (count > max) { > + drm_printf(m, > + "\t\t...skipping %d queued requests...\n", > + count - max); > + } > + show_request(m, last, "\t\tQ "); > + } > + > + last = NULL; > + count = 0; > + for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) { > + struct intel_virtual_engine *ve = > + rb_entry(rb, typeof(*ve), nodes[engine->id].rb); > + struct i915_request *rq = READ_ONCE(ve->request); > + > + if (rq) { > + if (count++ < max - 1) > + show_request(m, rq, "\t\tV "); > + else > + last = rq; > + } > + } > + if (last) { > + if (count > max) { > + drm_printf(m, > + "\t\t...skipping %d virtual requests...\n", > + count - max); > + } > + show_request(m, last, "\t\tV "); > + } > + > + spin_unlock_irqrestore(&engine->active.lock, flags); > +} > + > +bool > +intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine) > +{ > + return engine->set_default_submission == > + 
intel_execlists_set_default_submission; > +}

The breadcrumb submission code is specialised to execlists and should not be shared (leaving emit_flush and emit_bb_start as common code in gen8_submission.c).

The reset code is specialised to execlists and should not be shared.

The virtual engine is specialised to execlists and should not be shared.

Even submit_request should be distinct between the GuC and execlists backends, and especially request_alloc (which you may like to put on the context_ops rather than on the engine).
-Chris
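
As a rough illustration of that last point (a sketch only, not code from this series): one possible shape is a new .request_alloc hook on intel_context_ops, so each submission backend supplies it alongside the context ops it already installs via engine->cops. The .request_alloc member, guc_request_alloc() and request_alloc_hook() below are invented names for the sketch, not taken from the posted patch.

/*
 * Hypothetical sketch: request_alloc hoisted from the engine vfuncs
 * onto the context ops.  Struct shown abridged; only the new member
 * is an addition over the existing ops.
 */
struct intel_context_ops {
	int (*alloc)(struct intel_context *ce);

	int (*pin)(struct intel_context *ce);
	void (*unpin)(struct intel_context *ce);

	/* new: per-backend request bookkeeping lives with the context */
	int (*request_alloc)(struct i915_request *rq);

	void (*enter)(struct intel_context *ce);
	void (*exit)(struct intel_context *ce);

	void (*reset)(struct intel_context *ce);
	void (*destroy)(struct kref *kref);
};

static const struct intel_context_ops execlists_context_ops = {
	.alloc = execlists_context_alloc,
	.pin = execlists_context_pin,
	.unpin = intel_lr_context_unpin,
	.request_alloc = execlists_request_alloc,
	.enter = intel_context_enter_engine,
	.exit = intel_context_exit_engine,
	.reset = execlists_context_reset,
	.destroy = execlists_context_destroy,
};

/* A GuC backend would then supply its own variant in its own ops table. */
static const struct intel_context_ops guc_context_ops = {
	.alloc = execlists_context_alloc,	/* context image is shared */
	.request_alloc = guc_request_alloc,	/* hypothetical GuC hook */
	/* ... */
};

/* Hypothetical caller: dispatch through the context rather than the engine. */
static int request_alloc_hook(struct i915_request *rq)
{
	const struct intel_context_ops *ops = rq->hw_context->ops;

	return ops->request_alloc ? ops->request_alloc(rq) : 0;
}

With something along those lines, engine->request_alloc could eventually be dropped from the engine vfuncs once both backends route through their context ops.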