---
drivers/gpu/drm/i915/gt/intel_context_types.h | 9 +
drivers/gpu/drm/i915/gt/uc/intel_guc.h | 4 +
.../gpu/drm/i915/gt/uc/intel_guc_submission.c | 231 +++++++++---------
3 files changed, 127 insertions(+), 117 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 90026c177105..6d99631d19b9 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -137,6 +137,15 @@ struct intel_context {
struct intel_sseu sseu;
u8 wa_bb_page; /* if set, page num reserved for context workarounds */
+
+ /* GuC scheduling state flags that do not require a lock. */
+ atomic_t guc_sched_state_no_lock;
+
+ /*
+ * GuC LRC descriptor ID - Not assigned in this patch; a future patch
+ * in the series will assign it.
+ */
+ u16 guc_id;
};
#endif /* __INTEL_CONTEXT_TYPES__ */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 35783558d261..8c7b92f699f1 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -30,6 +30,10 @@ struct intel_guc {
struct intel_guc_log log;
struct intel_guc_ct ct;
+ /* Global scheduling engine used to submit requests to the GuC */
+ struct i915_sched_engine *sched_engine;
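+
+ /* Request that failed with -EBUSY; resubmitted before any new work */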
+ struct i915_request *stalled_request;
+
/* intel_guc_recv interrupt related state */
spinlock_t irq_lock;
unsigned int msg_enabled_mask;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 23a94a896a0b..ca0717166a27 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -60,6 +60,31 @@
#define GUC_REQUEST_SIZE 64 /* bytes */
+/*
+ * Below is a set of functions which control the GuC scheduling state and do
+ * not require a lock as all state transitions are mutually exclusive, i.e. it
+ * is not possible for the context pinning code and submission, for the same
+ * context, to be executing simultaneously. We still need an atomic as it is
+ * possible for some of the bits to change at the same time though.
+ */
+#define SCHED_STATE_NO_LOCK_ENABLED BIT(0)
+static inline bool context_enabled(struct intel_context *ce)
+{
+ return (atomic_read(&ce->guc_sched_state_no_lock) &
+ SCHED_STATE_NO_LOCK_ENABLED);
+}
+
+static inline void set_context_enabled(struct intel_context *ce)
+{
+ atomic_or(SCHED_STATE_NO_LOCK_ENABLED, &ce->guc_sched_state_no_lock);
+}
+
+static inline void clr_context_enabled(struct intel_context *ce)
+{
+ atomic_and((u32)~SCHED_STATE_NO_LOCK_ENABLED,
+ &ce->guc_sched_state_no_lock);
+}
+
static inline struct i915_priolist *to_priolist(struct rb_node *rb)
{
return rb_entry(rb, struct i915_priolist, node);
@@ -122,37 +147,29 @@ static inline void set_lrc_desc_registered(struct intel_guc *guc, u32 id,
xa_store_irq(&guc->context_lookup, id, ce, GFP_ATOMIC);
}
-static void guc_add_request(struct intel_guc *guc, struct i915_request *rq)
+static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
{
- /* Leaving stub as this function will be used in future patches */
-}
+ int err;
+ struct intel_context *ce = rq->context;
+ u32 action[3];
+ int len = 0;
+ bool enabled = context_enabled(ce);
-/*
- * When we're doing submissions using regular execlists backend, writing to
- * ELSP from CPU side is enough to make sure that writes to ringbuffer pages
- * pinned in mappable aperture portion of GGTT are visible to command streamer.
- * Writes done by GuC on our behalf are not guaranteeing such ordering,
- * therefore, to ensure the flush, we're issuing a POSTING READ.
- */
-static void flush_ggtt_writes(struct i915_vma *vma)
-{
- if (i915_vma_is_map_and_fenceable(vma))
- intel_uncore_posting_read_fw(vma->vm->gt->uncore,
- GUC_STATUS);
-}
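+ /*
+ * The first submission on a context has to enable scheduling with a
+ * SCHED_CONTEXT_MODE_SET H2G; once enabled, a SCHED_CONTEXT H2G is
+ * enough to tell the GuC there is new work on the context.
+ */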
+ if (!enabled) {
+ action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
+ action[len++] = ce->guc_id;
+ action[len++] = GUC_CONTEXT_ENABLE;
+ } else {
+ action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT;
+ action[len++] = ce->guc_id;
+ }
-static void guc_submit(struct intel_engine_cs *engine,
- struct i915_request **out,
- struct i915_request **end)
-{
- struct intel_guc *guc = &engine->gt->uc.guc;
+ err = intel_guc_send_nb(guc, action, len);
- do {
- struct i915_request *rq = *out++;
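+ /* Only mark scheduling as enabled if the H2G was sent successfully */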
+ if (!enabled && !err)
+ set_context_enabled(ce);
- flush_ggtt_writes(rq->ring->vma);
- guc_add_request(guc, rq);
- } while (out != end);
+ return err;
}
static inline int rq_prio(const struct i915_request *rq)
@@ -160,125 +177,88 @@ static inline int rq_prio(const struct i915_request *rq)
return rq->sched.attr.priority;
}
-static struct i915_request *schedule_in(struct i915_request *rq, int idx)
+static int guc_dequeue_one_context(struct intel_guc *guc)
{
- trace_i915_request_in(rq, idx);
-
- /*
- * Currently we are not tracking the rq->context being inflight
- * (ce->inflight = rq->engine). It is only used by the execlists
- * backend at the moment, a similar counting strategy would be
- * required if we generalise the inflight tracking.
- */
-
- __intel_gt_pm_get(rq->engine->gt);
- return i915_request_get(rq);
-}
-
-static void schedule_out(struct i915_request *rq)
-{
- trace_i915_request_out(rq);
-
- intel_gt_pm_put_async(rq->engine->gt);
- i915_request_put(rq);
-}
-
-static void __guc_dequeue(struct intel_engine_cs *engine)
-{
- struct intel_engine_execlists * const execlists = &engine->execlists;
- struct i915_sched_engine * const sched_engine = engine->sched_engine;
- struct i915_request **first = execlists->inflight;
- struct i915_request ** const last_port = first + execlists->port_mask;
- struct i915_request *last = first[0];
- struct i915_request **port;
+ struct i915_sched_engine * const sched_engine = guc->sched_engine;
+ struct i915_request *last = NULL;
bool submit = false;
struct rb_node *rb;
+ int ret;
lockdep_assert_held(&sched_engine->lock);
- if (last) {
- if (*++first)
- return;
-
- last = NULL;
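+ /*
+ * A request that failed with -EBUSY on a previous pass is retried
+ * before new work is pulled off the priority queue.
+ */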
+ if (guc->stalled_request) {
+ submit = true;
+ last = guc->stalled_request;
+ goto resubmit;
}
- /*
- * We write directly into the execlists->inflight queue and don't use
- * the execlists->pending queue, as we don't have a distinct switch
- * event.
- */
- port = first;
while ((rb = rb_first_cached(&sched_engine->queue))) {
struct i915_priolist *p = to_priolist(rb);
struct i915_request *rq, *rn;
priolist_for_each_request_consume(rq, rn, p) {
- if (last && rq->context != last->context) {
- if (port == last_port)
- goto done;
-
- *port = schedule_in(last,
- port - execlists->inflight);
- port++;
- }
+ if (last && rq->context != last->context)
+ goto done;
list_del_init(&rq->sched.link);
+
__i915_request_submit(rq);
- submit = true;
+
+ trace_i915_request_in(rq, 0);
last = rq;
+ submit = true;
}
rb_erase_cached(&p->node, &sched_engine->queue);
i915_priolist_free(p);
}
done:
- sched_engine->queue_priority_hint =
- rb ? to_priolist(rb)->priority : INT_MIN;
if (submit) {
- *port = schedule_in(last, port - execlists->inflight);
- *++port = NULL;
- guc_submit(engine, first, port);
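+ /*
+ * Publish the ring tail in the context image so the GuC sees all
+ * of the requests just submitted on this context.
+ */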
+ last->context->lrc_reg_state[CTX_RING_TAIL] =
+ intel_ring_set_tail(last->ring, last->tail);
+resubmit:
+ /*
+ * We only check for -EBUSY here even though -EDEADLK can also be
+ * returned. -EDEADLK means the GuC has died and a full GT reset is
+ * needed; the hangcheck will eventually detect the dead GuC and
+ * trigger that reset, so there is no need to handle -EDEADLK
+ * here.
+ */
+ ret = guc_add_request(guc, last);
+ if (ret == -EBUSY) {
+ tasklet_schedule(&sched_engine->tasklet);
+ guc->stalled_request = last;
+ return false;
+ }
}
- execlists->active = execlists->inflight;
+
+ guc->stalled_request = NULL;
+ return submit;
}
static void guc_submission_tasklet(struct tasklet_struct *t)
{
struct i915_sched_engine *sched_engine =
from_tasklet(sched_engine, t, tasklet);
- struct intel_engine_cs * const engine = sched_engine->private_data;
- struct intel_engine_execlists * const execlists = &engine->execlists;
- struct i915_request **port, *rq;
unsigned long flags;
+ bool loop;
- spin_lock_irqsave(&engine->sched_engine->lock, flags);
-
- for (port = execlists->inflight; (rq = *port); port++) {
- if (!i915_request_completed(rq))
- break;
-
- schedule_out(rq);
- }
- if (port != execlists->inflight) {
- int idx = port - execlists->inflight;
- int rem = ARRAY_SIZE(execlists->inflight) - idx;
- memmove(execlists->inflight, port, rem * sizeof(*port));
- }
+ spin_lock_irqsave(&sched_engine->lock, flags);
- __guc_dequeue(engine);
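+ /* Dequeue one context at a time until empty or the GuC pushes back */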
+ do {
+ loop = guc_dequeue_one_context(sched_engine->private_data);
+ } while (loop);
- i915_sched_engine_reset_on_empty(engine->sched_engine);
+ i915_sched_engine_reset_on_empty(sched_engine);
- spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
+ spin_unlock_irqrestore(&sched_engine->lock, flags);
}
static void cs_irq_handler(struct intel_engine_cs *engine, u16 iir)
{
- if (iir & GT_RENDER_USER_INTERRUPT) {
+ if (iir & GT_RENDER_USER_INTERRUPT)
intel_engine_signal_breadcrumbs(engine);
- tasklet_hi_schedule(&engine->sched_engine->tasklet);
- }
}
static void guc_reset_prepare(struct intel_engine_cs *engine)
@@ -349,6 +329,10 @@ static void guc_reset_cancel(struct intel_engine_cs *engine)
struct rb_node *rb;
unsigned long flags;
+ /* Can be called during boot if GuC fails to load */
+ if (!engine->gt)
+ return;
+
ENGINE_TRACE(engine, "\n");
/*
@@ -433,8 +417,11 @@ int intel_guc_submission_init(struct intel_guc *guc)
void intel_guc_submission_fini(struct intel_guc *guc)
{
- if (guc->lrc_desc_pool)
- guc_lrc_desc_pool_destroy(guc);
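+ /* The lrc_desc_pool is only allocated if submission was initialized */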
+ if (!guc->lrc_desc_pool)
+ return;
+
+ guc_lrc_desc_pool_destroy(guc);
+ i915_sched_engine_put(guc->sched_engine);
}
static int guc_context_alloc(struct intel_context *ce)
@@ -499,32 +486,32 @@ static int guc_request_alloc(struct i915_request *request)
return 0;
}
-static inline void queue_request(struct intel_engine_cs *engine,
+static inline void queue_request(struct i915_sched_engine *sched_engine,
struct i915_request *rq,
int prio)
{
GEM_BUG_ON(!list_empty(&rq->sched.link));
list_add_tail(&rq->sched.link,
- i915_sched_lookup_priolist(engine->sched_engine, prio));
+ i915_sched_lookup_priolist(sched_engine, prio));
set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
}
static void guc_submit_request(struct i915_request *rq)
{
- struct intel_engine_cs *engine = rq->engine;
+ struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
unsigned long flags;
/* Will be called from irq-context when using foreign fences. */
- spin_lock_irqsave(&engine->sched_engine->lock, flags);
+ spin_lock_irqsave(&sched_engine->lock, flags);
- queue_request(engine, rq, rq_prio(rq));
+ queue_request(sched_engine, rq, rq_prio(rq));
- GEM_BUG_ON(i915_sched_engine_is_empty(engine->sched_engine));
+ GEM_BUG_ON(i915_sched_engine_is_empty(sched_engine));
GEM_BUG_ON(list_empty(&rq->sched.link));
- tasklet_hi_schedule(&engine->sched_engine->tasklet);
+ tasklet_hi_schedule(&sched_engine->tasklet);
- spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
+ spin_unlock_irqrestore(&sched_engine->lock, flags);
}
static void sanitize_hwsp(struct intel_engine_cs *engine)
@@ -602,8 +589,6 @@ static void guc_release(struct intel_engine_cs *engine)
{
engine->sanitize = NULL; /* no longer in control, nothing to sanitize */
- tasklet_kill(&engine->sched_engine->tasklet);
-
intel_engine_cleanup_common(engine);
lrc_fini_wa_ctx(engine);
}
@@ -674,6 +659,7 @@ static inline void guc_default_irqs(struct intel_engine_cs *engine)
int intel_guc_submission_setup(struct intel_engine_cs *engine)
{
struct drm_i915_private *i915 = engine->i915;
+ struct intel_guc *guc = &engine->gt->uc.guc;
/*
* The setup relies on several assumptions (e.g. irqs always enabled)
@@ -681,7 +667,18 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine)
*/
GEM_BUG_ON(GRAPHICS_VER(i915) < 11);
- tasklet_setup(&engine->sched_engine->tasklet, guc_submission_tasklet);
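+ /*
+ * All engines share a single GuC-owned sched_engine, so only
+ * create it for the first engine to be set up.
+ */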
+ if (!guc->sched_engine) {
+ guc->sched_engine = i915_sched_engine_create(ENGINE_VIRTUAL);
+ if (!guc->sched_engine)
+ return -ENOMEM;
+
+ guc->sched_engine->schedule = i915_schedule;
+ guc->sched_engine->private_data = guc;
+ tasklet_setup(&guc->sched_engine->tasklet,
+ guc_submission_tasklet);
+ }
+ i915_sched_engine_put(engine->sched_engine);
+ engine->sched_engine = i915_sched_engine_get(guc->sched_engine);
guc_default_vfuncs(engine);
guc_default_irqs(engine);