v2: Added needs_preempt_context() change so that it is not created when
preempt-to-idle is supported. (Chris)
Updated setting HWACK flag so that it is cleared after
preempt-to-dle. (Chris, Daniele)
Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)
v3: Fixed needs_preempt_context() change. (Chris)
Merged preemption trigger functions to one. (Chris)
Fixed conyext state tonot assume COMPLETED_MASK after preemption,
since idle-to-idle case will not have it set.
v4: Simplified needs_preempt_context() change. (Daniele)
Removed clearing HWACK flag in idle-to-idle preempt. (Daniele)
v5: Renamed inject_preempt_context(). (Daniele)
Removed duplicated GEM_BUG_ON() on HWACK (Daniele)
v6: Added performance test results.
Bspec: 18922
Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx>
Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@xxxxxxxxx>
Cc: Michal Winiarski <michal.winiarski@xxxxxxxxx>
Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxx>
Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@xxxxxxxxx>
Signed-off-by: Tomasz Lis <tomasz.lis@xxxxxxxxx>
---
drivers/gpu/drm/i915/i915_drv.h | 2 +
drivers/gpu/drm/i915/i915_gem_context.c | 3 +-
drivers/gpu/drm/i915/i915_pci.c | 3 +-
drivers/gpu/drm/i915/intel_device_info.h | 1 +
drivers/gpu/drm/i915/intel_lrc.c | 109 +++++++++++++++++++++----------
drivers/gpu/drm/i915/intel_lrc.h | 1 +
6 files changed, 84 insertions(+), 35 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 08d25aa..d2cc9f1 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2579,6 +2579,8 @@ intel_info(const struct drm_i915_private *dev_priv)
((dev_priv)->info.has_logical_ring_elsq)
#define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
((dev_priv)->info.has_logical_ring_preemption)
+#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
+ ((dev_priv)->info.has_hw_preempt_to_idle)
#define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index b97963d..10b1d61 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -529,7 +529,8 @@ static void init_contexts(struct drm_i915_private *i915)
static bool needs_preempt_context(struct drm_i915_private *i915)
{
- return HAS_LOGICAL_RING_PREEMPTION(i915);
+ return HAS_LOGICAL_RING_PREEMPTION(i915) &&
+ !HAS_HW_PREEMPT_TO_IDLE(i915);
}
int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 4ccab83..82125cf 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -600,7 +600,8 @@ static const struct intel_device_info intel_cannonlake_info = {
TRANSCODER_DSI0_OFFSET, TRANSCODER_DSI1_OFFSET}, \
GEN(11), \
.ddb_size = 2048, \
- .has_logical_ring_elsq = 1
+ .has_logical_ring_elsq = 1, \
+ .has_hw_preempt_to_idle = 1
static const struct intel_device_info intel_icelake_11_info = {
GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 86ce1db..a2ee278 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -104,6 +104,7 @@ enum intel_ppgtt {
func(has_logical_ring_contexts); \
func(has_logical_ring_elsq); \
func(has_logical_ring_preemption); \
+ func(has_hw_preempt_to_idle); \
func(has_overlay); \
func(has_pooled_eu); \
func(has_psr); \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 08fd9b1..26b7062 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -155,6 +155,7 @@
#define GEN8_CTX_STATUS_ACTIVE_IDLE (1 << 3)
#define GEN8_CTX_STATUS_COMPLETE (1 << 4)
#define GEN8_CTX_STATUS_LITE_RESTORE (1 << 15)
+#define GEN11_CTX_STATUS_PREEMPT_IDLE (1 << 29)
#define GEN8_CTX_STATUS_COMPLETED_MASK \
(GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
@@ -500,29 +501,49 @@ static void port_assign(struct execlist_port *port, struct i915_request *rq)
port_set(port, port_pack(i915_request_get(rq), port_count(port)));
}
-static void inject_preempt_context(struct intel_engine_cs *engine)
+static void execlist_send_preempt_to_idle(struct intel_engine_cs *engine)
{
struct intel_engine_execlists *execlists = &engine->execlists;
- struct intel_context *ce =
- to_intel_context(engine->i915->preempt_context, engine);
- unsigned int n;
+ GEM_TRACE("%s\n", engine->name);
- GEM_BUG_ON(execlists->preempt_complete_status !=
- upper_32_bits(ce->lrc_desc));
+ if (HAS_HW_PREEMPT_TO_IDLE(engine->i915)) {
+ /*
+ * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
+ * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
+ */
+ GEM_BUG_ON(execlists->ctrl_reg == NULL);
- /*
- * Switch to our empty preempt context so
- * the state of the GPU is known (idle).
- */
- GEM_TRACE("%s\n", engine->name);
- for (n = execlists_num_ports(execlists); --n; )
- write_desc(execlists, 0, n);
+ /*
+ * If we have hardware preempt-to-idle, we do not need to
+ * inject any job to the hardware. We only set a flag.
+ */
+ writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
+ } else {
+ struct intel_context *ce =
+ to_intel_context(engine->i915->preempt_context, engine);
+ unsigned int n;
- write_desc(execlists, ce->lrc_desc, n);
+ GEM_BUG_ON(execlists->preempt_complete_status !=
+ upper_32_bits(ce->lrc_desc));
+ GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
+ _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+ CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
+ _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+ CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
- /* we need to manually load the submit queue */
- if (execlists->ctrl_reg)
- writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+ /*
+ * Switch to our empty preempt context so
+ * the state of the GPU is known (idle).
+ */
+ for (n = execlists_num_ports(execlists); --n; )
+ write_desc(execlists, 0, n);
+
+ write_desc(execlists, ce->lrc_desc, n);
+
+ /* we need to manually load the submit queue */
+ if (execlists->ctrl_reg)
+ writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+ }
execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
@@ -595,7 +616,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
return;
if (need_preempt(engine, last, execlists->queue_priority)) {
- inject_preempt_context(engine);
+ execlist_send_preempt_to_idle(engine);
return;
}
@@ -922,22 +943,43 @@ static void process_csb(struct intel_engine_cs *engine)
execlists->active);
status = buf[2 * head];
- if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
- GEN8_CTX_STATUS_PREEMPTED))
- execlists_set_active(execlists,
- EXECLISTS_ACTIVE_HWACK);
- if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
- execlists_clear_active(execlists,
- EXECLISTS_ACTIVE_HWACK);
-
- if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
- continue;
+ /*
+ * Check if preempted from idle to idle directly.
+ * The STATUS_IDLE_ACTIVE flag is used to mark
+ * such transition.
+ */
+ if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
+ (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {
- /* We should never get a COMPLETED | IDLE_ACTIVE! */
- GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+ /*
+ * We could not have COMPLETED anything
+ * if we were idle before preemption.
+ */
+ GEM_BUG_ON(status & GEN8_CTX_STATUS_COMPLETED_MASK);
+ } else {
+ if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
+ GEN8_CTX_STATUS_PREEMPTED))
+ execlists_set_active(execlists,
+ EXECLISTS_ACTIVE_HWACK);
+
+ if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
+ execlists_clear_active(execlists,
+ EXECLISTS_ACTIVE_HWACK);
+
+ if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
+ continue;
- if (status & GEN8_CTX_STATUS_COMPLETE &&
- buf[2*head + 1] == execlists->preempt_complete_status) {
+ /* We should never get a COMPLETED | IDLE_ACTIVE! */
+ GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+ }
+
+ /*
+ * Check if preempted to real idle, either directly or
+ * the preemptive context already finished executing
+ */
+ if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
+ (status & GEN8_CTX_STATUS_COMPLETE &&
+ buf[2*head + 1] == execlists->preempt_complete_status)) {
GEM_TRACE("%s preempt-idle\n", engine->name);
complete_preempt_context(execlists);
continue;
@@ -2150,7 +2192,8 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
engine->unpark = NULL;
engine->flags |= I915_ENGINE_SUPPORTS_STATS;
- if (engine->i915->preempt_context)
+ if (engine->i915->preempt_context ||
+ HAS_HW_PREEMPT_TO_IDLE(engine->i915))
engine->flags |= I915_ENGINE_HAS_PREEMPTION;
engine->i915->caps.scheduler =
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index f5a5502..871901a 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -43,6 +43,7 @@
#define RING_EXECLIST_SQ_CONTENTS(engine) _MMIO((engine)->mmio_base + 0x510)
#define RING_EXECLIST_CONTROL(engine) _MMIO((engine)->mmio_base + 0x550)
#define EL_CTRL_LOAD (1 << 0)
+#define EL_CTRL_PREEMPT_TO_IDLE (1 << 1)
/* The docs specify that the write pointer wraps around after 5h, "After status
* is written out to the last available status QW at offset 5h, this pointer