Re: [PATCH v3] drm/i915/gen11: Preempt-to-idle support in execlists.

"Lis, Tomasz" <tomasz.lis@xxxxxxxxx> · Mon, 21 May 2018 12:16:04 +0200

On 2018-05-18 23:08, Daniele Ceraolo Spurio wrote:


On 11/05/18 08:45, Tomasz Lis wrote:
The patch adds support of preempt-to-idle requesting by setting a proper
bit within Execlist Control Register, and receiving preemption result 
from
Context Status Buffer.

Preemption in previous gens required a special batch buffer to be 
executed,
so the Command Streamer never preempted to idle directly. In Icelake 
it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.

This patch does not cover using the new preemption mechanism when GuC is
active.

v2: Added needs_preempt_context() change so that it is not created when
     preempt-to-idle is supported. (Chris)
     Updated setting HWACK flag so that it is cleared after
     preempt-to-dle. (Chris, Daniele)
     Updated to use I915_ENGINE_HAS_PREEMPTION flag. (Chris)

v3: Fixed needs_preempt_context() change. (Chris)
     Merged preemption trigger functions to one. (Chris)
     Fixed context state to not assume COMPLETED_MASK after preemption,
     since idle-to-idle case will not have it set.

Bspec: 18922
Signed-off-by: Tomasz Lis <tomasz.lis@xxxxxxxxx>
---
  drivers/gpu/drm/i915/i915_drv.h          |   2 +
  drivers/gpu/drm/i915/i915_gem_context.c  |   5 +-
  drivers/gpu/drm/i915/i915_pci.c          |   3 +-
  drivers/gpu/drm/i915/intel_device_info.h |   1 +
  drivers/gpu/drm/i915/intel_lrc.c         | 115 
++++++++++++++++++++++---------
  drivers/gpu/drm/i915/intel_lrc.h         |   1 +
  6 files changed, 92 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h 
b/drivers/gpu/drm/i915/i915_drv.h
index 57fb3aa..6e9647b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2535,6 +2535,8 @@ intel_info(const struct drm_i915_private 
*dev_priv)
          ((dev_priv)->info.has_logical_ring_elsq)
  #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
          ((dev_priv)->info.has_logical_ring_preemption)
+#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
+        ((dev_priv)->info.has_hw_preempt_to_idle)
    #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
  diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 33f8a4b..bdac129 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -454,7 +454,10 @@ destroy_kernel_context(struct i915_gem_context 
**ctxp)
    static bool needs_preempt_context(struct drm_i915_private *i915)
  {
-    return HAS_LOGICAL_RING_PREEMPTION(i915);
+    return HAS_LOGICAL_RING_PREEMPTION(i915) &&
+           (!HAS_HW_PREEMPT_TO_IDLE(i915) ||
+        (HAS_HW_PREEMPT_TO_IDLE(i915) &&
+        !USES_GUC_SUBMISSION(i915)));

Why do we keep the preempt context for !USES_GUC_SUBMISSION(i915) even 
if HAS_HW_PREEMPT_TO_IDLE(i915)? After this patch we shouldn't need it 
anymore, right?
The patch only provides gen11 way for the non-GuC submission. This is 
why the condition is so convoluted - preempt_context is still needed if 
we use GuC.
This will be simplified after GuC paches are added.

  }
    int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_pci.c 
b/drivers/gpu/drm/i915/i915_pci.c
index 4364922..66b6700 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -595,7 +595,8 @@ static const struct intel_device_info 
intel_cannonlake_info = {
      GEN(11), \
      .ddb_size = 2048, \
      .has_csr = 0, \
-    .has_logical_ring_elsq = 1
+    .has_logical_ring_elsq = 1, \
+    .has_hw_preempt_to_idle = 1
    static const struct intel_device_info intel_icelake_11_info = {
      GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h 
b/drivers/gpu/drm/i915/intel_device_info.h
index 933e316..4eb97b5 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -98,6 +98,7 @@ enum intel_platform {
      func(has_logical_ring_contexts); \
      func(has_logical_ring_elsq); \
      func(has_logical_ring_preemption); \
+    func(has_hw_preempt_to_idle); \
      func(has_overlay); \
      func(has_pooled_eu); \
      func(has_psr); \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
b/drivers/gpu/drm/i915/intel_lrc.c
index 29dcf34..8fe6795 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -154,6 +154,7 @@
  #define GEN8_CTX_STATUS_ACTIVE_IDLE    (1 << 3)
  #define GEN8_CTX_STATUS_COMPLETE    (1 << 4)
  #define GEN8_CTX_STATUS_LITE_RESTORE    (1 << 15)
+#define GEN11_CTX_STATUS_PREEMPT_IDLE    (1 << 29)
    #define GEN8_CTX_STATUS_COMPLETED_MASK \
       (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
@@ -526,31 +527,49 @@ static void port_assign(struct execlist_port 
*port, struct i915_request *rq)
  static void inject_preempt_context(struct intel_engine_cs *engine)

For gen11+ we don't inject a preempt context anymore, maybe we can 
rename this function to something like "inject_preempt()".
My initial approach was to just add a second function. Merging the 
changes to inject_preempt_context() was requested by Chris; as I 
understand it is to minimize refactoring in other work in progress.

  {
      struct intel_engine_execlists *execlists = &engine->execlists;
-    struct intel_context *ce =
-        to_intel_context(engine->i915->preempt_context, engine);
-    unsigned int n;
  -    GEM_BUG_ON(execlists->preempt_complete_status !=
-           upper_32_bits(ce->lrc_desc));
-    GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
- _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
-                       CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
- _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
-                      CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
+    if (HAS_HW_PREEMPT_TO_IDLE(engine->i915)) {
+        /*
+         * If we have hardware preempt-to-idle, we do not need to
+         * inject any job to the hardware. We only set a flag.
+         */
+        GEM_TRACE("%s\n", engine->name);

This trace is in both conditional branches, might be cleaner to just 
put it before the if statement.
True, I did not differentiated the messages. Will put before.

  -    /*
-     * Switch to our empty preempt context so
-     * the state of the GPU is known (idle).
-     */
-    GEM_TRACE("%s\n", engine->name);
-    for (n = execlists_num_ports(execlists); --n; )
-        write_desc(execlists, 0, n);
+        /*
+         * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
+         * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
+         */
+        GEM_BUG_ON(execlists->ctrl_reg == NULL);
  -    write_desc(execlists, ce->lrc_desc, n);
+        /* trigger preemption to idle */
+        writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
+    } else {
+        struct intel_context *ce =
+ to_intel_context(engine->i915->preempt_context, engine);
+        unsigned int n;
  -    /* we need to manually load the submit queue */
-    if (execlists->ctrl_reg)
-        writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+        GEM_BUG_ON(execlists->preempt_complete_status !=
+               upper_32_bits(ce->lrc_desc));
+        GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
+ _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+                      CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
+ _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+                      CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
+
+        /*
+         * Switch to our empty preempt context so
+         * the state of the GPU is known (idle).
+         */
+        GEM_TRACE("%s\n", engine->name);
+        for (n = execlists_num_ports(execlists); --n; )
+            write_desc(execlists, 0, n);
+
+        write_desc(execlists, ce->lrc_desc, n);
+
+        /* we need to manually load the submit queue */
+        if (execlists->ctrl_reg)
+            writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+    }
        execlists_clear_active(&engine->execlists, 
EXECLISTS_ACTIVE_HWACK);
      execlists_set_active(&engine->execlists, 
EXECLISTS_ACTIVE_PREEMPT);
@@ -1045,22 +1064,51 @@ static void 
execlists_submission_tasklet(unsigned long data)
                    status, buf[2*head + 1],
                    execlists->active);
  -            if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
-                      GEN8_CTX_STATUS_PREEMPTED))
-                execlists_set_active(execlists,
-                             EXECLISTS_ACTIVE_HWACK);
-            if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
+            /*
+             * Check if preempted from idle to idle directly.
+             * The STATUS_IDLE_ACTIVE flag is used to mark
+             * such transition.
+             */
+            if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
+                 (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {
+
                  execlists_clear_active(execlists,
                                 EXECLISTS_ACTIVE_HWACK);

EXECLISTS_ACTIVE_HWACK should be already clear here (we clear it both 
when we inject the pre-emption and on the previous A->I CSB event), so 
there should be no need to clear it.
This is a complex case; optimizations here may lead to errors later.
But I agree - since this block is only entered on idle-to-idle 
preemption, and setting the flag can only happen when hardware is not 
idle, we should never see the ACTIVE_HWACK flag set here.
I will change it to GEM_BUG_ON(), unless I will get any errors in 
testing that.


  -            if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
-                continue;
+                /*
+                 * We could not have COMPLETED anything
+                 * if we were idle before preemption.
+                 */
+                GEM_BUG_ON(status & GEN8_CTX_STATUS_COMPLETED_MASK);
+            }
+
+            else {

nitpick: formatting is wrong here.
ack.

Daniele

+                if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
+                          GEN8_CTX_STATUS_PREEMPTED))
+                    execlists_set_active(execlists,
+                               EXECLISTS_ACTIVE_HWACK);
+
+                if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
+                    execlists_clear_active(execlists,
+                               EXECLISTS_ACTIVE_HWACK);
  -            /* We should never get a COMPLETED | IDLE_ACTIVE! */
-            GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+                if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
+                    continue;
  -            if (status & GEN8_CTX_STATUS_COMPLETE &&
-                buf[2*head + 1] == 
execlists->preempt_complete_status) {
+                /*
+                 * We should never get a
+                 * COMPLETED | IDLE_ACTIVE!
+                 */
+                GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+            }
+
+            /*
+             * Check if preempted to real idle, either directly or
+             * the preemptive context already finished executing
+             */
+            if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
+                (status & GEN8_CTX_STATUS_COMPLETE &&
+                buf[2*head + 1] == 
execlists->preempt_complete_status)) {
                  GEM_TRACE("%s preempt-idle\n", engine->name);
                    execlists_cancel_port_requests(execlists);
@@ -2217,7 +2265,8 @@ static void 
execlists_set_default_submission(struct intel_engine_cs *engine)
      engine->unpark = NULL;
        engine->flags |= I915_ENGINE_SUPPORTS_STATS;
-    if (engine->i915->preempt_context)
+    if (engine->i915->preempt_context ||
+        HAS_HW_PREEMPT_TO_IDLE(engine->i915))
          engine->flags |= I915_ENGINE_HAS_PREEMPTION;
        engine->i915->caps.scheduler =
diff --git a/drivers/gpu/drm/i915/intel_lrc.h 
b/drivers/gpu/drm/i915/intel_lrc.h
index 4ec7d8d..b1083ac 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -45,6 +45,7 @@
  #define RING_EXECLIST_SQ_CONTENTS(engine) _MMIO((engine)->mmio_base 
+ 0x510)
  #define RING_EXECLIST_CONTROL(engine) _MMIO((engine)->mmio_base + 
0x550)
  #define      EL_CTRL_LOAD                (1 << 0)
+#define      EL_CTRL_PREEMPT_TO_IDLE        (1 << 1)
    /* The docs specify that the write pointer wraps around after 5h, 
"After status
   * is written out to the last available status QW at offset 5h, 
this pointer


_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx