Re: [PATCH v2 1/2] drm/i915: Initialize bdw workarounds in logical ring mode too

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 04/11/2014 19:23, Rodrigo Vivi wrote:
These patches got listed to -collector but got a huge conflict. If it
is still relevant please rebase it.

This patch is currently not relevant, rebased version is already sent to the list for review.

https://patchwork.kernel.org/patch/5178771/

regards
Arun

Also my bikeshed is to findo better names to help on differentiate
them at least.

On Wed, Sep 24, 2014 at 5:02 AM, Michel Thierry
<michel.thierry@xxxxxxxxx> wrote:
Following the legacy ring submission example, update the
ring->init_context() hook to support the execlist submission mode.

Workarounds are defined in bdw_emit_workarounds(), but the emit
now depends on the ring submission mode.

v2: Updated after "Cleanup pre prod workarounds"

For: VIZ-4092
Signed-off-by: Michel Thierry <michel.thierry@xxxxxxxxx>
---
  drivers/gpu/drm/i915/i915_gem_context.c |  2 +-
  drivers/gpu/drm/i915/intel_lrc.c        | 66 +++++++++++++++++++++++++++++
  drivers/gpu/drm/i915/intel_ringbuffer.c | 75 +++++++++++++++++++--------------
  drivers/gpu/drm/i915/intel_ringbuffer.h | 11 ++++-
  4 files changed, 120 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 7b73b36..d1ed21a 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -657,7 +657,7 @@ done:

         if (uninitialized) {
                 if (ring->init_context) {
-                       ret = ring->init_context(ring);
+                       ret = ring->init_context(ring->buffer);
                         if (ret)
                                 DRM_ERROR("ring init context: %d\n", ret);
                 }
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index d64d518..a0aa3f0 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1020,6 +1020,62 @@ int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords)
         return 0;
  }

+static inline void intel_logical_ring_emit_wa(struct intel_ringbuffer *ringbuf,
+                                      u32 addr, u32 value)
+{
+       struct intel_engine_cs *ring = ringbuf->ring;
+       struct drm_device *dev = ring->dev;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+
+       if (WARN_ON(dev_priv->num_wa_regs >= I915_MAX_WA_REGS))
+               return;
+
+       intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1));
+       intel_logical_ring_emit(ringbuf, addr);
+       intel_logical_ring_emit(ringbuf, value);
+
+       dev_priv->intel_wa_regs[dev_priv->num_wa_regs].addr = addr;
+       dev_priv->intel_wa_regs[dev_priv->num_wa_regs].mask = value & 0xFFFF;
+       /* value is updated with the status of remaining bits of this
+        * register when it is read from debugfs file
+        */
+       dev_priv->intel_wa_regs[dev_priv->num_wa_regs].value = value;
+       dev_priv->num_wa_regs++;
+}
+
+static int bdw_init_logical_workarounds(struct intel_ringbuffer *ringbuf)
+{
+       int ret;
+       struct intel_engine_cs *ring = ringbuf->ring;
+       struct drm_device *dev = ring->dev;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+
+       /*
+        * workarounds applied in this fn are part of register state context,
+        * they need to be re-initialized followed by gpu reset, suspend/resume,
+        * module reload.
+        */
+       dev_priv->num_wa_regs = 0;
+       memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs));
+
+       /*
+        * update the number of dwords required based on the
+        * actual number of workarounds applied
+        */
+       ret = intel_logical_ring_begin(ringbuf, BDW_WA_DWORDS_SIZE);
+       if (ret)
+               return ret;
+
+       bdw_emit_workarounds(ringbuf);
+
+       intel_logical_ring_advance(ringbuf);
+
+       DRM_DEBUG_DRIVER("Number of Workarounds applied: %d\n",
+                        dev_priv->num_wa_regs);
+
+       return 0;
+}
+
  static int gen8_init_common_ring(struct intel_engine_cs *ring)
  {
         struct drm_device *dev = ring->dev;
@@ -1315,6 +1371,10 @@ static int logical_render_ring_init(struct drm_device *dev)
         if (HAS_L3_DPF(dev))
                 ring->irq_keep_mask |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT;

+       if (IS_BROADWELL(dev))
+               ring->init_context = bdw_init_logical_workarounds;
+       ring->emit_wa = intel_logical_ring_emit_wa;
+
         ring->init = gen8_init_render_ring;
         ring->cleanup = intel_fini_pipe_control;
         ring->get_seqno = gen8_get_seqno;
@@ -1802,6 +1862,12 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
         }

         if (ring->id == RCS && !ctx->rcs_initialized) {
+               if (ring->init_context) {
+                       ret = ring->init_context(ringbuf);
+                       if (ret)
+                               DRM_ERROR("ring init context: %d\n", ret);
+               }
+
                 ret = intel_lr_context_render_state_init(ring, ctx);
                 if (ret) {
                         DRM_ERROR("Init render state failed: %d\n", ret);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 395f926..e6ac913 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -677,9 +677,10 @@ err:
         return ret;
  }

-static inline void intel_ring_emit_wa(struct intel_engine_cs *ring,
+static inline void intel_ring_emit_wa(struct intel_ringbuffer *ringbuf,
                                        u32 addr, u32 value)
  {
+       struct intel_engine_cs *ring = ringbuf->ring;
         struct drm_device *dev = ring->dev;
         struct drm_i915_private *dev_priv = dev->dev_private;

@@ -701,51 +702,33 @@ static inline void intel_ring_emit_wa(struct intel_engine_cs *ring,
         return;
  }

-static int bdw_init_workarounds(struct intel_engine_cs *ring)
+void bdw_emit_workarounds(struct intel_ringbuffer *ringbuf)
  {
-       int ret;
-       struct drm_device *dev = ring->dev;
-       struct drm_i915_private *dev_priv = dev->dev_private;
-
-       /*
-        * workarounds applied in this fn are part of register state context,
-        * they need to be re-initialized followed by gpu reset, suspend/resume,
-        * module reload.
-        */
-       dev_priv->num_wa_regs = 0;
-       memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs));
-
-       /*
-        * update the number of dwords required based on the
-        * actual number of workarounds applied
-        */
-       ret = intel_ring_begin(ring, 18);
-       if (ret)
-               return ret;
+       struct intel_engine_cs *ring = ringbuf->ring;

         /* WaDisablePartialInstShootdown:bdw */
         /* WaDisableThreadStallDopClockGating:bdw */
         /* FIXME: Unclear whether we really need this on production bdw. */
-       intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
+       ring->emit_wa(ringbuf, GEN8_ROW_CHICKEN,
                            _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE
                                              | STALL_DOP_GATING_DISABLE));

         /* WaDisableDopClockGating:bdw May not be needed for production */
-       intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2,
+       ring->emit_wa(ringbuf, GEN7_ROW_CHICKEN2,
                            _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));

-       intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3,
+       ring->emit_wa(ringbuf, HALF_SLICE_CHICKEN3,
                            _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));

         /* Use Force Non-Coherent whenever executing a 3D context. This is a
          * workaround for for a possible hang in the unlikely event a TLB
          * invalidation occurs during a PSD flush.
          */
-       intel_ring_emit_wa(ring, HDC_CHICKEN0,
+       ring->emit_wa(ringbuf, HDC_CHICKEN0,
                            _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT));

         /* Wa4x4STCOptimizationDisable:bdw */
-       intel_ring_emit_wa(ring, CACHE_MODE_1,
+       ring->emit_wa(ringbuf, CACHE_MODE_1,
                            _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE));

         /*
@@ -756,8 +739,34 @@ static int bdw_init_workarounds(struct intel_engine_cs *ring)
          * disable bit, which we don't touch here, but it's good
          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
          */
-       intel_ring_emit_wa(ring, GEN7_GT_MODE,
+       ring->emit_wa(ringbuf, GEN7_GT_MODE,
                            GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
+}
+
+static int bdw_init_workarounds(struct intel_ringbuffer *ringbuf)
+{
+       int ret;
+       struct intel_engine_cs *ring = ringbuf->ring;
+       struct drm_device *dev = ring->dev;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+
+       /*
+        * workarounds applied in this fn are part of register state context,
+        * they need to be re-initialized followed by gpu reset, suspend/resume,
+        * module reload.
+        */
+       dev_priv->num_wa_regs = 0;
+       memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs));
+
+       /*
+        * update the number of dwords required based on the
+        * actual number of workarounds applied
+        */
+       ret = intel_ring_begin(ring, BDW_WA_DWORDS_SIZE);
+       if (ret)
+               return ret;
+
+       bdw_emit_workarounds(ringbuf);

         intel_ring_advance(ring);

@@ -767,9 +776,10 @@ static int bdw_init_workarounds(struct intel_engine_cs *ring)
         return 0;
  }

-static int chv_init_workarounds(struct intel_engine_cs *ring)
+static int chv_init_workarounds(struct intel_ringbuffer *ringbuf)
  {
         int ret;
+       struct intel_engine_cs *ring = ringbuf->ring;
         struct drm_device *dev = ring->dev;
         struct drm_i915_private *dev_priv = dev->dev_private;

@@ -786,19 +796,19 @@ static int chv_init_workarounds(struct intel_engine_cs *ring)
                 return ret;

         /* WaDisablePartialInstShootdown:chv */
-       intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
+       intel_ring_emit_wa(ringbuf, GEN8_ROW_CHICKEN,
                            _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE));

         /* WaDisableThreadStallDopClockGating:chv */
-       intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
+       intel_ring_emit_wa(ringbuf, GEN8_ROW_CHICKEN,
                            _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));

         /* WaDisableDopClockGating:chv (pre-production hw) */
-       intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2,
+       intel_ring_emit_wa(ringbuf, GEN7_ROW_CHICKEN2,
                            _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));

         /* WaDisableSamplerPowerBypass:chv (pre-production hw) */
-       intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3,
+       intel_ring_emit_wa(ringbuf, HALF_SLICE_CHICKEN3,
                            _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));

         intel_ring_advance(ring);
@@ -2310,6 +2320,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
                         ring->init_context = chv_init_workarounds;
                 else
                         ring->init_context = bdw_init_workarounds;
+               ring->emit_wa = intel_ring_emit_wa;
                 ring->add_request = gen6_add_request;
                 ring->flush = gen8_render_ring_flush;
                 ring->irq_get = gen8_ring_get_irq;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 07f66d4..417aa09 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -12,6 +12,11 @@
   */
  #define CACHELINE_BYTES 64

+/* Number of dwords required based on the
+ * actual number of workarounds applied
+ */
+#define BDW_WA_DWORDS_SIZE 18
+
  /*
   * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 "Ring Buffer Use"
   * Gen3 BSpec "vol1c Memory Interface Functions" / 2.3.4.5 "Ring Buffer Use"
@@ -148,7 +153,10 @@ struct  intel_engine_cs {

         int             (*init)(struct intel_engine_cs *ring);

-       int             (*init_context)(struct intel_engine_cs *ring);
+       int             (*init_context)(struct intel_ringbuffer *ringbuf);
+
+       void    (*emit_wa)(struct intel_ringbuffer *ringbuf,
+                      u32 addr, u32 value);

         void            (*write_tail)(struct intel_engine_cs *ring,
                                       u32 value);
@@ -427,6 +435,7 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev);

  u64 intel_ring_get_active_head(struct intel_engine_cs *ring);
  void intel_ring_setup_status_page(struct intel_engine_cs *ring);
+void bdw_emit_workarounds(struct intel_ringbuffer *ringbuf);

  static inline u32 intel_ring_get_tail(struct intel_ringbuffer *ringbuf)
  {
--
2.0.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx




_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx





[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux