From: Deepak S <deepak.s@xxxxxxxxx> Split vlv force wake routines to help individually control Media/Render well based on the register access. We've seen power savings in the lower sub-1W range on workloads that only need on of the power wells, e.g. glbenchmark, media playback Signed-off-by: Deepak S <deepak.s@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_drv.h | 17 +++ drivers/gpu/drm/i915/i915_reg.h | 4 +- drivers/gpu/drm/i915/intel_pm.c | 7 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 10 +- drivers/gpu/drm/i915/intel_uncore.c | 183 +++++++++++++++++++++++++++----- 5 files changed, 186 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7ec24eb..40d0288 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -464,6 +464,9 @@ struct intel_uncore { unsigned fifo_count; unsigned forcewake_count; + unsigned fw_rendercount; + unsigned fw_mediacount; + struct delayed_work force_wake_work; }; @@ -2461,6 +2464,20 @@ void intel_sbi_write(struct drm_i915_private *dev_priv, u16 reg, u32 value, int vlv_gpu_freq(struct drm_i915_private *dev_priv, int val); int vlv_freq_opcode(struct drm_i915_private *dev_priv, int val); +void vlv_force_wake_get(struct drm_i915_private *dev_priv, int fw_engine); +void vlv_force_wake_put(struct drm_i915_private *dev_priv, int fw_engine); + +#define FORCEWAKE_VLV_RENDER_RANGE_OFFSET(reg) \ + (((reg) >= 0x2000 && (reg) < 0x4000) ||\ + ((reg) >= 0x5000 && (reg) < 0x8000) ||\ + ((reg) >= 0xB000 && (reg) < 0x12000) ||\ + ((reg) >= 0x2E000 && (reg) < 0x30000)) + +#define FORCEWAKE_VLV_MEDIA_RANGE_OFFSET(reg)\ + (((reg) >= 0x12000 && (reg) < 0x14000) ||\ + ((reg) >= 0x22000 && (reg) < 0x24000) ||\ + ((reg) >= 0x30000 && (reg) < 0x40000)) + #define FORCEWAKE_RENDER (1 << 0) #define FORCEWAKE_MEDIA (1 << 1) #define FORCEWAKE_ALL (FORCEWAKE_RENDER | FORCEWAKE_MEDIA) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 9292c9e..04d46b2 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4840,8 +4840,8 @@ #define EDP_LINK_TRAIN_VOL_EMP_MASK_IVB (0x3f<<22) #define FORCEWAKE 0xA18C -#define FORCEWAKE_VLV 0x1300b0 -#define FORCEWAKE_ACK_VLV 0x1300b4 +#define FORCEWAKE_VLV 0x1300b0 +#define FORCEWAKE_ACK_VLV 0x1300b4 #define FORCEWAKE_MEDIA_VLV 0x1300b8 #define FORCEWAKE_ACK_MEDIA_VLV 0x1300bc #define FORCEWAKE_ACK_HSW 0x130044 diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index c2dff4a..c00e987 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -191,7 +191,10 @@ static void sandybridge_blit_fbc_update(struct drm_device *dev) u32 blt_ecoskpd; /* Make sure blitter notifies FBC of writes */ - gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); + + /* Blitter is part of Media powerwell on VLV. No impact of + * his param in other platforms for now */ + gen6_gt_force_wake_get(dev_priv, FORCEWAKE_MEDIA); blt_ecoskpd = I915_READ(GEN6_BLITTER_ECOSKPD); blt_ecoskpd |= GEN6_BLITTER_FBC_NOTIFY << @@ -204,7 +207,7 @@ static void sandybridge_blit_fbc_update(struct drm_device *dev) I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd); POSTING_READ(GEN6_BLITTER_ECOSKPD); - gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); + gen6_gt_force_wake_put(dev_priv, FORCEWAKE_MEDIA); } static void ironlake_enable_fbc(struct drm_crtc *crtc, unsigned long interval) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 6c7ce7b..3079853 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1033,7 +1033,10 @@ gen6_ring_get_irq(struct intel_ring_buffer *ring) /* It looks like we need to prevent the gt from suspending while waiting * for an notifiy irq, otherwise irqs seem to get lost on at least the * blt/bsd rings on ivb. */ - gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); + if (ring->id == RCS) + gen6_gt_force_wake_get(dev_priv, FORCEWAKE_RENDER); + else + gen6_gt_force_wake_get(dev_priv, FORCEWAKE_MEDIA); spin_lock_irqsave(&dev_priv->irq_lock, flags); if (ring->irq_refcount++ == 0) { @@ -1067,7 +1070,10 @@ gen6_ring_put_irq(struct intel_ring_buffer *ring) } spin_unlock_irqrestore(&dev_priv->irq_lock, flags); - gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); + if (ring->id == RCS) + gen6_gt_force_wake_put(dev_priv, FORCEWAKE_RENDER); + else + gen6_gt_force_wake_put(dev_priv, FORCEWAKE_MEDIA); } static bool diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index c6acbb2..7619d87 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -175,38 +175,112 @@ static void vlv_force_wake_reset(struct drm_i915_private *dev_priv) __raw_posting_read(dev_priv, FORCEWAKE_ACK_VLV); } -static void vlv_force_wake_get(struct drm_i915_private *dev_priv, int fw_engine) -{ - if (wait_for_atomic((__raw_i915_read32(dev_priv, FORCEWAKE_ACK_VLV) & FORCEWAKE_KERNEL) == 0, - FORCEWAKE_ACK_TIMEOUT_MS)) - DRM_ERROR("Timed out waiting for forcewake old ack to clear.\n"); - - __raw_i915_write32(dev_priv, FORCEWAKE_VLV, - _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL)); - __raw_i915_write32(dev_priv, FORCEWAKE_MEDIA_VLV, - _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL)); - - if (wait_for_atomic((__raw_i915_read32(dev_priv, FORCEWAKE_ACK_VLV) & FORCEWAKE_KERNEL), - FORCEWAKE_ACK_TIMEOUT_MS)) - DRM_ERROR("Timed out waiting for GT to ack forcewake request.\n"); +static void __vlv_force_wake_get(struct drm_i915_private *dev_priv, + int fw_engine) +{ + /* Check for Render Engine */ + if (FORCEWAKE_RENDER & fw_engine) { + if (wait_for_atomic((__raw_i915_read32(dev_priv, + FORCEWAKE_ACK_VLV) & + FORCEWAKE_KERNEL) == 0, + FORCEWAKE_ACK_TIMEOUT_MS)) + DRM_ERROR("Timed out: Render forcewake old ack to clear.\n"); + + __raw_i915_write32(dev_priv, FORCEWAKE_VLV, + _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL)); + + if (wait_for_atomic((__raw_i915_read32(dev_priv, + FORCEWAKE_ACK_VLV) & + FORCEWAKE_KERNEL), + FORCEWAKE_ACK_TIMEOUT_MS)) + DRM_ERROR("Timed out: waiting for Render to ack.\n"); + } - if (wait_for_atomic((__raw_i915_read32(dev_priv, FORCEWAKE_ACK_MEDIA_VLV) & - FORCEWAKE_KERNEL), - FORCEWAKE_ACK_TIMEOUT_MS)) - DRM_ERROR("Timed out waiting for media to ack forcewake request.\n"); + /* Check for Media Engine */ + if (FORCEWAKE_MEDIA & fw_engine) { + if (wait_for_atomic((__raw_i915_read32(dev_priv, + FORCEWAKE_ACK_MEDIA_VLV) & + FORCEWAKE_KERNEL) == 0, + FORCEWAKE_ACK_TIMEOUT_MS)) + DRM_ERROR("Timed out: Media forcewake old ack to clear.\n"); + + __raw_i915_write32(dev_priv, FORCEWAKE_MEDIA_VLV, + _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL)); + + if (wait_for_atomic((__raw_i915_read32(dev_priv, + FORCEWAKE_ACK_MEDIA_VLV) & + FORCEWAKE_KERNEL), + FORCEWAKE_ACK_TIMEOUT_MS)) + DRM_ERROR("Timed out: waiting for media to ack.\n"); + } /* WaRsForcewakeWaitTC0:vlv */ __gen6_gt_wait_for_thread_c0(dev_priv); + } -static void vlv_force_wake_put(struct drm_i915_private *dev_priv, int fw_engine) +static void __vlv_force_wake_put(struct drm_i915_private *dev_priv, + int fw_engine) { - __raw_i915_write32(dev_priv, FORCEWAKE_VLV, - _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL)); - __raw_i915_write32(dev_priv, FORCEWAKE_MEDIA_VLV, - _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL)); + + /* Check for Render Engine */ + if (FORCEWAKE_RENDER & fw_engine) + __raw_i915_write32(dev_priv, FORCEWAKE_VLV, + _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL)); + + + /* Check for Media Engine */ + if (FORCEWAKE_MEDIA & fw_engine) + __raw_i915_write32(dev_priv, FORCEWAKE_MEDIA_VLV, + _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL)); + /* The below doubles as a POSTING_READ */ gen6_gt_check_fifodbg(dev_priv); + +} + +void vlv_force_wake_get(struct drm_i915_private *dev_priv, + int fw_engine) +{ + unsigned long irqflags; + + spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); + if (FORCEWAKE_RENDER & fw_engine) { + if (dev_priv->uncore.fw_rendercount++ == 0) + dev_priv->uncore.funcs.force_wake_get(dev_priv, + FORCEWAKE_RENDER); + } + if (FORCEWAKE_MEDIA & fw_engine) { + if (dev_priv->uncore.fw_mediacount++ == 0) + dev_priv->uncore.funcs.force_wake_get(dev_priv, + FORCEWAKE_MEDIA); + } + + spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); +} + +void vlv_force_wake_put(struct drm_i915_private *dev_priv, + int fw_engine) +{ + unsigned long irqflags; + + spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); + + if (FORCEWAKE_RENDER & fw_engine) { + WARN_ON(dev_priv->uncore.fw_rendercount == 0); + if (--dev_priv->uncore.fw_rendercount == 0) + dev_priv->uncore.funcs.force_wake_put(dev_priv, + FORCEWAKE_RENDER); + } + + if (FORCEWAKE_MEDIA & fw_engine) { + WARN_ON(dev_priv->uncore.fw_mediacount == 0); + if (--dev_priv->uncore.fw_mediacount == 0) + dev_priv->uncore.funcs.force_wake_put(dev_priv, + FORCEWAKE_MEDIA); + } + + spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } static void gen6_force_wake_work(struct work_struct *work) @@ -292,6 +366,10 @@ void gen6_gt_force_wake_get(struct drm_i915_private *dev_priv, int fw_engine) if (!dev_priv->uncore.funcs.force_wake_get) return; + /* Redirect to VLV specific routine */ + if (IS_VALLEYVIEW(dev_priv->dev)) + return vlv_force_wake_get(dev_priv, fw_engine); + spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); if (dev_priv->uncore.forcewake_count++ == 0) dev_priv->uncore.funcs.force_wake_get(dev_priv, FORCEWAKE_ALL); @@ -308,6 +386,11 @@ void gen6_gt_force_wake_put(struct drm_i915_private *dev_priv, int fw_engine) if (!dev_priv->uncore.funcs.force_wake_put) return; + /* Redirect to VLV specific routine */ + if (IS_VALLEYVIEW(dev_priv->dev)) + return vlv_force_wake_put(dev_priv, fw_engine); + + spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); if (--dev_priv->uncore.forcewake_count == 0) { dev_priv->uncore.forcewake_count++; @@ -395,6 +478,39 @@ gen6_read##x(struct drm_i915_private *dev_priv, off_t reg, bool trace) { \ REG_READ_FOOTER; \ } +#define __vlv_read(x) \ +static u##x \ +vlv_read##x(struct drm_i915_private *dev_priv, off_t reg, bool trace) { \ + unsigned fwengine = 0; \ + unsigned *fwcount = 0; \ + REG_READ_HEADER(x); \ + if (FORCEWAKE_VLV_RENDER_RANGE_OFFSET(reg)) { \ + fwengine = FORCEWAKE_RENDER; \ + fwcount = &dev_priv->uncore.fw_rendercount; \ + } \ + else if (FORCEWAKE_VLV_MEDIA_RANGE_OFFSET(reg)) { \ + fwengine = FORCEWAKE_MEDIA; \ + fwcount = &dev_priv->uncore.fw_mediacount; \ + } \ + if (fwengine != 0) { \ + if ((*fwcount)++ == 0) \ + (dev_priv)->uncore.funcs.force_wake_get(dev_priv, \ + fwengine); \ + val = __raw_i915_read##x(dev_priv, reg); \ + if (--(*fwcount) == 0) \ + (dev_priv)->uncore.funcs.force_wake_put(dev_priv, \ + FORCEWAKE_ALL); \ + } else { \ + val = __raw_i915_read##x(dev_priv, reg); \ + } \ + REG_READ_FOOTER; \ +} + + +__vlv_read(8) +__vlv_read(16) +__vlv_read(32) +__vlv_read(64) __gen6_read(8) __gen6_read(16) __gen6_read(32) @@ -408,6 +524,7 @@ __gen4_read(16) __gen4_read(32) __gen4_read(64) +#undef __vlv_read #undef __gen6_read #undef __gen5_read #undef __gen4_read @@ -542,8 +659,8 @@ void intel_uncore_init(struct drm_device *dev) gen6_force_wake_work); if (IS_VALLEYVIEW(dev)) { - dev_priv->uncore.funcs.force_wake_get = vlv_force_wake_get; - dev_priv->uncore.funcs.force_wake_put = vlv_force_wake_put; + dev_priv->uncore.funcs.force_wake_get = __vlv_force_wake_get; + dev_priv->uncore.funcs.force_wake_put = __vlv_force_wake_put; } else if (IS_HASWELL(dev) || IS_GEN8(dev)) { dev_priv->uncore.funcs.force_wake_get = __gen6_gt_force_wake_mt_get; dev_priv->uncore.funcs.force_wake_put = __gen6_gt_force_wake_mt_put; @@ -609,10 +726,18 @@ void intel_uncore_init(struct drm_device *dev) dev_priv->uncore.funcs.mmio_writel = gen6_write32; dev_priv->uncore.funcs.mmio_writeq = gen6_write64; } - dev_priv->uncore.funcs.mmio_readb = gen6_read8; - dev_priv->uncore.funcs.mmio_readw = gen6_read16; - dev_priv->uncore.funcs.mmio_readl = gen6_read32; - dev_priv->uncore.funcs.mmio_readq = gen6_read64; + + if (IS_VALLEYVIEW(dev)) { + dev_priv->uncore.funcs.mmio_readb = vlv_read8; + dev_priv->uncore.funcs.mmio_readw = vlv_read16; + dev_priv->uncore.funcs.mmio_readl = vlv_read32; + dev_priv->uncore.funcs.mmio_readq = vlv_read64; + } else { + dev_priv->uncore.funcs.mmio_readb = gen6_read8; + dev_priv->uncore.funcs.mmio_readw = gen6_read16; + dev_priv->uncore.funcs.mmio_readl = gen6_read32; + dev_priv->uncore.funcs.mmio_readq = gen6_read64; + } break; case 5: dev_priv->uncore.funcs.mmio_writeb = gen5_write8; -- 1.8.4.2 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx