From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> This eliminates six needless spin lock/unlock pairs when writing out ELSP. Apart from tidier code main benefit is between 0.51% and 0.73% speedup on some OGL tests under CHV (bench_OglBatch4 bench_OglDeferred respectively). Kindly benchmarked by Ben Widawsky. Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> Cc: Dave Gordon <david.s.gordon@xxxxxxxxx> Cc: Daniel Vetter <daniel.vetter@xxxxxxxx> Cc: Ben Widawsky <ben@xxxxxxxxxxxx> --- drivers/gpu/drm/i915/i915_drv.h | 15 +++++++++++++++ drivers/gpu/drm/i915/intel_lrc.c | 13 ++++++------- drivers/gpu/drm/i915/intel_uncore.c | 14 -------------- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 66f0c60..33d577a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3197,6 +3197,21 @@ int vlv_freq_opcode(struct drm_i915_private *dev_priv, int val); #define POSTING_READ(reg) (void)I915_READ_NOTRACE(reg) #define POSTING_READ16(reg) (void)I915_READ16_NOTRACE(reg) +/* Raw MMIO access with no forcewake handling, use with care. */ +#define __raw_i915_read8(dev_priv__, reg__) readb((dev_priv__)->regs + (reg__)) +#define __raw_i915_write8(dev_priv__, reg__, val__) writeb(val__, (dev_priv__)->regs + (reg__)) + +#define __raw_i915_read16(dev_priv__, reg__) readw((dev_priv__)->regs + (reg__)) +#define __raw_i915_write16(dev_priv__, reg__, val__) writew(val__, (dev_priv__)->regs + (reg__)) + +#define __raw_i915_read32(dev_priv__, reg__) readl((dev_priv__)->regs + (reg__)) +#define __raw_i915_write32(dev_priv__, reg__, val__) writel(val__, (dev_priv__)->regs + (reg__)) + +#define __raw_i915_read64(dev_priv__, reg__) readq((dev_priv__)->regs + (reg__)) +#define __raw_i915_write64(dev_priv__, reg__, val__) writeq(val__, (dev_priv__)->regs + (reg__)) + +#define __raw_posting_read(dev_priv__, reg__) (void)__raw_i915_read32(dev_priv__, reg__) + /* "Broadcast RGB" property */ #define INTEL_BROADCAST_RGB_AUTO 0 #define INTEL_BROADCAST_RGB_FULL 1 diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index e405b61..e22b866 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -305,6 +305,7 @@ static void execlists_elsp_write(struct intel_engine_cs *ring, * Instead, we do the runtime_pm_get/put when creating/destroying requests. */ spin_lock_irqsave(&dev_priv->uncore.lock, flags); + if (IS_CHERRYVIEW(dev) || INTEL_INFO(dev)->gen >= 9) { if (dev_priv->uncore.fw_rendercount++ == 0) dev_priv->uncore.funcs.force_wake_get(dev_priv, @@ -322,19 +323,17 @@ static void execlists_elsp_write(struct intel_engine_cs *ring, dev_priv->uncore.funcs.force_wake_get(dev_priv, FORCEWAKE_ALL); } - spin_unlock_irqrestore(&dev_priv->uncore.lock, flags); - I915_WRITE(RING_ELSP(ring), desc[1]); - I915_WRITE(RING_ELSP(ring), desc[0]); - I915_WRITE(RING_ELSP(ring), desc[3]); + __raw_i915_write32(dev_priv, RING_ELSP(ring), desc[1]); + __raw_i915_write32(dev_priv, RING_ELSP(ring), desc[0]); + __raw_i915_write32(dev_priv, RING_ELSP(ring), desc[3]); /* The context is automatically loaded after the following */ - I915_WRITE(RING_ELSP(ring), desc[2]); + __raw_i915_write32(dev_priv, RING_ELSP(ring), desc[2]); /* ELSP is a wo register, so use another nearby reg for posting instead */ - POSTING_READ(RING_EXECLIST_STATUS(ring)); + __raw_posting_read(dev_priv, RING_EXECLIST_STATUS(ring)); /* Release Force Wakeup (see the big comment above). */ - spin_lock_irqsave(&dev_priv->uncore.lock, flags); if (IS_CHERRYVIEW(dev) || INTEL_INFO(dev)->gen >= 9) { if (--dev_priv->uncore.fw_rendercount == 0) dev_priv->uncore.funcs.force_wake_put(dev_priv, diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index e9561de..9a31932 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -26,20 +26,6 @@ #define FORCEWAKE_ACK_TIMEOUT_MS 2 -#define __raw_i915_read8(dev_priv__, reg__) readb((dev_priv__)->regs + (reg__)) -#define __raw_i915_write8(dev_priv__, reg__, val__) writeb(val__, (dev_priv__)->regs + (reg__)) - -#define __raw_i915_read16(dev_priv__, reg__) readw((dev_priv__)->regs + (reg__)) -#define __raw_i915_write16(dev_priv__, reg__, val__) writew(val__, (dev_priv__)->regs + (reg__)) - -#define __raw_i915_read32(dev_priv__, reg__) readl((dev_priv__)->regs + (reg__)) -#define __raw_i915_write32(dev_priv__, reg__, val__) writel(val__, (dev_priv__)->regs + (reg__)) - -#define __raw_i915_read64(dev_priv__, reg__) readq((dev_priv__)->regs + (reg__)) -#define __raw_i915_write64(dev_priv__, reg__, val__) writeq(val__, (dev_priv__)->regs + (reg__)) - -#define __raw_posting_read(dev_priv__, reg__) (void)__raw_i915_read32(dev_priv__, reg__) - static void assert_device_not_suspended(struct drm_i915_private *dev_priv) { -- 2.2.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx