We have to do this manually. Somebody had a Great Idea. I've measured speed-ups just a few percent above the noise level (below 5% for the best case), but no slowdows. Chris Wilson measured quite a bit more (10-20% above the usual snb variance) on a more recent and better tuned version of sna, but also recorded a few slow-downs on benchmarks know for uglier amounts of snb-induced variance. v2: Incorporate Ben Widawsky's preliminary review comments and elaborate a bit about the performance impact in the changelog. Acked-by: Chris Wilson <chris at chris-wilson.co.uk> Signed-Off-by: Daniel Vetter <daniel.vetter at ffwll.ch> --- drivers/gpu/drm/i915/i915_dma.c | 2 +- drivers/gpu/drm/i915/i915_drv.c | 4 ++- drivers/gpu/drm/i915/i915_drv.h | 3 +- drivers/gpu/drm/i915/i915_gem.c | 23 +++++++++++++++++++- drivers/gpu/drm/i915/i915_gem_tiling.c | 16 +++++++++++++- drivers/gpu/drm/i915/i915_reg.h | 34 ++++++++++++++++++++++++++++++++ 6 files changed, 75 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 3f27173..dfef956 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1208,7 +1208,7 @@ static int i915_load_gem_init(struct drm_device *dev) i915_gem_do_init(dev, 0, mappable_size, gtt_size - PAGE_SIZE); mutex_lock(&dev->struct_mutex); - ret = i915_gem_init_ringbuffer(dev); + ret = i915_gem_init_hw(dev); mutex_unlock(&dev->struct_mutex); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 1658cfd..12ddf47 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -495,7 +495,7 @@ static int i915_drm_thaw(struct drm_device *dev) mutex_lock(&dev->struct_mutex); dev_priv->mm.suspended = 0; - error = i915_gem_init_ringbuffer(dev); + error = i915_gem_init_hw(dev); mutex_unlock(&dev->struct_mutex); if (HAS_PCH_SPLIT(dev)) @@ -686,6 +686,8 @@ int i915_reset(struct drm_device *dev, u8 flags) !dev_priv->mm.suspended) { dev_priv->mm.suspended = 0; + i915_gem_init_swizzling(dev); + dev_priv->ring[RCS].init(&dev_priv->ring[RCS]); if (HAS_BSD(dev)) dev_priv->ring[VCS].init(&dev_priv->ring[VCS]); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 865de80..0845419 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1187,7 +1187,8 @@ int __must_check i915_gem_object_set_domain(struct drm_i915_gem_object *obj, uint32_t read_domains, uint32_t write_domain); int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj); -int __must_check i915_gem_init_ringbuffer(struct drm_device *dev); +int __must_check i915_gem_init_hw(struct drm_device *dev); +void i915_gem_init_swizzling(struct drm_device *dev); void i915_gem_cleanup_ringbuffer(struct drm_device *dev); void i915_gem_do_init(struct drm_device *dev, unsigned long start, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 51a2b0c..86fffd2 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3681,12 +3681,31 @@ i915_gem_idle(struct drm_device *dev) return 0; } +void i915_gem_init_swizzling(struct drm_device *dev) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + + if (INTEL_INFO(dev)->gen < 6 || + dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) + return; + + I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | + DISP_TILE_SURFACE_SWIZZLING); + + I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); + if (IS_GEN6(dev)) + I915_WRITE(ARB_MODE, ARB_MODE_ENABLE(ARB_MODE_SWIZZLE_SNB)); + else + I915_WRITE(ARB_MODE, ARB_MODE_ENABLE(ARB_MODE_SWIZZLE_IVB)); +} int -i915_gem_init_ringbuffer(struct drm_device *dev) +i915_gem_init_hw(struct drm_device *dev) { drm_i915_private_t *dev_priv = dev->dev_private; int ret; + i915_gem_init_swizzling(dev); + ret = intel_init_render_ring_buffer(dev); if (ret) return ret; @@ -3742,7 +3761,7 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data, mutex_lock(&dev->struct_mutex); dev_priv->mm.suspended = 0; - ret = i915_gem_init_ringbuffer(dev); + ret = i915_gem_init_hw(dev); if (ret != 0) { mutex_unlock(&dev->struct_mutex); return ret; diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 861223b..acf89fe 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -93,8 +93,20 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev) uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; if (INTEL_INFO(dev)->gen >= 6) { - swizzle_x = I915_BIT_6_SWIZZLE_NONE; - swizzle_y = I915_BIT_6_SWIZZLE_NONE; + uint32_t dimm_c0, dimm_c1; + dimm_c0 = I915_READ(MAD_DIMM_C0); + dimm_c1 = I915_READ(MAD_DIMM_C1); + dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK; + dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK; + /* Enable swizzling when the channels are populated with + * identically sized dimms. */ + if (dimm_c0 == dimm_c1) { + swizzle_x = I915_BIT_6_SWIZZLE_9_10; + swizzle_y = I915_BIT_6_SWIZZLE_9; + } else { + swizzle_x = I915_BIT_6_SWIZZLE_NONE; + swizzle_y = I915_BIT_6_SWIZZLE_NONE; + } } else if (IS_GEN5(dev)) { /* On Ironlake whatever DRAM config, GPU always do * same swizzling setup. diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index f960738..539ef90 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -295,6 +295,12 @@ #define FENCE_REG_SANDYBRIDGE_0 0x100000 #define SANDYBRIDGE_FENCE_PITCH_SHIFT 32 +/* control register for cpu gtt access */ +#define TILECTL 0x101000 +#define TILECTL_SWZCTL (1 << 0) +#define TILECTL_TLB_PREFETCH_DIS (1 << 2) +#define TILECTL_BACKSNOOP_DIS (1 << 3) + /* * Instruction and interrupt control regs */ @@ -318,6 +324,11 @@ #define RING_MAX_IDLE(base) ((base)+0x54) #define RING_HWS_PGA(base) ((base)+0x80) #define RING_HWS_PGA_GEN6(base) ((base)+0x2080) +#define ARB_MODE 0x04030 +#define ARB_MODE_SWIZZLE_SNB (1<<4) +#define ARB_MODE_SWIZZLE_IVB (1<<5) +#define ARB_MODE_ENABLE(x) GFX_MODE_ENABLE(x) +#define ARB_MODE_DISABLE(x) GFX_MODE_DISABLE(x) #define RENDER_HWS_PGA_GEN7 (0x04080) #define RING_FAULT_REG(ring) (0x4094 + 0x100*(ring)->id) #define DONE_REG 0x40b0 @@ -1037,6 +1048,29 @@ #define C0DRB3 0x10206 #define C1DRB3 0x10606 +/** snb MCH registers for reading the DRAM channel configuration */ +#define MAD_DIMM_C0 (MCHBAR_MIRROR_BASE_SNB + 0x5004) +#define MAD_DIMM_C1 (MCHBAR_MIRROR_BASE_SNB + 0x5008) +#define MAD_DIMM_C2 (MCHBAR_MIRROR_BASE_SNB + 0x500C) +#define MAD_DIMM_ECC_MASK (0x3 << 24) +#define MAD_DIMM_ECC_OFF (0x0 << 24) +#define MAD_DIMM_ECC_IO_ON_LOGIC_OFF (0x1 << 24) +#define MAD_DIMM_ECC_IO_OFF_LOGIC_ON (0x2 << 24) +#define MAD_DIMM_ECC_ON (0x3 << 24) +#define MAD_DIMM_ENH_INTERLEAVE (0x1 << 22) +#define MAD_DIMM_RANK_INTERLEAVE (0x1 << 21) +#define MAD_DIMM_B_WIDTH_X16 (0x1 << 20) /* X8 chips if unset */ +#define MAD_DIMM_A_WIDTH_X16 (0x1 << 19) /* X8 chips if unset */ +#define MAD_DIMM_B_DUAL_RANK (0x1 << 18) +#define MAD_DIMM_A_DUAL_RANK (0x1 << 17) +#define MAD_DIMM_A_SELECT (0x1 << 16) +/* DIMM sizes are in multiples of 256mb. */ +#define MAD_DIMM_B_SIZE_SHIFT 8 +#define MAD_DIMM_B_SIZE_MASK (0xff << MAD_DIMM_B_SIZE_SHIFT) +#define MAD_DIMM_A_SIZE_SHIFT 0 +#define MAD_DIMM_A_SIZE_MASK (0xff << MAD_DIMM_A_SIZE_SHIFT) + + /* Clocking configuration register */ #define CLKCFG 0x10c00 #define CLKCFG_FSB_400 (5 << 0) /* hrawclk 100 */ -- 1.7.7.5