Haswell GT3e has the unique feature of supporting Write-Through cacheing of objects within the eLLC/LLC. The purpose of this is to enable the display plane to remain coherent whilst objects lie resident in the eLLC/LLC - so that we, in theory, get the best of both worlds, perfect display and fast access. v2: Actually do the clflush on transition to WT, nagging by Ville. Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Cc: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx> Cc: Kenneth Graunke <kenneth@xxxxxxxxxxxxx> --- drivers/gpu/drm/i915/i915_dma.c | 3 +++ drivers/gpu/drm/i915/i915_drv.h | 4 +++- drivers/gpu/drm/i915/i915_gem.c | 29 +++++++++++++++-------------- drivers/gpu/drm/i915/i915_gem_gtt.c | 11 ++++++++++- include/uapi/drm/i915_drm.h | 1 + 5 files changed, 32 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 8da0b3d..75989fc 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -976,6 +976,9 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_LLC: value = HAS_LLC(dev); break; + case I915_PARAM_HAS_WT: + value = HAS_WT(dev); + break; case I915_PARAM_HAS_ALIASING_PPGTT: value = dev_priv->mm.aliasing_ppgtt ? 1 : 0; break; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 34d2b9d..d27a82a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -452,6 +452,7 @@ enum i915_cache_level { I915_CACHE_NONE = 0, I915_CACHE_LLC, I915_CACHE_LLC_MLC, /* gen6+, in docs at least! */ + I915_CACHE_WT, }; typedef uint32_t gen6_gtt_pte_t; @@ -1344,7 +1345,7 @@ struct drm_i915_gem_object { unsigned int pending_fenced_gpu_access:1; unsigned int fenced_gpu_access:1; - unsigned int cache_level:2; + unsigned int cache_level:3; unsigned int has_aliasing_ppgtt_mapping:1; unsigned int has_global_gtt_mapping:1; @@ -1547,6 +1548,7 @@ struct drm_i915_file_private { #define HAS_BLT(dev) (INTEL_INFO(dev)->has_blt_ring) #define HAS_VEBOX(dev) (INTEL_INFO(dev)->has_vebox_ring) #define HAS_LLC(dev) (INTEL_INFO(dev)->has_llc) +#define HAS_WT(dev) (IS_HASWELL(dev) && ((struct drm_i915_private *)(dev)->dev_private)->ellc_size) #define I915_NEED_GFX_HWS(dev) (INTEL_INFO(dev)->need_gfx_hws) #define HAS_HW_CONTEXTS(dev) (INTEL_INFO(dev)->gen >= 5) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 99362f7..6b33494 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3447,27 +3447,27 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, i915_gem_obj_ggtt_set_color(obj, cache_level); } - if (cache_level == I915_CACHE_NONE) { - u32 old_read_domains, old_write_domain; - + if (cache_level == I915_CACHE_NONE || + cache_level == I915_CACHE_WT) { /* If we're coming from LLC cached, then we haven't * actually been tracking whether the data is in the * CPU cache or not, since we only allow one bit set * in obj->write_domain and have been skipping the clflushes. - * Just set it to the CPU cache for now. + * Do them now. */ - WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU); - WARN_ON(obj->base.read_domains & ~I915_GEM_DOMAIN_CPU); + if (obj->pages && !obj->stolen) { + u32 old_write_domain; - old_read_domains = obj->base.read_domains; - old_write_domain = obj->base.write_domain; + trace_i915_gem_object_clflush(obj); + drm_clflush_sg(obj->pages); - obj->base.read_domains = I915_GEM_DOMAIN_CPU; - obj->base.write_domain = I915_GEM_DOMAIN_CPU; + old_write_domain = obj->base.write_domain; + obj->base.write_domain &= ~I915_GEM_DOMAIN_CPU; - trace_i915_gem_object_change_domain(obj, - old_read_domains, - old_write_domain); + trace_i915_gem_object_change_domain(obj, + obj->base.read_domains, + old_write_domain); + } } obj->cache_level = cache_level; @@ -3565,7 +3565,8 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, * of uncaching, which would allow us to flush all the LLC-cached data * with that bit in the PTE to main memory with just one PIPE_CONTROL. */ - ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE); + ret = i915_gem_object_set_cache_level(obj, + HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 0522d00..072a348 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -54,6 +54,7 @@ (((bits) & 0x8) << (11 - 3))) #define HSW_WB_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x3) #define HSW_WB_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0xb) +#define HSW_WT_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x6) static gen6_gtt_pte_t gen6_pte_encode(dma_addr_t addr, enum i915_cache_level level) @@ -116,8 +117,16 @@ static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr, gen6_gtt_pte_t pte = GEN6_PTE_VALID; pte |= HSW_PTE_ADDR_ENCODE(addr); - if (level != I915_CACHE_NONE) + switch (level) { + case I915_CACHE_NONE: + break; + case I915_CACHE_WT: + pte |= HSW_WT_ELLC_LLC_AGE0; + break; + default: pte |= HSW_WB_ELLC_LLC_AGE0; + break; + } return pte; } diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index e47cf00..e831292 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -338,6 +338,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_HAS_PINNED_BATCHES 24 #define I915_PARAM_HAS_EXEC_NO_RELOC 25 #define I915_PARAM_HAS_EXEC_HANDLE_LUT 26 +#define I915_PARAM_HAS_WT 27 typedef struct drm_i915_getparam { int param; -- 1.8.3.2 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx