If we're moving a bunch of buffers from the CPU domain to the GPU domain, and we've already blown out the entire cache via a wbinvd, there is nothing more to do. With this and the previous patches, I am seeing a 3x FPS increase on a certain benchmark which uses a giant 2d array texture. Unless I missed something in the code, it should only affect non-LLC i915 platforms. I haven't yet run any numbers for other benchmarks, nor have I attempted to check if various conformance tests still pass. NOTE: As mentioned in the previous patch, if one can easily obtain the largest buffer and attempt to flush it first, the results would be even more desirable. Cc: DRI Development <dri-devel@xxxxxxxxxxxxxxxxxxxxx> Signed-off-by: Ben Widawsky <ben@xxxxxxxxxxxx> --- drivers/gpu/drm/i915/i915_drv.h | 3 ++- drivers/gpu/drm/i915/i915_gem.c | 12 +++++------- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 8 +++++--- drivers/gpu/drm/i915/intel_lrc.c | 8 +++++--- 4 files changed, 17 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d68c75f..fdb92a3 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2642,7 +2642,8 @@ static inline bool i915_stop_ring_allow_warn(struct drm_i915_private *dev_priv) } void i915_gem_reset(struct drm_device *dev); -bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force); +enum drm_cache_flush +i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force); int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj); int __must_check i915_gem_init(struct drm_device *dev); int i915_gem_init_rings(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index de241eb..3746738 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3608,7 +3608,7 @@ err_unpin: return vma; } -bool +enum drm_cache_flush i915_gem_clflush_object(struct drm_i915_gem_object 
*obj, bool force) { @@ -3617,14 +3617,14 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj, * again at bind time. */ if (obj->pages == NULL) - return false; + return DRM_CACHE_FLUSH_NONE; /* * Stolen memory is always coherent with the GPU as it is explicitly * marked as wc by the system, or the system is cache-coherent. */ if (obj->stolen || obj->phys_handle) - return false; + return DRM_CACHE_FLUSH_NONE; /* If the GPU is snooping the contents of the CPU cache, * we do not need to manually clear the CPU cache lines. However, @@ -3635,12 +3635,10 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj, * tracking. */ if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) - return false; + return DRM_CACHE_FLUSH_NONE; trace_i915_gem_object_clflush(obj); - drm_clflush_sg(obj->pages); - - return true; + return drm_clflush_sg(obj->pages); } /** Flushes the GTT write domain for the object if it's dirty. */ diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 0c25f62..e8eb9e9 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -827,7 +827,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring, { struct i915_vma *vma; uint32_t flush_domains = 0; - bool flush_chipset = false; + enum drm_cache_flush flush_chipset = DRM_CACHE_FLUSH_NONE; int ret; list_for_each_entry(vma, vmas, exec_list) { @@ -836,8 +836,10 @@ i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring, if (ret) return ret; - if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) - flush_chipset |= i915_gem_clflush_object(obj, false); + if (obj->base.write_domain & I915_GEM_DOMAIN_CPU && + flush_chipset != DRM_CACHE_FLUSH_WBINVD) { + flush_chipset = i915_gem_clflush_object(obj, false); + } flush_domains |= obj->base.write_domain; } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 89b5577..a6c6ebd 100644 --- 
a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -611,7 +611,7 @@ static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf, struct intel_engine_cs *ring = ringbuf->ring; struct i915_vma *vma; uint32_t flush_domains = 0; - bool flush_chipset = false; + enum drm_cache_flush flush_chipset = DRM_CACHE_FLUSH_NONE; int ret; list_for_each_entry(vma, vmas, exec_list) { @@ -621,8 +621,10 @@ static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf, if (ret) return ret; - if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) - flush_chipset |= i915_gem_clflush_object(obj, false); + if (obj->base.write_domain & I915_GEM_DOMAIN_CPU && + flush_chipset != DRM_CACHE_FLUSH_WBINVD) { + flush_chipset = i915_gem_clflush_object(obj, false); + } flush_domains |= obj->base.write_domain; } -- 2.1.3 _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/dri-devel