Since we blow the TLB caches by using kmap/kunmap, we may as well go the
whole hog and see if declaring our destination page as WC is faster than
keeping it as WB and using clflush. It should be!

Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 0e826bec7942..f4d4c3132932 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -984,9 +984,10 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
 	const struct drm_i915_cmd_descriptor *desc = &default_desc;
 	u32 last_cmd_header = 0;
 	unsigned dst_iter, src_iter;
-	int needs_clflush = 0;
 	struct get_page rewind;
 	void *src, *dst;
+	int src_needs_clflush = 0;
+	bool dst_needs_clflush;
 	unsigned in, out;
 	u32 *buf, partial = 0, length = 1;
 	bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */
@@ -999,13 +1000,19 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
 	if (WARN_ON(shadow_batch_obj->pages_pin_count == 0))
 		return -ENODEV;
 
-	ret = i915_gem_obj_prepare_shmem_read(batch_obj, &needs_clflush);
+	ret = i915_gem_obj_prepare_shmem_read(batch_obj, &src_needs_clflush);
 	if (ret) {
 		DRM_DEBUG_DRIVER("CMD: failed to prepare shadow batch\n");
 		return ret;
 	}
 
-	ret = i915_gem_object_set_to_cpu_domain(shadow_batch_obj, true);
+	dst_needs_clflush =
+		shadow_batch_obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
+		!INTEL_INFO(shadow_batch_obj->base.dev)->has_llc;
+	if (dst_needs_clflush)
+		ret = i915_gem_object_set_to_gtt_domain(shadow_batch_obj, true);
+	else
+		ret = i915_gem_object_set_to_cpu_domain(shadow_batch_obj, true);
 	if (ret) {
 		DRM_DEBUG_DRIVER("CMD: Failed to set shadow batch to CPU\n");
 		goto unpin;
@@ -1035,7 +1042,7 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
 			this = PAGE_SIZE - in;
 
 		src = kmap_atomic(i915_gem_object_get_page(batch_obj, src_iter));
-		if (needs_clflush)
+		if (src_needs_clflush)
 			drm_clflush_virt_range(src + in, this);
 
 		i = this;
@@ -1054,10 +1061,17 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
 			k = i;
 			if (k > PAGE_SIZE - out)
 				k = PAGE_SIZE - out;
-			if (k == PAGE_SIZE)
+			if (k == PAGE_SIZE) {
 				copy_page(dst, src);
-			else
+			} else {
+				/* Partial cache lines need clflushing */
+				if (dst_needs_clflush &&
+				    (out | k) & (boot_cpu_data.x86_clflush_size - 1))
+					drm_clflush_virt_range(dst + out, k);
 				memcpy(dst + out, src + j, k);
+			}
+			if (dst_needs_clflush)
+				drm_clflush_virt_range(dst + out, k);
 
 			out += k;
 			j += k;
-- 
2.6.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx