[PATCH 55/70] drm/i915: Use WC copies on !llc platforms for the command parser

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Since we already blow the TLBs by using kmap/kunmap, we may as well go the
whole hog and see if declaring our destination page as WC is faster than
keeping it as WB and using clflush. It should be!

Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index f4d3e7dc3835..61248223f95b 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -918,8 +918,9 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
 	struct drm_i915_cmd_descriptor default_desc = { CMD_DESC_SKIP };
 	const struct drm_i915_cmd_descriptor *desc = &default_desc;
 	u32 last_cmd_header = 0;
-	int needs_clflush = 0;
 	void *src, *dst;
+	int src_needs_clflush = 0;
+	bool dst_needs_clflush;
 	unsigned in, out;
 	u32 *buf, partial = 0, length = 1;
 	bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */
@@ -932,13 +933,17 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
 	if (WARN_ON(shadow_batch_obj->pages_pin_count == 0))
 		return -ENODEV;
 
-	ret = i915_gem_obj_prepare_shmem_read(batch_obj, &needs_clflush);
+	ret = i915_gem_obj_prepare_shmem_read(batch_obj, &src_needs_clflush);
 	if (ret) {
 		DRM_DEBUG_DRIVER("CMD: failed to prepare shadow batch\n");
 		return ret;
 	}
 
-	ret = i915_gem_object_set_to_cpu_domain(shadow_batch_obj, true);
+	dst_needs_clflush = !INTEL_INFO(shadow_batch_obj->base.dev)->has_llc;
+	if (dst_needs_clflush)
+		ret = i915_gem_object_set_to_gtt_domain(shadow_batch_obj, true);
+	else
+		ret = i915_gem_object_set_to_cpu_domain(shadow_batch_obj, true);
 	if (ret) {
 		DRM_DEBUG_DRIVER("CMD: Failed to set shadow batch to CPU\n");
 		goto unpin;
@@ -972,7 +977,7 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
 			this = PAGE_SIZE - in;
 
 		src = kmap_atomic(sg_page_iter_page(&src_iter));
-		if (needs_clflush)
+		if (src_needs_clflush)
 			drm_clflush_virt_range(src + in, this);
 
 		i = this;
@@ -984,6 +989,8 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
 			 */
 			if (out == PAGE_SIZE) {
 				__sg_page_iter_next(&dst_iter);
+				if (dst_needs_clflush)
+					drm_clflush_virt_range(dst, PAGE_SIZE);
 				kunmap_atomic(dst);
 				dst = kmap_atomic(sg_page_iter_page(&dst_iter));
 				out = 0;
@@ -1104,6 +1111,8 @@ check:
 
 unmap:
 	kunmap_atomic(src);
+	if (dst_needs_clflush)
+		drm_clflush_virt_range(dst, PAGE_SIZE);
 	kunmap_atomic(dst);
 unpin:
 	i915_gem_object_unpin_pages(batch_obj);
-- 
2.1.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx





[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux