[PATCH 4/4] drm/i915: Use the reloc.handle as an index into the execbuffer array

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Using copywinwin10 as an example that is dependent upon emitting a lot
of relocations (2 per operation), we see improvements of:

c2d/gm45: 618000.0/sec to 623000.0/sec.
i3-330m: 748000.0/sec to 789000.0/sec.

(measured relative to a baseline with neither optimisations applied).

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_dma.c            |    3 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   90 +++++++++++++++++-----------
 include/uapi/drm/i915_drm.h                |    6 ++
 3 files changed, 63 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index a0c4b4f..a35217d 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1020,6 +1020,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_EXEC_NO_RELOC:
 		value = 1;
 		break;
+	case I915_PARAM_HAS_EXEC_HANDLE_LUT:
+		value = 1;
+		break;
 	default:
 		DRM_DEBUG_DRIVER("Unknown parameter %d\n",
 				 param->param);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 30beea6..9ccd860 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -39,25 +39,40 @@
 
 struct eb_objects {
 	struct list_head objects;
-	int and;
-	struct hlist_head buckets[0];
+	unsigned int and;
+	union {
+		struct drm_i915_gem_object *lut[0];
+		struct hlist_head buckets[0];
+	};
 };
 
 static struct eb_objects *
-eb_create(int size)
+eb_create(struct drm_i915_gem_execbuffer2 *args)
 {
-	struct eb_objects *eb;
-	int count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
-	BUILD_BUG_ON(!is_power_of_2(PAGE_SIZE / sizeof(struct hlist_head)));
-	while (count > size)
-		count >>= 1;
-	eb = kzalloc(count*sizeof(struct hlist_head) +
-		     sizeof(struct eb_objects),
-		     GFP_KERNEL);
-	if (eb == NULL)
-		return eb;
-
-	eb->and = count - 1;
+	struct eb_objects *eb = NULL;
+
+	if (args->flags & I915_EXEC_HANDLE_LUT) {
+		int size = args->buffer_count;
+		size *= sizeof(struct drm_i915_gem_object);
+		size += sizeof(struct eb_objects);
+		eb = kzalloc(size, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
+	}
+
+	if (eb == NULL) {
+		int size = args->buffer_count;
+		int count = (PAGE_SIZE - sizeof(struct eb_objects)) / sizeof(struct hlist_head);
+		BUILD_BUG_ON(!is_power_of_2(PAGE_SIZE / sizeof(struct hlist_head)));
+		while (count > 2*size)
+			count >>= 1;
+		eb = kzalloc(count*sizeof(struct hlist_head) +
+			     sizeof(struct eb_objects),
+			     GFP_TEMPORARY);
+		if (eb == NULL)
+			return eb;
+
+		eb->and = count - 1;
+	}
+
 	INIT_LIST_HEAD(&eb->objects);
 	return eb;
 }
@@ -65,14 +80,8 @@ eb_create(int size)
 static void
 eb_reset(struct eb_objects *eb)
 {
-	memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
-}
-
-static void
-eb_add_object(struct eb_objects *eb, struct drm_i915_gem_object *obj)
-{
-	hlist_add_head(&obj->exec_node,
-		       &eb->buckets[obj->exec_handle & eb->and]);
+	if (eb->and)
+		memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
 }
 
 static int
@@ -105,9 +114,14 @@ eb_lookup_objects(struct eb_objects *eb,
 		drm_gem_object_reference(&obj->base);
 		list_add_tail(&obj->exec_list, &eb->objects);
 
-		obj->exec_handle = exec[i].handle;
 		obj->exec_entry = &exec[i];
-		eb_add_object(eb, obj);
+		if (eb->and == 0) {
+			eb->lut[i] = obj;
+		} else {
+			obj->exec_handle = exec[i].handle;
+			hlist_add_head(&obj->exec_node,
+				       &eb->buckets[exec[i].handle & eb->and]);
+		}
 	}
 	spin_unlock(&file->table_lock);
 
@@ -117,18 +131,22 @@ eb_lookup_objects(struct eb_objects *eb,
 static struct drm_i915_gem_object *
 eb_get_object(struct eb_objects *eb, unsigned long handle)
 {
-	struct hlist_head *head;
-	struct hlist_node *node;
-	struct drm_i915_gem_object *obj;
+	if (eb->and == 0) {
+		return eb->lut[handle];
+	} else {
+		struct hlist_head *head;
+		struct hlist_node *node;
 
-	head = &eb->buckets[handle & eb->and];
-	hlist_for_each(node, head) {
-		obj = hlist_entry(node, struct drm_i915_gem_object, exec_node);
-		if (obj->exec_handle == handle)
-			return obj;
-	}
+		head = &eb->buckets[handle & eb->and];
+		hlist_for_each(node, head) {
+			struct drm_i915_gem_object *obj;
 
-	return NULL;
+			obj = hlist_entry(node, struct drm_i915_gem_object, exec_node);
+			if (obj->exec_handle == handle)
+				return obj;
+		}
+		return NULL;
+	}
 }
 
 static void
@@ -988,7 +1006,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		goto pre_mutex_err;
 	}
 
-	eb = eb_create(args->buffer_count);
+	eb = eb_create(args);
 	if (eb == NULL) {
 		mutex_unlock(&dev->struct_mutex);
 		ret = -ENOMEM;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 7657d3e..82c1088 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -310,6 +310,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_RSVD_FOR_FUTURE_USE	 22
 #define I915_PARAM_HAS_SECURE_BATCHES	 23
 #define I915_PARAM_HAS_EXEC_NO_RELOC	 24
+#define I915_PARAM_HAS_EXEC_HANDLE_LUT   25
 
 typedef struct drm_i915_getparam {
 	int param;
@@ -690,6 +691,11 @@ struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_NO_RELOC		(1<<10)
 
+/** Use the reloc.handle as an index into the exec object array rather
+ * than as the per-file handle.
+ */
+#define I915_EXEC_HANDLE_LUT		(1<<11)
+
 #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
 	(eb2).rsvd1 = context & I915_EXEC_CONTEXT_ID_MASK
-- 
1.7.10.4



[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux