[PATCH 09/11] drm/i915: Use the reloc.handle as an index into the execbuffer array

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Using copywinwin10 as an example that is dependent upon emitting a lot
of relocations (2 per operation), we see improvements of:

c2d/gm45: 618000.0/sec to 623000.0/sec.
i3-330m: 748000.0/sec to 789000.0/sec.

(measured relative to a baseline with neither optimisations applied).

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_dma.c            |    3 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  100 +++++++++++++++++-----------
 include/uapi/drm/i915_drm.h                |    8 ++-
 3 files changed, 71 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 4b2b55e..ae63318 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -995,6 +995,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_EXEC_NO_RELOC:
 		value = 1;
 		break;
+	case I915_PARAM_HAS_EXEC_HANDLE_LUT:
+		value = 1;
+		break;
 	default:
 		DRM_DEBUG_DRIVER("Unknown parameter %d\n",
 				 param->param);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 7e0dc15..18a6ab7 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -39,24 +39,40 @@
 struct eb_objects {
 	struct list_head objects;
 	int and;
-	struct hlist_head buckets[0];
+	union {
+		struct drm_i915_gem_object *lut[0];
+		struct hlist_head buckets[0];
+	};
 };
 
 static struct eb_objects *
-eb_create(int size)
+eb_create(struct drm_i915_gem_execbuffer2 *args)
 {
-	struct eb_objects *eb;
-	int count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
-	BUILD_BUG_ON(!is_power_of_2(PAGE_SIZE / sizeof(struct hlist_head)));
-	while (count > size)
-		count >>= 1;
-	eb = kzalloc(count*sizeof(struct hlist_head) +
-		     sizeof(struct eb_objects),
-		     GFP_KERNEL);
-	if (eb == NULL)
-		return eb;
-
-	eb->and = count - 1;
+	struct eb_objects *eb = NULL;
+
+	if (args->flags & I915_EXEC_HANDLE_LUT) {
+		int size = args->buffer_count;
+		size *= sizeof(struct drm_i915_gem_object *);
+		size += sizeof(struct eb_objects);
+		eb = kmalloc(size, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
+	}
+
+	if (eb == NULL) {
+		int size = args->buffer_count;
+		int count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
+		BUILD_BUG_ON(!is_power_of_2(PAGE_SIZE / sizeof(struct hlist_head)));
+		while (count > 2*size)
+			count >>= 1;
+		eb = kzalloc(count*sizeof(struct hlist_head) +
+			     sizeof(struct eb_objects),
+			     GFP_TEMPORARY);
+		if (eb == NULL)
+			return eb;
+
+		eb->and = count - 1;
+	} else
+		eb->and = -args->buffer_count;
+
 	INIT_LIST_HEAD(&eb->objects);
 	return eb;
 }
@@ -64,26 +80,20 @@ eb_create(int size)
 static void
 eb_reset(struct eb_objects *eb)
 {
-	memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
-}
-
-static void
-eb_add_object(struct eb_objects *eb, struct drm_i915_gem_object *obj)
-{
-	hlist_add_head(&obj->exec_node,
-		       &eb->buckets[obj->exec_handle & eb->and]);
+	if (eb->and >= 0)
+		memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
 }
 
 static int
 eb_lookup_objects(struct eb_objects *eb,
 		  struct drm_i915_gem_exec_object2 *exec,
-		  int count,
+		  const struct drm_i915_gem_execbuffer2 *args,
 		  struct drm_file *file)
 {
 	int i;
 
 	spin_lock(&file->table_lock);
-	for (i = 0; i < count; i++) {
+	for (i = 0; i < args->buffer_count; i++) {
 		struct drm_i915_gem_object *obj;
 
 		obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle));
@@ -104,9 +114,15 @@ eb_lookup_objects(struct eb_objects *eb,
 		drm_gem_object_reference(&obj->base);
 		list_add_tail(&obj->exec_list, &eb->objects);
 
-		obj->exec_handle = exec[i].handle;
 		obj->exec_entry = &exec[i];
-		eb_add_object(eb, obj);
+		if (eb->and < 0) {
+			eb->lut[i] = obj;
+		} else {
+			uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle;
+			obj->exec_handle = handle;
+			hlist_add_head(&obj->exec_node,
+				       &eb->buckets[handle & eb->and]);
+		}
 	}
 	spin_unlock(&file->table_lock);
 
@@ -116,18 +132,24 @@ eb_lookup_objects(struct eb_objects *eb,
 static struct drm_i915_gem_object *
 eb_get_object(struct eb_objects *eb, unsigned long handle)
 {
-	struct hlist_head *head;
-	struct hlist_node *node;
-	struct drm_i915_gem_object *obj;
+	if (eb->and < 0) {
+		if (handle >= -eb->and)
+			return NULL;
+		return eb->lut[handle];
+	} else {
+		struct hlist_head *head;
+		struct hlist_node *node;
 
-	head = &eb->buckets[handle & eb->and];
-	hlist_for_each(node, head) {
-		obj = hlist_entry(node, struct drm_i915_gem_object, exec_node);
-		if (obj->exec_handle == handle)
-			return obj;
-	}
+		head = &eb->buckets[handle & eb->and];
+		hlist_for_each(node, head) {
+			struct drm_i915_gem_object *obj;
 
-	return NULL;
+			obj = hlist_entry(node, struct drm_i915_gem_object, exec_node);
+			if (obj->exec_handle == handle)
+				return obj;
+		}
+		return NULL;
+	}
 }
 
 static void
@@ -624,7 +646,7 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
 
 	/* reacquire the objects */
 	eb_reset(eb);
-	ret = eb_lookup_objects(eb, exec, count, file);
+	ret = eb_lookup_objects(eb, exec, args, file);
 	if (ret)
 		goto err;
 
@@ -934,7 +956,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		goto pre_mutex_err;
 	}
 
-	eb = eb_create(args->buffer_count);
+	eb = eb_create(args);
 	if (eb == NULL) {
 		mutex_unlock(&dev->struct_mutex);
 		ret = -ENOMEM;
@@ -942,7 +964,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	}
 
 	/* Look up object handles */
-	ret = eb_lookup_objects(eb, exec, args->buffer_count, file);
+	ret = eb_lookup_objects(eb, exec, args, file);
 	if (ret)
 		goto err;
 
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 2430b6a..07d5941 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -309,6 +309,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_HAS_SECURE_BATCHES	 23
 #define I915_PARAM_HAS_PINNED_BATCHES	 24
 #define I915_PARAM_HAS_EXEC_NO_RELOC	 25
+#define I915_PARAM_HAS_EXEC_HANDLE_LUT   26
 
 typedef struct drm_i915_getparam {
 	int param;
@@ -699,7 +700,12 @@ struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_NO_RELOC		(1<<11)
 
-#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_NO_RELOC<<1)
+/** Use the reloc.handle as an index into the exec object array rather
+ * than as the per-file handle.
+ */
+#define I915_EXEC_HANDLE_LUT		(1<<12)
+
+#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_HANDLE_LUT<<1)
 
 #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
-- 
1.7.10.4



[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux