On Tue, Jan 08, 2013 at 10:53:17AM +0000, Chris Wilson wrote: > Using copywinwin10 as an example that is dependent upon emitting a lot > of relocations (2 per operation), we see improvements of: > > c2d/gm45: 618000.0/sec to 623000.0/sec. > i3-330m: 748000.0/sec to 789000.0/sec. > > (measured relative to a baseline with neither optimisations applied). > > Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk> Slurped in patches 6-9 of this series, thanks. -Daniel > --- > drivers/gpu/drm/i915/i915_dma.c | 3 + > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 100 +++++++++++++++++----------- > include/uapi/drm/i915_drm.h | 8 ++- > 3 files changed, 71 insertions(+), 40 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c > index 4b2b55e..ae63318 100644 > --- a/drivers/gpu/drm/i915/i915_dma.c > +++ b/drivers/gpu/drm/i915/i915_dma.c > @@ -995,6 +995,9 @@ static int i915_getparam(struct drm_device *dev, void *data, > case I915_PARAM_HAS_EXEC_NO_RELOC: > value = 1; > break; > + case I915_PARAM_HAS_EXEC_HANDLE_LUT: > + value = 1; > + break; > default: > DRM_DEBUG_DRIVER("Unknown parameter %d\n", > param->param); > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > index 7e0dc15..18a6ab7 100644 > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > @@ -39,24 +39,40 @@ > struct eb_objects { > struct list_head objects; > int and; > - struct hlist_head buckets[0]; > + union { > + struct drm_i915_gem_object *lut[0]; > + struct hlist_head buckets[0]; > + }; > }; > > static struct eb_objects * > -eb_create(int size) > +eb_create(struct drm_i915_gem_execbuffer2 *args) > { > - struct eb_objects *eb; > - int count = PAGE_SIZE / sizeof(struct hlist_head) / 2; > - BUILD_BUG_ON(!is_power_of_2(PAGE_SIZE / sizeof(struct hlist_head))); > - while (count > size) > - count >>= 1; > - eb = kzalloc(count*sizeof(struct hlist_head) + > - sizeof(struct eb_objects), > - GFP_KERNEL); > - if (eb == NULL) > - return eb; > - > - eb->and = count - 1; > + struct eb_objects *eb = NULL; > + > + if (args->flags & I915_EXEC_HANDLE_LUT) { > + int size = args->buffer_count; > + size *= sizeof(struct drm_i915_gem_object *); > + size += sizeof(struct eb_objects); > + eb = kmalloc(size, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY); > + } > + > + if (eb == NULL) { > + int size = args->buffer_count; > + int count = PAGE_SIZE / sizeof(struct hlist_head) / 2; > + BUILD_BUG_ON(!is_power_of_2(PAGE_SIZE / sizeof(struct hlist_head))); > + while (count > 2*size) > + count >>= 1; > + eb = kzalloc(count*sizeof(struct hlist_head) + > + sizeof(struct eb_objects), > + GFP_TEMPORARY); > + if (eb == NULL) > + return eb; > + > + eb->and = count - 1; > + } else > + eb->and = -args->buffer_count; > + > INIT_LIST_HEAD(&eb->objects); > return eb; > } > @@ -64,26 +80,20 @@ eb_create(int size) > static void > eb_reset(struct eb_objects *eb) > { > - memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head)); > -} > - > -static void > -eb_add_object(struct eb_objects *eb, struct drm_i915_gem_object *obj) > -{ > - hlist_add_head(&obj->exec_node, > - &eb->buckets[obj->exec_handle & eb->and]); > + if (eb->and >= 0) > + memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head)); > } > > static int > eb_lookup_objects(struct eb_objects *eb, > struct drm_i915_gem_exec_object2 *exec, > - int count, > + const struct drm_i915_gem_execbuffer2 *args, > struct drm_file *file) > { > int i; > > spin_lock(&file->table_lock); > - for (i = 0; i < count; i++) { > + for (i = 0; i < args->buffer_count; i++) { > struct drm_i915_gem_object *obj; > > obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle)); > @@ -104,9 +114,15 @@ eb_lookup_objects(struct eb_objects *eb, > drm_gem_object_reference(&obj->base); > list_add_tail(&obj->exec_list, &eb->objects); > > - obj->exec_handle = exec[i].handle; > obj->exec_entry = &exec[i]; > - eb_add_object(eb, obj); > + if (eb->and < 0) { > + eb->lut[i] = obj; > + } else { > + uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle; > + obj->exec_handle = handle; > + hlist_add_head(&obj->exec_node, > + &eb->buckets[handle & eb->and]); > + } > } > spin_unlock(&file->table_lock); > > @@ -116,18 +132,24 @@ eb_lookup_objects(struct eb_objects *eb, > static struct drm_i915_gem_object * > eb_get_object(struct eb_objects *eb, unsigned long handle) > { > - struct hlist_head *head; > - struct hlist_node *node; > - struct drm_i915_gem_object *obj; > + if (eb->and < 0) { > + if (handle >= -eb->and) > + return NULL; > + return eb->lut[handle]; > + } else { > + struct hlist_head *head; > + struct hlist_node *node; > > - head = &eb->buckets[handle & eb->and]; > - hlist_for_each(node, head) { > - obj = hlist_entry(node, struct drm_i915_gem_object, exec_node); > - if (obj->exec_handle == handle) > - return obj; > - } > + head = &eb->buckets[handle & eb->and]; > + hlist_for_each(node, head) { > + struct drm_i915_gem_object *obj; > > - return NULL; > + obj = hlist_entry(node, struct drm_i915_gem_object, exec_node); > + if (obj->exec_handle == handle) > + return obj; > + } > + return NULL; > + } > } > > static void > @@ -624,7 +646,7 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev, > > /* reacquire the objects */ > eb_reset(eb); > - ret = eb_lookup_objects(eb, exec, count, file); > + ret = eb_lookup_objects(eb, exec, args, file); > if (ret) > goto err; > > @@ -934,7 +956,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, > goto pre_mutex_err; > } > > - eb = eb_create(args->buffer_count); > + eb = eb_create(args); > if (eb == NULL) { > mutex_unlock(&dev->struct_mutex); > ret = -ENOMEM; > @@ -942,7 +964,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, > } > > /* Look up object handles */ > - ret = eb_lookup_objects(eb, exec, args->buffer_count, file); > + ret = eb_lookup_objects(eb, exec, args, file); > if (ret) > goto err; > > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h > index 2430b6a..07d5941 100644 > --- a/include/uapi/drm/i915_drm.h > +++ b/include/uapi/drm/i915_drm.h > @@ -309,6 +309,7 @@ typedef struct drm_i915_irq_wait { > #define I915_PARAM_HAS_SECURE_BATCHES 23 > #define I915_PARAM_HAS_PINNED_BATCHES 24 > #define I915_PARAM_HAS_EXEC_NO_RELOC 25 > +#define I915_PARAM_HAS_EXEC_HANDLE_LUT 26 > > typedef struct drm_i915_getparam { > int param; > @@ -699,7 +700,12 @@ struct drm_i915_gem_execbuffer2 { > */ > #define I915_EXEC_NO_RELOC (1<<11) > > -#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_NO_RELOC<<1) > +/** Use the reloc.handle as an index into the exec object array rather > + * than as the per-file handle. > + */ > +#define I915_EXEC_HANDLE_LUT (1<<12) > + > +#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_HANDLE_LUT<<1) > > #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) > #define i915_execbuffer2_set_context_id(eb2, context) \ > -- > 1.7.10.4 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx at lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch