On 03/09/2015 09:55 AM, Chris Wilson wrote:
Now with the trimmed memcpy before the command parser, we try to allocate many different sizes of batches, predominantly one or two pages. We can therefore speed up searching for a good sized batch by keeping the objects of buckets of roughly the same size. Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/i915_debugfs.c | 46 +++++++++++++++++++----------- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/gpu/drm/i915/i915_gem_batch_pool.c | 45 +++++++++++++++++------------ drivers/gpu/drm/i915/i915_gem_batch_pool.h | 2 +- 5 files changed, 60 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index d8ca5c89647c..042ad2fec484 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -378,15 +378,17 @@ static void print_batch_pool_stats(struct seq_file *m, struct drm_i915_gem_object *obj; struct file_stats stats; struct intel_engine_cs *ring; - int i; + int i, j; memset(&stats, 0, sizeof(stats)); for_each_ring(ring, dev_priv, i) { - list_for_each_entry(obj, - &ring->batch_pool.cache_list, - batch_pool_list) - per_file_stats(0, obj, &stats); + for (j = 0; j < ARRAY_SIZE(ring->batch_pool.cache_list); j++) { + list_for_each_entry(obj, + &ring->batch_pool.cache_list[j], + batch_pool_link) + per_file_stats(0, obj, &stats); + } } print_file_stats(m, "batch pool", stats); @@ -624,26 +626,38 @@ static int i915_gem_batch_pool_info(struct seq_file *m, void *data) struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_object *obj; struct intel_engine_cs *ring; - int count = 0; - int ret, i; + int total = 0; + int ret, i, j; ret = mutex_lock_interruptible(&dev->struct_mutex); if (ret) return ret; for_each_ring(ring, dev_priv, i) { - seq_printf(m, "%s cache:\n", ring->name); - list_for_each_entry(obj, - &ring->batch_pool.cache_list, - batch_pool_list) { - seq_puts(m, " "); - describe_obj(m, obj); - seq_putc(m, '\n'); - count++; + for (j = 0; j < ARRAY_SIZE(ring->batch_pool.cache_list); j++) { + int count; + + count = 0; + list_for_each_entry(obj, + &ring->batch_pool.cache_list[j], + batch_pool_link) + count++; + seq_printf(m, "%s cache[%d]: %d objects\n", + ring->name, j, count); + + list_for_each_entry(obj, + &ring->batch_pool.cache_list[j], + batch_pool_link) { + seq_puts(m, " "); + describe_obj(m, obj); + seq_putc(m, '\n'); + } + + total += count; } } - seq_printf(m, "total: %d\n", count); + seq_printf(m, "total: %d\n", total); mutex_unlock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 9fe1274cfe59..5c7b89bd138d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1893,7 +1893,7 @@ struct drm_i915_gem_object { /** Used in execbuf to temporarily hold a ref */ struct list_head obj_exec_link; - struct list_head batch_pool_list; + struct list_head batch_pool_link; /** * This is set if the object is on the active lists (has pending diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index efb5545251c7..a0c35f80836c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4439,7 +4439,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, INIT_LIST_HEAD(&obj->ring_list); INIT_LIST_HEAD(&obj->obj_exec_link); INIT_LIST_HEAD(&obj->vma_list); - INIT_LIST_HEAD(&obj->batch_pool_list); + INIT_LIST_HEAD(&obj->batch_pool_link); obj->ops = ops; diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c index 1287abf55b84..23374352d8eb 100644 --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c @@ -47,8 +47,12 @@ void i915_gem_batch_pool_init(struct drm_device *dev, struct i915_gem_batch_pool *pool) { + int n; + pool->dev = dev; - INIT_LIST_HEAD(&pool->cache_list); + + for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) + INIT_LIST_HEAD(&pool->cache_list[n]); } /** @@ -59,16 +63,20 @@ void i915_gem_batch_pool_init(struct drm_device *dev, */ void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool) { + int n; + WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex)); - while (!list_empty(&pool->cache_list)) { - struct drm_i915_gem_object *obj = - list_first_entry(&pool->cache_list, - struct drm_i915_gem_object, - batch_pool_list); + for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) { + while (!list_empty(&pool->cache_list[n])) { + struct drm_i915_gem_object *obj = + list_first_entry(&pool->cache_list[n], + struct drm_i915_gem_object, + batch_pool_link); - list_del(&obj->batch_pool_list); - drm_gem_object_unreference(&obj->base); + list_del(&obj->batch_pool_link); + drm_gem_object_unreference(&obj->base); + } } } @@ -91,28 +99,29 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, { struct drm_i915_gem_object *obj = NULL; struct drm_i915_gem_object *tmp, *next; + struct list_head *list; + int n; WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex)); - list_for_each_entry_safe(tmp, next, - &pool->cache_list, batch_pool_list) { + n = fls(size >> PAGE_SHIFT) - 1;
Is this better in some way that size / PAGE_SIZE - 1? Or I went from trusting compiler smartness to little to too much? :)
+ if (n >= ARRAY_SIZE(pool->cache_list)) + n = ARRAY_SIZE(pool->cache_list) - 1; + list = &pool->cache_list[n]; + + list_for_each_entry_safe(tmp, next, list, batch_pool_link) { /* The batches are strictly LRU ordered */ if (tmp->active) break; /* While we're looping, do some clean up */ if (tmp->madv == __I915_MADV_PURGED) { - list_del(&tmp->batch_pool_list); + list_del(&tmp->batch_pool_link); drm_gem_object_unreference(&tmp->base); continue; } - /* - * Select a buffer that is at least as big as needed - * but not 'too much' bigger. A better way to do this - * might be to bucket the pool objects based on size. - */ - if (tmp->base.size >= size && tmp->base.size <= 2 * size) { + if (tmp->base.size >= size) { obj = tmp; break; } @@ -132,7 +141,7 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, obj->madv = I915_MADV_DONTNEED; } - list_move_tail(&obj->batch_pool_list, &pool->cache_list); + list_move_tail(&obj->batch_pool_link, list); i915_gem_object_pin_pages(obj); return obj; } diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.h b/drivers/gpu/drm/i915/i915_gem_batch_pool.h index 5ed70ef6a887..848e90703eed 100644 --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.h +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.h @@ -29,7 +29,7 @@ struct i915_gem_batch_pool { struct drm_device *dev; - struct list_head cache_list; + struct list_head cache_list[4]; }; /* i915_gem_batch_pool.c */
Pretty straightforward and trusting you on bucket sizes, Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> Regards, Tvrtko _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx