Re: [PATCH 3/6] drm/i915: Split the batch pool by engine

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 03/09/2015 09:55 AM, Chris Wilson wrote:
I woke up one morning and found 50k objects sitting in the batch pool
and every search seemed to iterate the entire list... Painting the
screen in oils would provide a more fluid display.

One issue with the current design is that we only check for retirements
on the current ring when preparing to submit a new batch. This means
that we can have thousands of "active" batches on another ring that we
have to walk over. The simplest way to avoid that is to split the pools
per ring and then our LRU execution ordering will also ensure that the
inactive buffers remain at the front.

Is the retire work handler not keeping up? Regularly or under some specific circumstances?

v2: execlists still requires duplicate code.

Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
---
  drivers/gpu/drm/i915/i915_debugfs.c        | 33 ++++++++++++++++++------------
  drivers/gpu/drm/i915/i915_dma.c            |  1 -
  drivers/gpu/drm/i915/i915_drv.h            |  8 --------
  drivers/gpu/drm/i915/i915_gem.c            |  2 --
  drivers/gpu/drm/i915/i915_gem_batch_pool.c |  3 ++-
  drivers/gpu/drm/i915/i915_gem_execbuffer.c |  3 +--
  drivers/gpu/drm/i915/intel_lrc.c           |  1 +
  drivers/gpu/drm/i915/intel_ringbuffer.c    |  2 ++
  drivers/gpu/drm/i915/intel_ringbuffer.h    |  8 ++++++++
  9 files changed, 34 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index d2e6475b0466..d8ca5c89647c 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -377,13 +377,17 @@ static void print_batch_pool_stats(struct seq_file *m,
  {
  	struct drm_i915_gem_object *obj;
  	struct file_stats stats;
+	struct intel_engine_cs *ring;
+	int i;

  	memset(&stats, 0, sizeof(stats));

-	list_for_each_entry(obj,
-			    &dev_priv->mm.batch_pool.cache_list,
-			    batch_pool_list)
-		per_file_stats(0, obj, &stats);
+	for_each_ring(ring, dev_priv, i) {
+		list_for_each_entry(obj,
+				    &ring->batch_pool.cache_list,
+				    batch_pool_list)
+			per_file_stats(0, obj, &stats);
+	}

  	print_file_stats(m, "batch pool", stats);
  }
@@ -619,21 +623,24 @@ static int i915_gem_batch_pool_info(struct seq_file *m, void *data)
  	struct drm_device *dev = node->minor->dev;
  	struct drm_i915_private *dev_priv = dev->dev_private;
  	struct drm_i915_gem_object *obj;
+	struct intel_engine_cs *ring;
  	int count = 0;
-	int ret;
+	int ret, i;

  	ret = mutex_lock_interruptible(&dev->struct_mutex);
  	if (ret)
  		return ret;

-	seq_puts(m, "cache:\n");
-	list_for_each_entry(obj,
-			    &dev_priv->mm.batch_pool.cache_list,
-			    batch_pool_list) {
-		seq_puts(m, "   ");
-		describe_obj(m, obj);
-		seq_putc(m, '\n');
-		count++;
+	for_each_ring(ring, dev_priv, i) {
+		seq_printf(m, "%s cache:\n", ring->name);
+		list_for_each_entry(obj,
+				    &ring->batch_pool.cache_list,
+				    batch_pool_list) {
+			seq_puts(m, "   ");
+			describe_obj(m, obj);
+			seq_putc(m, '\n');
+			count++;
+		}
  	}

  	seq_printf(m, "total: %d\n", count);
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 8e914303b831..70e050ac02b4 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1062,7 +1062,6 @@ int i915_driver_unload(struct drm_device *dev)

  	mutex_lock(&dev->struct_mutex);
  	i915_gem_cleanup_ringbuffer(dev);
-	i915_gem_batch_pool_fini(&dev_priv->mm.batch_pool);
  	i915_gem_context_fini(dev);
  	mutex_unlock(&dev->struct_mutex);
  	i915_gem_cleanup_stolen(dev);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b62e3c478221..9fe1274cfe59 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -37,7 +37,6 @@
  #include "intel_bios.h"
  #include "intel_ringbuffer.h"
  #include "intel_lrc.h"
-#include "i915_gem_batch_pool.h"
  #include "i915_gem_gtt.h"
  #include "i915_gem_render_state.h"
  #include <linux/io-mapping.h>
@@ -1149,13 +1148,6 @@ struct i915_gem_mm {
  	 */
  	struct list_head unbound_list;

-	/*
-	 * A pool of objects to use as shadow copies of client batch buffers
-	 * when the command parser is enabled. Prevents the client from
-	 * modifying the batch contents after software parsing.
-	 */
-	struct i915_gem_batch_pool batch_pool;
-
  	/** Usable portion of the GTT for GEM */
  	unsigned long stolen_base; /* limited to low memory (32-bit) */

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4d8fb3f4a7a1..9f33005bdfd1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -5039,8 +5039,6 @@ i915_gem_load(struct drm_device *dev)
  	dev_priv->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom;
  	register_oom_notifier(&dev_priv->mm.oom_notifier);

-	i915_gem_batch_pool_init(dev, &dev_priv->mm.batch_pool);
-
  	mutex_init(&dev_priv->fb_tracking.lock);
  }

diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
index 21f3356cc0ab..1287abf55b84 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
@@ -96,8 +96,9 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,

  	list_for_each_entry_safe(tmp, next,
  				 &pool->cache_list, batch_pool_list) {
+		/* The batches are strictly LRU ordered */
  		if (tmp->active)
-			continue;
+			break;

This hunk would be a better fit for 2/6, correct?

  		/* While we're looping, do some clean up */
  		if (tmp->madv == __I915_MADV_PURGED) {
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 36f8f05928d1..8129a8c75a21 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1152,12 +1152,11 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *ring,
  			  u32 batch_len,
  			  bool is_master)
  {
-	struct drm_i915_private *dev_priv = to_i915(batch_obj->base.dev);
  	struct drm_i915_gem_object *shadow_batch_obj;
  	struct i915_vma *vma;
  	int ret;

-	shadow_batch_obj = i915_gem_batch_pool_get(&dev_priv->mm.batch_pool,
+	shadow_batch_obj = i915_gem_batch_pool_get(&ring->batch_pool,
  						   PAGE_ALIGN(batch_len));
  	if (IS_ERR(shadow_batch_obj))
  		return shadow_batch_obj;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index fcb074bd55dc..a6d4d5502188 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1421,6 +1421,7 @@ static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin
  	ring->dev = dev;
  	INIT_LIST_HEAD(&ring->active_list);
  	INIT_LIST_HEAD(&ring->request_list);
+	i915_gem_batch_pool_init(dev, &ring->batch_pool);
  	init_waitqueue_head(&ring->irq_queue);

  	INIT_LIST_HEAD(&ring->execlist_queue);


Is the corresponding cleanup (i915_gem_batch_pool_fini) missing for the _lrc path? The hunk adds i915_gem_batch_pool_init() in logical_ring_init() but I don't see a matching fini.

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx





[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux