[PATCH] drm/i915: Compute a loadavg as the number of pending requests

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The number of pending requests correlates to the number of batches
queued and so with the amount of work being performed by the GPU. We
borrow the concept of loadavg from the scheduler, using the count of
batches as the count of runnables, and simply adjust the timescales for
a 60x higher sample rate, i.e. the three values are the load average for
approximately the last 1s, 5s and 15s respectively. The counter is
started only once GPU activity is detected, and is then sampled at 60 Hz
until 10s after the GPU goes idle before the counters are reset and
sampling ceases.

Sample output for running aa10text under gnome-shell:

  Total: 55.29 57.54 61.14
  render ring: 54.80 57.17 60.76
  gen6 bsd ring: 0.00 0.00 0.00
  blt ring: 0.48 0.37 0.37

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Eugeni Dodonov <eugeni.dodonov at intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c     |   46 ++++++++++++++++
 drivers/gpu/drm/i915/i915_drv.h         |    4 ++
 drivers/gpu/drm/i915/i915_gem.c         |   86 +++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_ringbuffer.h |    5 ++
 4 files changed, 141 insertions(+), 0 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index bc72821..fde0dcf 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1414,6 +1414,51 @@ static int i915_ppgtt_info(struct seq_file *m, void *data)
 	return 0;
 }
 
+static int i915_gem_loadavg_info(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int i, ret;
+
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
+	/* Split a fixed-point value into its whole part and a two-digit fraction. */
+#define LOAD_INT(x) ((x) >> FSHIFT)
+#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
+
+	/* Print the device-wide average, then one line per ring: mean number of
+	 * requests over nominally the last 1s, 5s, 15s, sampled at roughly 60Hz.
+	 */
+	seq_printf(m, "Total: %lu.%02lu %lu.%02lu %lu.%02lu\n",
+		   LOAD_INT(dev_priv->loadavg[0]),
+		   LOAD_FRAC(dev_priv->loadavg[0]),
+		   LOAD_INT(dev_priv->loadavg[1]),
+		   LOAD_FRAC(dev_priv->loadavg[1]),
+		   LOAD_INT(dev_priv->loadavg[2]),
+		   LOAD_FRAC(dev_priv->loadavg[2]));
+
+	for (i = 0; i < I915_NUM_RINGS; i++) {
+		struct intel_ring_buffer *ring = &dev_priv->ring[i];
+
+		seq_printf(m, "%s: %lu.%02lu %lu.%02lu %lu.%02lu\n",
+			   ring->name,
+			   LOAD_INT(ring->loadavg[0]),
+			   LOAD_FRAC(ring->loadavg[0]),
+			   LOAD_INT(ring->loadavg[1]),
+			   LOAD_FRAC(ring->loadavg[1]),
+			   LOAD_INT(ring->loadavg[2]),
+			   LOAD_FRAC(ring->loadavg[2]));
+	}
+
+#undef LOAD_INT
+#undef LOAD_FRAC
+
+	mutex_unlock(&dev->struct_mutex);
+	return 0;
+}
+
 static int
 i915_debugfs_common_open(struct inode *inode,
 			 struct file *filp)
@@ -1817,6 +1862,7 @@ static struct drm_info_list i915_debugfs_list[] = {
 	{"i915_gen6_forcewake_count", i915_gen6_forcewake_count_info, 0},
 	{"i915_swizzle_info", i915_swizzle_info, 0},
 	{"i915_ppgtt_info", i915_ppgtt_info, 0},
+	{"i915_gem_loadavg", i915_gem_loadavg_info, 0},
 };
 #define I915_DEBUGFS_ENTRIES ARRAY_SIZE(i915_debugfs_list)
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ee8900f..db809b3 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -360,6 +360,10 @@ typedef struct drm_i915_private {
 	uint32_t last_instdone;
 	uint32_t last_instdone1;
 
+	unsigned long loadavg[3];
+	unsigned long loadavg_last_sample;
+	struct delayed_work loadavg_work;
+
 	unsigned int stop_rings;
 
 	unsigned long cfb_size;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 99c1bd8..931e7a6 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1867,6 +1867,84 @@ i915_gem_retire_requests(struct drm_device *dev)
 		i915_gem_retire_requests_ring(&dev_priv->ring[i]);
 }
 
+static unsigned long
+calc_load(unsigned long load, unsigned long exp, unsigned long active)
+{
+	unsigned long acc = load * exp;	/* decay the previous average */
+
+	acc += active * (FIXED_1 - exp);	/* blend in the new sample */
+	return (acc + (1UL << (FSHIFT - 1))) >> FSHIFT;	/* round to nearest */
+}
+
+static long
+i915_gem_update_loadavg(struct drm_i915_private *dev_priv)
+{
+	struct list_head *pos;
+	long total = 0;
+	int i;
+	/* Take one sample: count each ring's listed requests and fold them in. */
+	for (i = 0; i < I915_NUM_RINGS; i++) {
+		struct intel_ring_buffer *ring = &dev_priv->ring[i];
+		long n;
+
+		n = 0;
+		list_for_each(pos, &ring->request_list)
+			n++;
+		total += n;
+
+		n = n > 0 ? n * FIXED_1 : 0;	/* scale the count to fixed point */
+		ring->loadavg[0] = calc_load(ring->loadavg[0], EXP_1, n);
+		ring->loadavg[1] = calc_load(ring->loadavg[1], EXP_5, n);
+		ring->loadavg[2] = calc_load(ring->loadavg[2], EXP_15, n);
+	}
+	/* The same update, applied to the device-wide sum of all rings. */
+	total = total > 0 ? total * FIXED_1 : 0;
+	dev_priv->loadavg[0] = calc_load(dev_priv->loadavg[0], EXP_1, total);
+	dev_priv->loadavg[1] = calc_load(dev_priv->loadavg[1], EXP_5, total);
+	dev_priv->loadavg[2] = calc_load(dev_priv->loadavg[2], EXP_15, total);
+
+	return total;	/* scaled sum; zero when no requests were counted */
+}
+
+static void
+i915_gem_reset_loadavg(struct drm_i915_private *dev_priv)
+{
+	int n = I915_NUM_RINGS;
+
+	/* Zero every average: the device-wide totals, then each ring's. */
+	memset(dev_priv->loadavg, 0, sizeof(dev_priv->loadavg));
+
+	while (n--)
+		memset(dev_priv->ring[n].loadavg, 0,
+		       sizeof(dev_priv->ring[n].loadavg));
+}
+
+static void
+i915_gem_loadavg_worker(struct work_struct *work)
+{
+	struct drm_i915_private *dev_priv;
+
+	dev_priv = container_of(work, struct drm_i915_private,
+				loadavg_work.work);
+	/* Contended: retry next jiffy; a zero delay would respin at once. */
+	if (!mutex_trylock(&dev_priv->dev->struct_mutex)) {
+		queue_delayed_work(dev_priv->wq, &dev_priv->loadavg_work, 1);
+		return;
+	}
+	/* Take one sample; once idle for over 10s, reset and stop sampling. */
+	if (i915_gem_update_loadavg(dev_priv)) {
+		dev_priv->loadavg_last_sample = jiffies;
+	} else if (time_before(dev_priv->loadavg_last_sample + 10*HZ, jiffies)) {
+		i915_gem_reset_loadavg(dev_priv);
+		dev_priv->loadavg_last_sample = 0;	/* zero marks sampling as stopped */
+	}
+	if (dev_priv->loadavg_last_sample)
+		queue_delayed_work(dev_priv->wq,
+				   &dev_priv->loadavg_work, HZ/60);	/* next sample, ~60Hz */
+
+	mutex_unlock(&dev_priv->dev->struct_mutex);
+}
+
 static void
 i915_gem_retire_work_handler(struct work_struct *work)
 {
@@ -1912,7 +1990,13 @@ i915_gem_retire_work_handler(struct work_struct *work)
 	if (!dev_priv->mm.suspended && !idle)
 		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
 
+	/* Initiate loadavg sampling? */
+	idle = !idle && dev_priv->loadavg_last_sample == 0;
+
 	mutex_unlock(&dev->struct_mutex);
+
+	if (idle)
+		i915_gem_loadavg_worker(&dev_priv->loadavg_work.work);
 }
 
 /**
@@ -3805,6 +3889,8 @@ i915_gem_load(struct drm_device *dev)
 		INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
 	INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
 			  i915_gem_retire_work_handler);
+	INIT_DELAYED_WORK(&dev_priv->loadavg_work,
+			  i915_gem_loadavg_worker);
 	init_completion(&dev_priv->error_completion);
 
 	/* On GEN3 we really need to make sure the ARB C3 LP bit is set */
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index c8b9cc0..2d267bc 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -107,6 +107,11 @@ struct  intel_ring_buffer {
 	 */
 	u32 outstanding_lazy_request;
 
+	/**
+	 * An average of the number of requests over the last 1, 5, 15 seconds.
+	 */
+	unsigned long loadavg[3];
+
 	wait_queue_head_t irq_queue;
 	drm_local_map_t map;
 
-- 
1.7.8.2



[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux