Re: [PATCH v5 27/35] drm/i915: Added debug state dump facilities to scheduler

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The intention of the state dump code is not really for it to be a user accessible debugfs entry (although one of the later patches does add a debugfs interface). It is more intended for debugging lock ups and unexpected behaviour by adding a dump function call to whatever WARN_ON or similar is being hit. So in theory, the code should not be necessary and there is no point upstreaming it. On the other hand, it does come in very handy when debugging scheduler related issues and it is complicated enough that you can't just knock it up in five minutes when a bug report is logged.

The code could certainly be moved into a separate file, e.g. i915_scheduler_debug.c. I don't think it would be good to move it to debugfs and make it all 'seq_printf' style output only. It is much more use as ordinary printk style output so you can call it directly at the point of detecting an inconsistent state and get the dmesg output prior to the kernel panic.


On 07/03/2016 12:31, Joonas Lahtinen wrote:
Hi,

On to, 2016-02-18 at 14:27 +0000, John.C.Harrison@xxxxxxxxx wrote:
From: John Harrison <John.C.Harrison@xxxxxxxxx>

When debugging batch buffer submission issues, it is useful to be able
to see what the current state of the scheduler is. This change adds
functions for decoding the internal scheduler state and reporting it.

v3: Updated a debug message with the new state_str() function.

v4: Wrapped some long lines to keep the style checker happy. Removed
the fence/sync code as that will now be part of a separate patch series.

v5: Removed forward declarations and white space. Added documentation.
[Joonas Lahtinen]

Also squashed in later patch to add seqno information from the start.
It was only being added in a separate patch due to historical reasons
which have since gone away.

For: VIZ-1587
Signed-off-by: John Harrison <John.C.Harrison@xxxxxxxxx>
Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx>
---
  drivers/gpu/drm/i915/i915_scheduler.c | 302 +++++++++++++++++++++++++++++++++-
  drivers/gpu/drm/i915/i915_scheduler.h |  15 ++
  2 files changed, 315 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index f7f29d5..d0eed52 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -40,6 +40,117 @@ bool i915_scheduler_is_enabled(struct drm_device *dev)
  	return dev_priv->scheduler != NULL;
  }

These sort of functions should be in i915_debugfs.c, so that nobody
even thinks of using them for other purposes.

+const char *i915_qe_state_str(struct i915_scheduler_queue_entry *node)
+{
+	static char	str[50];
+	char		*ptr = str;
+
+	*(ptr++) = node->bumped ? 'B' : '-',
+	*(ptr++) = i915_gem_request_completed(node->params.request) ? 'C' : '-';
+
Especially this kind of code needs to be in i915_debugfs.c. I'd
implement new code more along lines of i915_sseu_status(), but I see
this kind of code was previously merged.

+	*ptr = 0;
+
+	return str;
+}
+
+char i915_scheduler_queue_status_chr(enum i915_scheduler_queue_status status)
+{
+	switch (status) {
+	case i915_sqs_none:
+	return 'N';
+
+	case i915_sqs_queued:
+	return 'Q';
+
+	case i915_sqs_popped:
+	return 'X';
+
+	case i915_sqs_flying:
+	return 'F';
+
+	case i915_sqs_complete:
+	return 'C';
+
+	case i915_sqs_dead:
+	return 'D';
+
+	default:
+	break;
+	}
+
Bad indent.

+	return '?';
+}
+
+const char *i915_scheduler_queue_status_str(
+				enum i915_scheduler_queue_status status)
+{
+	static char	str[50];
+
+	switch (status) {
+	case i915_sqs_none:
+	return "None";
+
+	case i915_sqs_queued:
+	return "Queued";
+
+	case i915_sqs_popped:
+	return "Popped";
+
+	case i915_sqs_flying:
+	return "Flying";
+
+	case i915_sqs_complete:
+	return "Complete";
+
+	case i915_sqs_dead:
+	return "Dead";
+
+	default:
+	break;
+	}
+
+	sprintf(str, "[Unknown_%d!]", status);
+	return str;
+}
+
+const char *i915_scheduler_flag_str(uint32_t flags)
+{
+	static char str[100];
+	char *ptr = str;
+
+	*ptr = 0;
+
+#define TEST_FLAG(flag, msg)						\
+	do {								\
+		if (flags & (flag)) {					\
+			strcpy(ptr, msg);				\
+			ptr += strlen(ptr);				\
+			flags &= ~(flag);				\
+		}							\
+	} while (0)
+
+	TEST_FLAG(i915_sf_interrupts_enabled, "IntOn|");
+	TEST_FLAG(i915_sf_submitting,         "Submitting|");
+	TEST_FLAG(i915_sf_dump_force,         "DumpForce|");
+	TEST_FLAG(i915_sf_dump_details,       "DumpDetails|");
+	TEST_FLAG(i915_sf_dump_dependencies,  "DumpDeps|");
+	TEST_FLAG(i915_sf_dump_seqno,         "DumpSeqno|");
+
+#undef TEST_FLAG
+
+	if (flags) {
+		sprintf(ptr, "Unknown_0x%X!", flags);
+		ptr += strlen(ptr);
+	}
+
+	if (ptr == str)
+		strcpy(str, "-");
+	else
+		ptr[-1] = 0;
+
+	return str;
+};
+
  /**
   * i915_scheduler_init - Initialise the scheduler.
   * @dev: DRM device
@@ -1024,6 +1135,193 @@ void i915_scheduler_work_handler(struct work_struct *work)
  		i915_scheduler_process_work(ring);
  }
+static int i915_scheduler_dump_locked(struct intel_engine_cs *ring,
+				      const char *msg)
+{
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	struct i915_scheduler *scheduler = dev_priv->scheduler;
+	struct i915_scheduler_queue_entry *node;
+	int flying = 0, queued = 0, complete = 0, other = 0;
+	static int old_flying = -1, old_queued = -1, old_complete = -1;
+	bool b_dump;
+	char brkt[2] = { '<', '>' };
+
+	if (!ring)
+		return -EINVAL;
+
+	list_for_each_entry(node, &scheduler->node_queue[ring->id], link) {
+		if (I915_SQS_IS_QUEUED(node))
+			queued++;
+		else if (I915_SQS_IS_FLYING(node))
+			flying++;
+		else if (I915_SQS_IS_COMPLETE(node))
+			complete++;
+		else
+			other++;
+	}
+
+	b_dump = (flying != old_flying) ||
+		 (queued != old_queued) ||
+		 (complete != old_complete);
+	if (scheduler->flags[ring->id] & i915_sf_dump_force) {
+		if (!b_dump) {
+			b_dump = true;
+			brkt[0] = '{';
+			brkt[1] = '}';
+		}
+
+		scheduler->flags[ring->id] &= ~i915_sf_dump_force;
+	}
+
+	if (b_dump) {
+		old_flying   = flying;
+		old_queued   = queued;
+		old_complete = complete;
+		DRM_DEBUG_DRIVER("<%s> Q:%02d, F:%02d, C:%02d, O:%02d, "
+				 "Flags = %s, Next = %d:%d %c%s%c\n",
+				 ring->name, queued, flying, complete, other,
+				 i915_scheduler_flag_str(scheduler->flags[ring->id]),
+				 dev_priv->request_uniq, dev_priv->next_seqno,
+				 brkt[0], msg, brkt[1]);
Convert to debugfs, it's the de-facto.

+	} else {
+		/*DRM_DEBUG_DRIVER("<%s> Q:%02d, F:%02d, C:%02d, O:%02d"
+				 ", Flags = %s, Next = %d:%d [%s]\n",
+				 ring->name,
+				 queued, flying, complete, other,
+				 i915_scheduler_flag_str(scheduler->flags[ring->id]),
+				 dev_priv->request_uniq, dev_priv->next_seqno, msg); */
+
Dead code to be removed from patches.

+		return 0;
+	}
+
+	if (scheduler->flags[ring->id] & i915_sf_dump_seqno) {
+		uint32_t seqno;
+
+		seqno = ring->get_seqno(ring, true);
+
+		DRM_DEBUG_DRIVER("<%s> Seqno = %d\n", ring->name, seqno);
+	}
+
+	if (scheduler->flags[ring->id] & i915_sf_dump_details) {
+		int i, deps;
+		uint32_t count, counts[i915_sqs_MAX];
+
+		memset(counts, 0x00, sizeof(counts));
+
+		list_for_each_entry(node, &scheduler->node_queue[ring->id], link) {
+			if (node->status < i915_sqs_MAX) {
+				count = counts[node->status]++;
+			} else {
+				DRM_DEBUG_DRIVER("<%s>   Unknown status: %d!\n",
+						 ring->name, node->status);
+				count = -1;
+			}
+
+			deps = 0;
+			for (i = 0; i < node->num_deps; i++)
+				if (i915_scheduler_is_dependency_valid(node, i))
+					deps++;
+
+			DRM_DEBUG_DRIVER("<%s>   %c:%02d> uniq = %d, seqno"
+					 " = %d/%s, deps = %d / %d, %s [pri = "
+					 "%4d]\n", ring->name,
+					 i915_scheduler_queue_status_chr(node->status),
+					 count,
+					 node->params.request->uniq,
+					 node->params.request->seqno,
+					 node->params.ring->name,
+					 deps, node->num_deps,
+					 i915_qe_state_str(node),
+					 node->priority);
+
+			if ((scheduler->flags[ring->id] & i915_sf_dump_dependencies)
+				== 0)
+				continue;
+
+			for (i = 0; i < node->num_deps; i++)
+				if (node->dep_list[i])
+					DRM_DEBUG_DRIVER("<%s>       |-%c:"
+						"%02d%c uniq = %d, seqno = %d/%s, %s [pri = %4d]\n",
+						ring->name,
+						i915_scheduler_queue_status_chr(node->dep_list[i]->status),
+						i,
+						i915_scheduler_is_dependency_valid(node, i)
+							? '>' : '#',
+						node->dep_list[i]->params.request->uniq,
+						node->dep_list[i]->params.request->seqno,
+						node->dep_list[i]->params.ring->name,
+						i915_qe_state_str(node->dep_list[i]),
+						node->dep_list[i]->priority);
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * i915_scheduler_dump - dump the scheduler's internal state to the debug log.
+ * @ring: Ring to dump info for
+ * @msg: A reason why it is being dumped
+ * For debugging purposes, it can be very useful to see the internal state of
+ * the scheduler for a given ring.
+ */
+int i915_scheduler_dump(struct intel_engine_cs *ring, const char *msg)
+{
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	struct i915_scheduler *scheduler = dev_priv->scheduler;
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&scheduler->lock, flags);
+	ret = i915_scheduler_dump_locked(ring, msg);
+	spin_unlock_irqrestore(&scheduler->lock, flags);
+
+	return ret;
+}
+
This kind of stuff especially should be static functions in
i915_debugfs.c. And then they don't need kerneldoc either, because we
expect no sane invidual to call them.

+static int i915_scheduler_dump_all_locked(struct drm_device *dev,
+					  const char *msg)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct i915_scheduler *scheduler = dev_priv->scheduler;
+	struct intel_engine_cs *ring;
+	int i, r, ret = 0;
+
+	for_each_ring(ring, dev_priv, i) {
+		scheduler->flags[ring->id] |= i915_sf_dump_force   |
+					      i915_sf_dump_details |
+					      i915_sf_dump_seqno   |
+					      i915_sf_dump_dependencies;
This looks really bad to me, invasive debugging. Rather to be carried
as function parameters in the debug functions, and they should be
uppercase enum in i915_debugfs.c too.

+		r = i915_scheduler_dump_locked(ring, msg);
+		if (ret == 0)
+			ret = r;
+	}
+
+	return ret;
+}
+
+/**
+ * i915_scheduler_dump_all - dump the scheduler's internal state to the debug
+ * log.
+ * @dev: DRM device
+ * @msg: A reason why it is being dumped
+ * For debugging purposes, it can be very useful to see the internal state of
+ * the scheduler.
+ */
+int i915_scheduler_dump_all(struct drm_device *dev, const char *msg)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct i915_scheduler *scheduler = dev_priv->scheduler;
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&scheduler->lock, flags);
+	ret = i915_scheduler_dump_all_locked(dev, msg);
+	spin_unlock_irqrestore(&scheduler->lock, flags);
+
+	return ret;
+}
+
  static int i915_scheduler_submit_max_priority(struct intel_engine_cs *ring,
  					      bool is_locked)
  {
@@ -1246,10 +1544,10 @@ int i915_scheduler_closefile(struct drm_device *dev, struct drm_file *file)
  				continue;
if (!I915_SQS_IS_COMPLETE(node))
-				DRM_DEBUG_DRIVER("Closing file handle with outstanding work: %d:%d/%d on %s\n",
+				DRM_DEBUG_DRIVER("Closing file handle with outstanding work: %d:%d/%s on %s\n",
  						 node->params.request->uniq,
  						 node->params.request->seqno,
-						 node->status,
+						 i915_qe_state_str(node),
If the code is to be called from outside of i915_debugfs.c, it should
really be written in a more safe manner than static char, especially as
it is not much of an effort to make a table to index.

Regards, Joonas

  						 ring->name);
i915_scheduler_file_queue_dec(node->params.file);
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index b78de12..a071ebb 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -41,6 +41,9 @@ enum i915_scheduler_queue_status {
  	/* Limit value for use with arrays/loops */
  	i915_sqs_MAX
  };
+char i915_scheduler_queue_status_chr(enum i915_scheduler_queue_status status);
+const char *i915_scheduler_queue_status_str(
+				enum i915_scheduler_queue_status status);
#define I915_SQS_IS_QUEUED(node) (((node)->status == i915_sqs_queued))
  #define I915_SQS_IS_FLYING(node)	(((node)->status == i915_sqs_flying))
@@ -65,6 +68,7 @@ struct i915_scheduler_queue_entry {
  	unsigned long                       stamp;
  	struct list_head                    link;
  };
+const char *i915_qe_state_str(struct i915_scheduler_queue_entry *node);
struct i915_scheduler {
  	struct list_head    node_queue[I915_NUM_RINGS];
@@ -82,9 +86,17 @@ struct i915_scheduler {
/* Flag bits for i915_scheduler::flags */
  enum {
+	/* Internal state */
  	i915_sf_interrupts_enabled  = (1 << 0),
  	i915_sf_submitting          = (1 << 1),
+
+	/* Dump/debug flags */
+	i915_sf_dump_force          = (1 << 8),
+	i915_sf_dump_details        = (1 << 9),
+	i915_sf_dump_dependencies   = (1 << 10),
+	i915_sf_dump_seqno          = (1 << 11),
  };
+const char *i915_scheduler_flag_str(uint32_t flags);
bool i915_scheduler_is_enabled(struct drm_device *dev);
  int i915_scheduler_init(struct drm_device *dev);
@@ -99,6 +111,9 @@ void i915_scheduler_work_handler(struct work_struct *work);
  int i915_scheduler_flush(struct intel_engine_cs *ring, bool is_locked);
  int i915_scheduler_flush_stamp(struct intel_engine_cs *ring,
  			       unsigned long stamp, bool is_locked);
+int i915_scheduler_dump(struct intel_engine_cs *ring,
+			const char *msg);
+int i915_scheduler_dump_all(struct drm_device *dev, const char *msg);
  bool i915_scheduler_is_request_tracked(struct drm_i915_gem_request *req,
  				       bool *completed, bool *busy);
  bool i915_scheduler_file_queue_is_full(struct drm_file *file);

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux