Signed-off-by: John Harrison <John.C.Harrison@xxxxxxxxx>
---
drivers/gpu/drm/i915/i915_gpu_error.c | 132 ++++++++++++++++++++++++++
drivers/gpu/drm/i915/i915_gpu_error.h | 10 ++
2 files changed, 142 insertions(+)
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index f020c0086fbcd..03d62c250c465 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -2219,3 +2219,135 @@ void i915_disable_error_state(struct drm_i915_private *i915, int err)
i915->gpu_error.first_error = ERR_PTR(err);
spin_unlock_irq(&i915->gpu_error.lock);
}
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+void intel_klog_error_capture(struct intel_gt *gt,
+ intel_engine_mask_t engine_mask)
+{
+ static int g_count;
+ struct drm_i915_private *i915 = gt->i915;
+ struct i915_gpu_coredump *error;
+ intel_wakeref_t wakeref;
+ size_t buf_size = PAGE_SIZE * 128;
+ size_t pos_err;
+ char *buf, *ptr, *next;
+ int l_count = g_count++;
+ int line = 0;
+
+ /* Can't allocate memory during a reset */
+ if (test_bit(I915_RESET_BACKOFF, >->reset.flags)) {
+ drm_err(>->i915->drm, "[Capture/%d.%d] Inside GT reset, skipping error capture :(\n",
+ l_count, line++);
+ return;
+ }
+
+ error = READ_ONCE(i915->gpu_error.first_error);
+ if (error) {
+ drm_err(&i915->drm, "[Capture/%d.%d] Clearing existing error capture first...\n",
+ l_count, line++);
+ i915_reset_error_state(i915);
+ }
+
+ with_intel_runtime_pm(&i915->runtime_pm, wakeref)
+ error = i915_gpu_coredump(gt, engine_mask, CORE_DUMP_FLAG_NONE);
+
+ if (IS_ERR(error)) {
+ drm_err(&i915->drm, "[Capture/%d.%d] Failed to capture error capture: %ld!\n",
+ l_count, line++, PTR_ERR(error));
+ return;
+ }
+
+ buf = kvmalloc(buf_size, GFP_KERNEL);
+ if (!buf) {
+ drm_err(&i915->drm, "[Capture/%d.%d] Failed to allocate buffer for error capture!\n",
+ l_count, line++);
+ i915_gpu_coredump_put(error);
+ return;
+ }
+
+ drm_info(&i915->drm, "[Capture/%d.%d] Dumping i915 error capture for %ps...\n",
+ l_count, line++, __builtin_return_address(0));
+
+ /* Largest string length safe to print via dmesg */
+# define MAX_CHUNK 800
+
+ pos_err = 0;
+ while (1) {
+ ssize_t got = i915_gpu_coredump_copy_to_buffer(error, buf, pos_err, buf_size - 1);
+
+ if (got <= 0)
+ break;
+
+ buf[got] = 0;
+ pos_err += got;
+
+ ptr = buf;
+ while (got > 0) {
+ size_t count;
+ char tag[2];
+
+ next = strnchr(ptr, got, '\n');
+ if (next) {
+ count = next - ptr;
+ *next = 0;
+ tag[0] = '>';
+ tag[1] = '<';
+ } else {
+ count = got;
+ tag[0] = '}';
+ tag[1] = '{';
+ }
+
+ if (count > MAX_CHUNK) {
+ size_t pos;
+ char *ptr2 = ptr;
+
+ for (pos = MAX_CHUNK; pos < count; pos += MAX_CHUNK) {
+ char chr = ptr[pos];
+
+ ptr[pos] = 0;
+ drm_info(&i915->drm, "[Capture/%d.%d] }%s{\n",
+ l_count, line++, ptr2);
+ ptr[pos] = chr;
+ ptr2 = ptr + pos;
+
+ /*
+ * If spewing large amounts of data via a serial console,
+ * this can be a very slow process. So be friendly and try
+ * not to cause 'softlockup on CPU' problems.
+ */
+ cond_resched();
+ }
+
+ if (ptr2 < (ptr + count))
+ drm_info(&i915->drm, "[Capture/%d.%d] %c%s%c\n",
+ l_count, line++, tag[0], ptr2, tag[1]);
+ else if (tag[0] == '>')
+ drm_info(&i915->drm, "[Capture/%d.%d] ><\n",
+ l_count, line++);
+ } else {
+ drm_info(&i915->drm, "[Capture/%d.%d] %c%s%c\n",
+ l_count, line++, tag[0], ptr, tag[1]);
+ }
+
+ ptr = next;
+ got -= count;
+ if (next) {
+ ptr++;
+ got--;
+ }
+
+ /* As above. */
+ cond_resched();
+ }
+
+ if (got)
+ drm_info(&i915->drm, "[Capture/%d.%d] Got %zd bytes remaining!\n",
+ l_count, line++, got);
+ }
+
+ kvfree(buf);
+
+ drm_info(&i915->drm, "[Capture/%d.%d] Dumped %zd bytes\n", l_count, line++, pos_err);
+}
+#endif
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index a91932cc65317..a78c061ce26fb 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -258,6 +258,16 @@ static inline u32 i915_reset_engine_count(struct i915_gpu_error *error,
#define CORE_DUMP_FLAG_NONE 0x0
#define CORE_DUMP_FLAG_IS_GUC_CAPTURE BIT(0)
+#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) && IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+void intel_klog_error_capture(struct intel_gt *gt,
+ intel_engine_mask_t engine_mask);
+#else
+static inline void intel_klog_error_capture(struct intel_gt *gt,
+ intel_engine_mask_t engine_mask)
+{
+}
+#endif
+
#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
__printf(2, 3)