Does anyone even know what that means or why it (apparently totally
randomly) hits some patch sets and not others?
If you mean the checkpatch warnings. One is about not reporting out of
memory errors (because you are supposed to return -ENOMEM and let the user
handle it instead), but that doesn't apply for an internal kernel only
thing which is already just a debug print. The other is about macro
argument re-use, which is not an issue in this case and not worth
re-writing the code to avoid.
John.
> LGTM,
> Reviewed-by: Vinay Belgaumkar [3]<vinay.belgaumkar@xxxxxxxxx>
>> Signed-off-by: John Harrison [4]<John.C.Harrison@xxxxxxxxx>
>> ---
>> drivers/gpu/drm/i915/i915_gpu_error.c | 132
>> ++++++++++++++++++++++++++
>> drivers/gpu/drm/i915/i915_gpu_error.h | 10 ++
>> 2 files changed, 142 insertions(+)
>> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c
>> b/drivers/gpu/drm/i915/i915_gpu_error.c
>> index f020c0086fbcd..03d62c250c465 100644
>> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
>> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
>> @@ -2219,3 +2219,135 @@ void i915_disable_error_state(struct
>> drm_i915_private *i915, int err)
>> i915->gpu_error.first_error = ERR_PTR(err);
>> spin_unlock_irq(&i915->gpu_error.lock);
>> }
>> +
>> +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
>> +void intel_klog_error_capture(struct intel_gt *gt,
>> + intel_engine_mask_t engine_mask)
>> +{
>> + static int g_count;
>> + struct drm_i915_private *i915 = gt->i915;
>> + struct i915_gpu_coredump *error;
>> + intel_wakeref_t wakeref;
>> + size_t buf_size = PAGE_SIZE * 128;
>> + size_t pos_err;
>> + char *buf, *ptr, *next;
>> + int l_count = g_count++;
>> + int line = 0;
>> +
>> + /* Can't allocate memory during a reset */
>> + if (test_bit(I915_RESET_BACKOFF, >->reset.flags)) {
>> + drm_err(>->i915->drm, "[Capture/%d.%d] Inside GT reset,
>> skipping error capture :(\n",
>> + l_count, line++);
>> + return;
>> + }
>> +
>> + error = READ_ONCE(i915->gpu_error.first_error);
>> + if (error) {
>> + drm_err(&i915->drm, "[Capture/%d.%d] Clearing existing error
>> capture first...\n",
>> + l_count, line++);
>> + i915_reset_error_state(i915);
>> + }
>> +
>> + with_intel_runtime_pm(&i915->runtime_pm, wakeref)
>> + error = i915_gpu_coredump(gt, engine_mask,
>> CORE_DUMP_FLAG_NONE);
>> +
>> + if (IS_ERR(error)) {
>> + drm_err(&i915->drm, "[Capture/%d.%d] Failed to capture error
>> capture: %ld!\n",
>> + l_count, line++, PTR_ERR(error));
>> + return;
>> + }
>> +
>> + buf = kvmalloc(buf_size, GFP_KERNEL);
>> + if (!buf) {
>> + drm_err(&i915->drm, "[Capture/%d.%d] Failed to allocate buffer
>> for error capture!\n",
>> + l_count, line++);
>> + i915_gpu_coredump_put(error);
>> + return;
>> + }
>> +
>> + drm_info(&i915->drm, "[Capture/%d.%d] Dumping i915 error capture
>> for %ps...\n",
>> + l_count, line++, __builtin_return_address(0));
>> +
>> + /* Largest string length safe to print via dmesg */
>> +# define MAX_CHUNK 800
>> +
>> + pos_err = 0;
>> + while (1) {
>> + ssize_t got = i915_gpu_coredump_copy_to_buffer(error, buf,
>> pos_err, buf_size - 1);
>> +
>> + if (got <= 0)
>> + break;
>> +
>> + buf[got] = 0;
>> + pos_err += got;
>> +
>> + ptr = buf;
>> + while (got > 0) {
>> + size_t count;
>> + char tag[2];
>> +
>> + next = strnchr(ptr, got, '\n');
>> + if (next) {
>> + count = next - ptr;
>> + *next = 0;
>> + tag[0] = '>';
>> + tag[1] = '<';
>> + } else {
>> + count = got;
>> + tag[0] = '}';
>> + tag[1] = '{';
>> + }
>> +
>> + if (count > MAX_CHUNK) {
>> + size_t pos;
>> + char *ptr2 = ptr;
>> +
>> + for (pos = MAX_CHUNK; pos < count; pos += MAX_CHUNK) {
>> + char chr = ptr[pos];
>> +
>> + ptr[pos] = 0;
>> + drm_info(&i915->drm, "[Capture/%d.%d] }%s{\n",
>> + l_count, line++, ptr2);
>> + ptr[pos] = chr;
>> + ptr2 = ptr + pos;
>> +
>> + /*
>> + * If spewing large amounts of data via a serial
>> console,
>> + * this can be a very slow process. So be friendly
>> and try
>> + * not to cause 'softlockup on CPU' problems.
>> + */
>> + cond_resched();
>> + }
>> +
>> + if (ptr2 < (ptr + count))
>> + drm_info(&i915->drm, "[Capture/%d.%d] %c%s%c\n",
>> + l_count, line++, tag[0], ptr2, tag[1]);
>> + else if (tag[0] == '>')
>> + drm_info(&i915->drm, "[Capture/%d.%d] ><\n",
>> + l_count, line++);
>> + } else {
>> + drm_info(&i915->drm, "[Capture/%d.%d] %c%s%c\n",
>> + l_count, line++, tag[0], ptr, tag[1]);
>> + }
>> +
>> + ptr = next;
>> + got -= count;
>> + if (next) {
>> + ptr++;
>> + got--;
>> + }
>> +
>> + /* As above. */
>> + cond_resched();
>> + }
>> +
>> + if (got)
>> + drm_info(&i915->drm, "[Capture/%d.%d] Got %zd bytes
>> remaining!\n",
>> + l_count, line++, got);
>> + }
>> +
>> + kvfree(buf);
>> +
>> + drm_info(&i915->drm, "[Capture/%d.%d] Dumped %zd bytes\n",
>> l_count, line++, pos_err);
>> +}
>> +#endif
>> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h
>> b/drivers/gpu/drm/i915/i915_gpu_error.h
>> index a91932cc65317..a78c061ce26fb 100644
>> --- a/drivers/gpu/drm/i915/i915_gpu_error.h
>> +++ b/drivers/gpu/drm/i915/i915_gpu_error.h
>> @@ -258,6 +258,16 @@ static inline u32 i915_reset_engine_count(struct
>> i915_gpu_error *error,
>> #define CORE_DUMP_FLAG_NONE 0x0
>> #define CORE_DUMP_FLAG_IS_GUC_CAPTURE BIT(0)
>> +#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) &&
>> IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
>> +void intel_klog_error_capture(struct intel_gt *gt,
>> + intel_engine_mask_t engine_mask);
>> +#else
>> +static inline void intel_klog_error_capture(struct intel_gt *gt,
>> + intel_engine_mask_t engine_mask)
>> +{
>> +}
>> +#endif
>> +
>> #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
>> __printf(2, 3)
References
Visible links
1. mailto:John.C.Harrison@xxxxxxxxx
2. mailto:John.C.Harrison@xxxxxxxxx
3. mailto:vinay.belgaumkar@xxxxxxxxx
4. mailto:John.C.Harrison@xxxxxxxxx