On Tue, May 02, 2023 at 07:57:02AM +0000, Matthew Brost wrote: > On Thu, Apr 27, 2023 at 10:28:13AM +0200, Thomas Hellström wrote: > > > > On 4/26/23 22:57, Rodrigo Vivi wrote: > > > The goal is to use devcoredump infrastructure to report error states > > > captured at the crash time. > > > > > > The error state will contain useful information for GPU hang debug, such > > > as INSTDONE registers and the current buffers getting executed, as well > > > as any other information that helps user space and allow later replays of > > > the error. > > > > > > The proposal here is to avoid a Xe only error_state like i915 and use > > > a standard dev_coredump infrastructure to expose the error state. > > > > > > For our own case, the data is only useful if it is a snapshot of the > > > time when the GPU crash has happened, since we reset the GPU immediately > > > after and the registers might have changed. So the proposal here is to > > > have an internal snapshot to be printed out later. > > > > > > Also, usually a subsequent GPU hang can be only a cause of the initial > > > one. So we only save the 'first' hang. The dev_coredump has a delayed > > > work queue where it remove the coredump and free all the data withing a > > > few moments of the error. When that happens we also reset our capture > > > state and allow further snapshots. > > > > > > Right now this infra only print out the time of the hang. More information > > > will be migrated here on subsequent work. Also, in order to organize the > > > dump better, the goal is to propose dev_coredump changes itself to allow > > > multiple files and different controls. But for now we start Xe usage of > > > it without any dependency on dev_coredump core changes. > > > > > > Cc: Daniel Vetter <daniel.vetter@xxxxxxxx> > > > Signed-off-by: Rodrigo Vivi <rodrigo.vivi@xxxxxxxxx> > > > --- > > > drivers/gpu/drm/xe/Kconfig | 1 + > > > drivers/gpu/drm/xe/Makefile | 1 + > > > drivers/gpu/drm/xe/xe_devcoredump.c | 144 ++++++++++++++++++++++ > > > drivers/gpu/drm/xe/xe_devcoredump.h | 22 ++++ > > > drivers/gpu/drm/xe/xe_devcoredump_types.h | 47 +++++++ > > > drivers/gpu/drm/xe/xe_device_types.h | 4 + > > > drivers/gpu/drm/xe/xe_guc_submit.c | 2 + > > > drivers/gpu/drm/xe/xe_pci.c | 2 + > > > 8 files changed, 223 insertions(+) > > > create mode 100644 drivers/gpu/drm/xe/xe_devcoredump.c > > > create mode 100644 drivers/gpu/drm/xe/xe_devcoredump.h > > > create mode 100644 drivers/gpu/drm/xe/xe_devcoredump_types.h > > > > > > diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig > > > index f6f3b491d162..d44794f99338 100644 > > > --- a/drivers/gpu/drm/xe/Kconfig > > > +++ b/drivers/gpu/drm/xe/Kconfig > > > @@ -35,6 +35,7 @@ config DRM_XE > > > select DRM_TTM_HELPER > > > select DRM_SCHED > > > select MMU_NOTIFIER > > > + select WANT_DEV_COREDUMP > > > help > > > Experimental driver for Intel Xe series GPUs > > > diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile > > > index ee4a95beec20..9d675f7c77aa 100644 > > > --- a/drivers/gpu/drm/xe/Makefile > > > +++ b/drivers/gpu/drm/xe/Makefile > > > @@ -34,6 +34,7 @@ xe-y += xe_bb.o \ > > > xe_bo.o \ > > > xe_bo_evict.o \ > > > xe_debugfs.o \ > > > + xe_devcoredump.o \ > > > xe_device.o \ > > > xe_dma_buf.o \ > > > xe_engine.o \ > > > diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c > > > new file mode 100644 > > > index 000000000000..d9531183f03a > > > --- /dev/null > > > +++ b/drivers/gpu/drm/xe/xe_devcoredump.c > > > @@ -0,0 +1,144 @@ > > > +// SPDX-License-Identifier: MIT > > > +/* > > > + * Copyright © 2023 Intel Corporation > > > + */ > > > + > > > +#include "xe_devcoredump.h" > > > +#include "xe_devcoredump_types.h" > > > + > > > +#include <linux/devcoredump.h> > > > +#include <generated/utsrelease.h> > > > + > > > +#include "xe_engine.h" > > > +#include "xe_gt.h" > > > + > > > +/** > > > + * DOC: Xe device coredump > > > + * > > > + * Devices overview: > > > + * Xe uses dev_coredump infrastructure for exposing the crash errors in a > > > + * standardized way. > > > + * devcoredump exposes a temporary device under /sys/class/devcoredump/ > > > + * which is linked with our card device directly. > > > + * The core dump can be accessed either from > > > + * /sys/class/drm/card<n>/device/devcoredump/ or from > > > + * /sys/class/devcoredump/devcd<m> where > > > + * /sys/class/devcoredump/devcd<m>/failing_device is a link to > > > + * /sys/class/drm/card<n>/device/. > > > + * > > > + * Snapshot at hang: > > > + * The 'data' file is printed with a drm_printer pointer at devcoredump read > > > + * time. For this reason, we need to take snapshots from when the hang has > > > + * happened, and not only when the user is reading the file. Otherwise the > > > + * information is outdated since the resets might have happened in between. > > > + * > > > + * 'First' failure snapshot: > > > + * In general, the first hang is the most critical one since the following hangs > > > + * can be a consequence of the initial hang. For this reason we only take the > > > + * snapshot of the 'first' failure and ignore subsequent calls of this function, > > > + * at least while the coredump device is alive. Dev_coredump has a delayed work > > > + * queue that will eventually delete the device and free all the dump > > > + * information. At this time we also clear the faulty_engine and allow the next > > > + * hang capture. > > > + */ > > > + > > > +static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, > > > + size_t count, void *data, size_t datalen) > > > +{ > > > + struct xe_devcoredump *coredump = data; > > > + struct xe_devcoredump_snapshot *ss; > > > + struct drm_printer p; > > > + struct drm_print_iterator iter; > > > + struct timespec64 ts; > > > + > > > + iter.data = buffer; > > > + iter.offset = 0; > > > + iter.start = offset; > > > + iter.remain = count; > > > + > > > + mutex_lock(&coredump->lock); > > > + > > > + ss = &coredump->snapshot; > > > + p = drm_coredump_printer(&iter); > > > + > > > + drm_printf(&p, "**** Xe Device Coredump ****\n"); > > > + drm_printf(&p, "kernel: " UTS_RELEASE "\n"); > > > + drm_printf(&p, "module: " KBUILD_MODNAME "\n"); > > > + > > > + ts = ktime_to_timespec64(ss->snapshot_time); > > > + drm_printf(&p, "Snapshot time: %lld.%09ld\n", ts.tv_sec, ts.tv_nsec); > > > + ts = ktime_to_timespec64(ss->boot_time); > > > + drm_printf(&p, "Boot time: %lld.%09ld\n", ts.tv_sec, ts.tv_nsec); > > > + ts = ktime_to_timespec64(ktime_sub(ss->snapshot_time, ss->boot_time)); > > > + drm_printf(&p, "Uptime: %lld.%09ld\n", ts.tv_sec, ts.tv_nsec); > > > + > > > + mutex_unlock(&coredump->lock); > > > + > > > + return count - iter.remain; > > > +} > > > + > > > +static void xe_devcoredump_free(void *data) > > > +{ > > > + struct xe_devcoredump *coredump = data; > > > + struct xe_device *xe = container_of(coredump, struct xe_device, > > > + devcoredump); > > > + mutex_lock(&coredump->lock); > > > + > > > + coredump->faulty_engine = NULL; > > > + drm_info(&xe->drm, "Xe device coredump has been deleted.\n"); > > > + > > > + mutex_unlock(&coredump->lock); > > > +} > > > + > > > +static void devcoredump_snapshot(struct xe_devcoredump *coredump) > > > +{ > > > + struct xe_devcoredump_snapshot *ss = &coredump->snapshot; > > > + > > > + lockdep_assert_held(&coredump->lock); > > > + ss->snapshot_time = ktime_get_real(); > > > + ss->boot_time = ktime_get_boottime(); > > > +} > > > + > > > +/** > > > + * xe_devcoredump - Take the required snapshots and initialize coredump device. > > > + * @e: The faulty xe_engine, where the issue was detected. > > > + * > > > + * This function should be called at the crash time. It is skipped if we still > > > + * have the core dump device available with the information of the 'first' > > > + * snapshot. > > > + */ > > > +void xe_devcoredump(struct xe_engine *e) > > > +{ > > > + struct xe_device *xe = gt_to_xe(e->gt); > > > + struct xe_devcoredump *coredump = &xe->devcoredump; > > > > For !long running engines, this is the dma-fence signalling critical path, > > and since the drm_scheduler has not yet been properly annotated, we should > > probably annotate that here, to avoid seeing strange deadlocks during > > coredumps.... > > > > /Thomas > > > > +1 Just to confirm we are in the same page here, do you guys mean move dma_fence_begin_signalling() annotation here? or which annotation am I missing? > > Matt > > > > > > > > + > > > + mutex_lock(&coredump->lock); > > > + if (coredump->faulty_engine) { > > > + drm_dbg(&xe->drm, "Multiple hangs are occuring, but only the first snapshot was taken\n"); > > > + mutex_unlock(&coredump->lock); > > > + return; > > > + } > > > + coredump->faulty_engine = e; > > > + devcoredump_snapshot(coredump); > > > + mutex_unlock(&coredump->lock); > > > + > > > + drm_info(&xe->drm, "Xe device coredump has been created\n"); > > > + drm_info(&xe->drm, "Check your /sys/class/drm/card<n>/device/devcoredump/data\n"); > > > + > > > + dev_coredumpm(xe->drm.dev, THIS_MODULE, coredump, 0, GFP_KERNEL, > > > + xe_devcoredump_read, xe_devcoredump_free); > > > +} > > > + > > > +/** > > > + * xe_devcoredump_init - Initialize xe_devcoredump. > > > + * @xe: Xe device. > > > + * > > > + * This function should be called at the probe so the mutex lock can be > > > + * initialized. > > > + */ > > > +void xe_devcoredump_init(struct xe_device *xe) > > > +{ > > > + struct xe_devcoredump *coredump = &xe->devcoredump; > > > + > > > + mutex_init(&coredump->lock); > > > +} > > > diff --git a/drivers/gpu/drm/xe/xe_devcoredump.h b/drivers/gpu/drm/xe/xe_devcoredump.h > > > new file mode 100644 > > > index 000000000000..30941d2e554b > > > --- /dev/null > > > +++ b/drivers/gpu/drm/xe/xe_devcoredump.h > > > @@ -0,0 +1,22 @@ > > > +/* SPDX-License-Identifier: MIT */ > > > +/* > > > + * Copyright © 2023 Intel Corporation > > > + */ > > > + > > > +#ifndef _XE_DEVCOREDUMP_H_ > > > +#define _XE_DEVCOREDUMP_H_ > > > + > > > +struct xe_device; > > > +struct xe_engine; > > > + > > > +void xe_devcoredump_init(struct xe_device *xe); > > > + > > > +#ifdef CONFIG_DEV_COREDUMP > > > +void xe_devcoredump(struct xe_engine *e); > > > +#else > > > +static inline void xe_devcoredump(struct xe_engine *e) > > > +{ > > > +} > > > +#endif > > > + > > > +#endif > > > diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h > > > new file mode 100644 > > > index 000000000000..3f395fa9104e > > > --- /dev/null > > > +++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h > > > @@ -0,0 +1,47 @@ > > > +/* SPDX-License-Identifier: MIT */ > > > +/* > > > + * Copyright © 2023 Intel Corporation > > > + */ > > > + > > > +#ifndef _XE_DEVCOREDUMP_TYPES_H_ > > > +#define _XE_DEVCOREDUMP_TYPES_H_ > > > + > > > +#include <linux/ktime.h> > > > +#include <linux/mutex.h> > > > + > > > +struct xe_device; > > > + > > > +/** > > > + * struct xe_devcoredump_snapshot - Crash snapshot > > > + * > > > + * This struct contains all the useful information quickly captured at the time > > > + * of the crash. So, any subsequent reads of the coredump points to a data that > > > + * shows the state of the GPU of when the issue has happened. > > > + */ > > > +struct xe_devcoredump_snapshot { > > > + /** @snapshot_time: Time of this capture. */ > > > + ktime_t snapshot_time; > > > + /** @boot_time: Relative boot time so the uptime can be calculated. */ > > > + ktime_t boot_time; > > > +}; > > > + > > > +/** > > > + * struct xe_devcoredump - Xe devcoredump main structure > > > + * > > > + * This struct represents the live and active dev_coredump node. > > > + * It is created/populated at the time of a crash/error. Then it > > > + * is read later when user access the device coredump data file > > > + * for reading the information. > > > + */ > > > +struct xe_devcoredump { > > > + /** @xe: Xe device. */ > > > + struct xe_device *xe; > > > + /** @falty_engine: Engine where the crash/error happened. */ > > > + struct xe_engine *faulty_engine; > > > + /** @lock: Protects data from races between capture and read out. */ > > > + struct mutex lock; > > > + /** @snapshot: Snapshot is captured at time of the first crash */ > > > + struct xe_devcoredump_snapshot snapshot; > > > +}; > > > + > > > +#endif > > > diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h > > > index 1cb404e48aaa..2a0995824692 100644 > > > --- a/drivers/gpu/drm/xe/xe_device_types.h > > > +++ b/drivers/gpu/drm/xe/xe_device_types.h > > > @@ -12,6 +12,7 @@ > > > #include <drm/drm_file.h> > > > #include <drm/ttm/ttm_device.h> > > > +#include "xe_devcoredump_types.h" > > > #include "xe_gt_types.h" > > > #include "xe_platform_types.h" > > > #include "xe_step_types.h" > > > @@ -55,6 +56,9 @@ struct xe_device { > > > /** @drm: drm device */ > > > struct drm_device drm; > > > + /** @devcoredump: device coredump */ > > > + struct xe_devcoredump devcoredump; > > > + > > > /** @info: device info */ > > > struct intel_device_info { > > > /** @graphics_name: graphics IP name */ > > > diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c > > > index e857013070b9..231fb4145297 100644 > > > --- a/drivers/gpu/drm/xe/xe_guc_submit.c > > > +++ b/drivers/gpu/drm/xe/xe_guc_submit.c > > > @@ -14,6 +14,7 @@ > > > #include <drm/drm_managed.h> > > > #include "regs/xe_lrc_layout.h" > > > +#include "xe_devcoredump.h" > > > #include "xe_device.h" > > > #include "xe_engine.h" > > > #include "xe_force_wake.h" > > > @@ -800,6 +801,7 @@ guc_engine_timedout_job(struct drm_sched_job *drm_job) > > > drm_warn(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx", > > > xe_sched_job_seqno(job), e->guc->id, e->flags); > > > simple_error_capture(e); > > > + xe_devcoredump(e); > > > } else { > > > drm_dbg(&xe->drm, "Timedout signaled job: seqno=%u, guc_id=%d, flags=0x%lx", > > > xe_sched_job_seqno(job), e->guc->id, e->flags); > > > diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c > > > index e512e8b69831..1d496210b580 100644 > > > --- a/drivers/gpu/drm/xe/xe_pci.c > > > +++ b/drivers/gpu/drm/xe/xe_pci.c > > > @@ -16,6 +16,7 @@ > > > #include "regs/xe_regs.h" > > > #include "regs/xe_gt_regs.h" > > > +#include "xe_devcoredump.h" > > > #include "xe_device.h" > > > #include "xe_display.h" > > > #include "xe_drv.h" > > > @@ -657,6 +658,7 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) > > > return err; > > > } > > > + xe_devcoredump_init(xe); > > > xe_pm_runtime_init(xe); > > > return 0;