Unfortunately devcoredump infrastructure does not provide and interface for us to force the device removal upon the pci_remove time of our device. The devcoredump is linked at the device level, so when in use it will prevent the module removal, but it doesn't prevent the call of the pci_remove callback. This callback cannot fail anyway and we end up clearing and freeing the entire pci device. Hence, after we removed the pci device, we shouldn't allow any read or free operations to avoid segmentation fault. Signed-off-by: Rodrigo Vivi <rodrigo.vivi@xxxxxxxxx> --- drivers/gpu/drm/xe/xe_devcoredump.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index d9531183f03a..a08929c01b75 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -42,6 +42,11 @@ * hang capture. */ +static struct xe_device *coredump_to_xe(const struct xe_devcoredump *coredump) +{ + return container_of(coredump, struct xe_device, devcoredump); +} + static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, size_t count, void *data, size_t datalen) { @@ -51,6 +56,10 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, struct drm_print_iterator iter; struct timespec64 ts; + /* Our device is gone already... */ + if (!data || !coredump_to_xe(coredump)) + return -ENODEV; + iter.data = buffer; iter.offset = 0; iter.start = offset; @@ -80,12 +89,16 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, static void xe_devcoredump_free(void *data) { struct xe_devcoredump *coredump = data; - struct xe_device *xe = container_of(coredump, struct xe_device, - devcoredump); + + /* Our device is gone. Nothing to do... */ + if (!data || !coredump_to_xe(coredump)) + return; + mutex_lock(&coredump->lock); coredump->faulty_engine = NULL; - drm_info(&xe->drm, "Xe device coredump has been deleted.\n"); + drm_info(&coredump_to_xe(coredump)->drm, + "Xe device coredump has been deleted.\n"); mutex_unlock(&coredump->lock); } -- 2.39.2