On Wed, Apr 26, 2023 at 04:57:02PM -0400, Rodrigo Vivi wrote: > Unfortunately devcoredump infrastructure does not provide and > interface for us to force the device removal upon the pci_remove > time of our device. > > The devcoredump is linked at the device level, so when in use > it will prevent the module removal, but it doesn't prevent the > call of the pci_remove callback. This callback cannot fail > anyway and we end up clearing and freeing the entire pci device. > > Hence, after we removed the pci device, we shouldn't allow any > read or free operations to avoid segmentation fault. > > Signed-off-by: Rodrigo Vivi <rodrigo.vivi@xxxxxxxxx> > --- > drivers/gpu/drm/xe/xe_devcoredump.c | 19 ++++++++++++++++--- > 1 file changed, 16 insertions(+), 3 deletions(-) > > diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c > index d9531183f03a..a08929c01b75 100644 > --- a/drivers/gpu/drm/xe/xe_devcoredump.c > +++ b/drivers/gpu/drm/xe/xe_devcoredump.c > @@ -42,6 +42,11 @@ > * hang capture. > */ > > +static struct xe_device *coredump_to_xe(const struct xe_devcoredump *coredump) > +{ > + return container_of(coredump, struct xe_device, devcoredump); Confused how still would ever return NULL, can you explain? Matt > +} > + > static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, > size_t count, void *data, size_t datalen) > { > @@ -51,6 +56,10 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, > struct drm_print_iterator iter; > struct timespec64 ts; > > + /* Our device is gone already... */ > + if (!data || !coredump_to_xe(coredump)) > + return -ENODEV; > + > iter.data = buffer; > iter.offset = 0; > iter.start = offset; > @@ -80,12 +89,16 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, > static void xe_devcoredump_free(void *data) > { > struct xe_devcoredump *coredump = data; > - struct xe_device *xe = container_of(coredump, struct xe_device, > - devcoredump); > + > + /* Our device is gone. Nothing to do... */ > + if (!data || !coredump_to_xe(coredump)) > + return; > + > mutex_lock(&coredump->lock); > > coredump->faulty_engine = NULL; > - drm_info(&xe->drm, "Xe device coredump has been deleted.\n"); > + drm_info(&coredump_to_xe(coredump)->drm, > + "Xe device coredump has been deleted.\n"); > > mutex_unlock(&coredump->lock); > } > -- > 2.39.2 >