On Thu, Sep 15, 2011 at 08:21:00PM +0200, Michel Dänzer wrote: > From: Michel Dänzer <michel.daenzer@xxxxxxx> > > This was only the case if the GPU reset was triggered from the CS ioctl, > otherwise other processes could happily enter the CS ioctl and wreak havoc > during the GPU reset. > > This is a little complicated because the GPU reset can be triggered from the > CS ioctl, in which case we're already holding the mutex, or from other call > paths, in which case we need to lock the mutex. AFAICT the mutex API doesn't > allow nested locking or finding out the mutex owner, so we need to handle this > with some helper functions. > > Signed-off-by: Michel Dänzer <michel.daenzer@xxxxxxx> > --- > drivers/gpu/drm/radeon/radeon.h | 60 ++++++++++++++++++++++++++++++++ > drivers/gpu/drm/radeon/radeon_cs.c | 14 ++++---- > drivers/gpu/drm/radeon/radeon_device.c | 16 ++++++++ > 3 files changed, 83 insertions(+), 7 deletions(-) > > diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h > index ef0e0e0..89304d9 100644 > --- a/drivers/gpu/drm/radeon/radeon.h > +++ b/drivers/gpu/drm/radeon/radeon.h > @@ -1203,6 +1203,8 @@ struct radeon_device { > struct radeon_pm pm; > uint32_t bios_scratch[RADEON_BIOS_NUM_SCRATCH]; > struct mutex cs_mutex; > + struct task_struct *cs_mutex_owner; > + struct mutex cs_mutex_owner_mutex; That is a bit of 'mutex.. mutex'. What about just having the same name as before? > struct radeon_wb wb; > struct radeon_dummy_page dummy_page; > bool gpu_lockup; > @@ -1241,6 +1243,64 @@ struct radeon_device { > struct radeon_i2c_chan *i2c_bus[RADEON_MAX_I2C_BUS]; > }; > > + > +/* > + * CS mutex helpers > + */ > + > +static inline void cs_mutex_lock(struct radeon_device *rdev) > +{ > + mutex_lock(&rdev->cs_mutex); > + > + mutex_lock(&rdev->cs_mutex_owner_mutex); > + rdev->cs_mutex_owner = current; > + mutex_unlock(&rdev->cs_mutex_owner_mutex); > +} > + > +static inline void cs_mutex_unlock(struct radeon_device *rdev) > +{ > + mutex_lock(&rdev->cs_mutex_owner_mutex); > + rdev->cs_mutex_owner = NULL; > + mutex_unlock(&rdev->cs_mutex_owner_mutex); > + > + mutex_unlock(&rdev->cs_mutex); > +} > + > +/* > + * Check if this process locked the CS mutex already; if it didn't, lock it. > + * > + * Returns: > + * true: This function locked the mutex. > + * false: This function didn't lock the mutex (this process already locked it > + * before), so the caller probably shouldn't unlock it. > + */ > +static inline int cs_mutex_ensure_locked(struct radeon_device *rdev) > +{ > + int took_mutex; > + int have_mutex; I think you meant 'bool'? > + > + mutex_lock(&rdev->cs_mutex_owner_mutex); > + took_mutex = mutex_trylock(&rdev->cs_mutex); > + if (took_mutex) { > + /* The mutex was unlocked before, so it's ours now */ > + rdev->cs_mutex_owner = current; > + have_mutex = true; consider you set that here.. > + } else { > + /* Somebody already locked the mutex. Was it this process? */ > + have_mutex = (rdev->cs_mutex_owner == current); > + } > + mutex_unlock(&rdev->cs_mutex_owner_mutex); > + > + if (!have_mutex) { > + /* Another process locked the mutex, take it */ > + cs_mutex_lock(rdev); > + took_mutex = true; > + } > + > + return took_mutex; and if it is really going to be bool, you might as well change the return signature and make it the function decleration return bool instead of int. > +} > + > + > int radeon_device_init(struct radeon_device *rdev, > struct drm_device *ddev, > struct pci_dev *pdev, > diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c > index fae00c0..61e3063 100644 > --- a/drivers/gpu/drm/radeon/radeon_cs.c > +++ b/drivers/gpu/drm/radeon/radeon_cs.c > @@ -222,7 +222,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) > struct radeon_cs_chunk *ib_chunk; > int r; > > - mutex_lock(&rdev->cs_mutex); > + cs_mutex_lock(rdev); > /* initialize parser */ > memset(&parser, 0, sizeof(struct radeon_cs_parser)); > parser.filp = filp; > @@ -233,14 +233,14 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) > if (r) { > DRM_ERROR("Failed to initialize parser !\n"); > radeon_cs_parser_fini(&parser, r); > - mutex_unlock(&rdev->cs_mutex); > + cs_mutex_unlock(rdev); > return r; > } > r = radeon_ib_get(rdev, &parser.ib); > if (r) { > DRM_ERROR("Failed to get ib !\n"); > radeon_cs_parser_fini(&parser, r); > - mutex_unlock(&rdev->cs_mutex); > + cs_mutex_unlock(rdev); > return r; > } > r = radeon_cs_parser_relocs(&parser); > @@ -248,7 +248,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) > if (r != -ERESTARTSYS) > DRM_ERROR("Failed to parse relocation %d!\n", r); > radeon_cs_parser_fini(&parser, r); > - mutex_unlock(&rdev->cs_mutex); > + cs_mutex_unlock(rdev); > return r; > } > /* Copy the packet into the IB, the parser will read from the > @@ -260,14 +260,14 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) > if (r || parser.parser_error) { > DRM_ERROR("Invalid command stream !\n"); > radeon_cs_parser_fini(&parser, r); > - mutex_unlock(&rdev->cs_mutex); > + cs_mutex_unlock(rdev); > return r; > } > r = radeon_cs_finish_pages(&parser); > if (r) { > DRM_ERROR("Invalid command stream !\n"); > radeon_cs_parser_fini(&parser, r); > - mutex_unlock(&rdev->cs_mutex); > + cs_mutex_unlock(rdev); > return r; > } > r = radeon_ib_schedule(rdev, parser.ib); > @@ -275,7 +275,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) > DRM_ERROR("Failed to schedule IB !\n"); > } > radeon_cs_parser_fini(&parser, r); > - mutex_unlock(&rdev->cs_mutex); > + cs_mutex_unlock(rdev); > return r; > } > > diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c > index 5f0fd85..5f3d02d 100644 > --- a/drivers/gpu/drm/radeon/radeon_device.c > +++ b/drivers/gpu/drm/radeon/radeon_device.c > @@ -712,6 +712,8 @@ int radeon_device_init(struct radeon_device *rdev, > > /* mutex initialization are all done here so we > * can recall function without having locking issues */ > + mutex_init(&rdev->cs_mutex_owner_mutex); > + rdev->cs_mutex_owner = NULL; > mutex_init(&rdev->cs_mutex); > mutex_init(&rdev->ib_pool.mutex); > mutex_init(&rdev->cp.mutex); > @@ -949,6 +951,16 @@ int radeon_gpu_reset(struct radeon_device *rdev) > { > int r; > int resched; > + int took_cs_mutex; > + > + /* Prevent CS ioctl from interfering */ > + took_cs_mutex = cs_mutex_ensure_locked(rdev); > + if (!rdev->gpu_lockup) { > + DRM_DEBUG("GPU already reset by previous CS mutex holder\n"); > + if (took_cs_mutex) > + cs_mutex_unlock(rdev); > + return 0; > + } > > radeon_save_bios_scratch_regs(rdev); > /* block TTM */ > @@ -962,8 +974,12 @@ int radeon_gpu_reset(struct radeon_device *rdev) > radeon_restore_bios_scratch_regs(rdev); > drm_helper_resume_force_mode(rdev->ddev); > ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched); > + if (took_cs_mutex) > + cs_mutex_unlock(rdev); > return 0; > } > + if (took_cs_mutex) > + cs_mutex_unlock(rdev); > /* bad news, how to tell it to userspace ? */ > dev_info(rdev->dev, "GPU reset failed\n"); > return r; > -- > 1.7.6.3 > > _______________________________________________ > dri-devel mailing list > dri-devel@xxxxxxxxxxxxxxxxxxxxx > http://lists.freedesktop.org/mailman/listinfo/dri-devel _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/dri-devel