2011/9/15 Michel Dänzer <michel@xxxxxxxxxxx>:
> From: Michel Dänzer <michel.daenzer@xxxxxxx>
>
> This was only the case if the GPU reset was triggered from the CS ioctl,
> otherwise other processes could happily enter the CS ioctl and wreak havoc
> during the GPU reset.
>
> This is a little complicated because the GPU reset can be triggered from the
> CS ioctl, in which case we're already holding the mutex, or from other call
> paths, in which case we need to lock the mutex. AFAICT the mutex API doesn't
> allow nested locking or finding out the mutex owner, so we need to handle this
> with some helper functions.
>
> Signed-off-by: Michel Dänzer <michel.daenzer@xxxxxxx>

For both patches:

Reviewed-by: Alex Deucher <alexander.deucher@xxxxxxx>

> ---
>  drivers/gpu/drm/radeon/radeon.h        |   60 ++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/radeon/radeon_cs.c     |   14 ++++----
>  drivers/gpu/drm/radeon/radeon_device.c |   16 ++++++++
>  3 files changed, 83 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
> index ef0e0e0..89304d9 100644
> --- a/drivers/gpu/drm/radeon/radeon.h
> +++ b/drivers/gpu/drm/radeon/radeon.h
> @@ -1203,6 +1203,8 @@ struct radeon_device {
>         struct radeon_pm pm;
>         uint32_t bios_scratch[RADEON_BIOS_NUM_SCRATCH];
>         struct mutex cs_mutex;
> +       struct task_struct *cs_mutex_owner;
> +       struct mutex cs_mutex_owner_mutex;
>         struct radeon_wb wb;
>         struct radeon_dummy_page dummy_page;
>         bool gpu_lockup;
> @@ -1241,6 +1243,64 @@ struct radeon_device {
>         struct radeon_i2c_chan *i2c_bus[RADEON_MAX_I2C_BUS];
>  };
>
> +
> +/*
> + * CS mutex helpers
> + */
> +
> +static inline void cs_mutex_lock(struct radeon_device *rdev)
> +{
> +       mutex_lock(&rdev->cs_mutex);
> +
> +       mutex_lock(&rdev->cs_mutex_owner_mutex);
> +       rdev->cs_mutex_owner = current;
> +       mutex_unlock(&rdev->cs_mutex_owner_mutex);
> +}
> +
> +static inline void cs_mutex_unlock(struct radeon_device *rdev)
> +{
> +       mutex_lock(&rdev->cs_mutex_owner_mutex);
> +       rdev->cs_mutex_owner = NULL;
> +       mutex_unlock(&rdev->cs_mutex_owner_mutex);
> +
> +       mutex_unlock(&rdev->cs_mutex);
> +}
> +
> +/*
> + * Check if this process locked the CS mutex already; if it didn't, lock it.
> + *
> + * Returns:
> + * true: This function locked the mutex.
> + * false: This function didn't lock the mutex (this process already locked it
> + *        before), so the caller probably shouldn't unlock it.
> + */
> +static inline int cs_mutex_ensure_locked(struct radeon_device *rdev)
> +{
> +       int took_mutex;
> +       int have_mutex;
> +
> +       mutex_lock(&rdev->cs_mutex_owner_mutex);
> +       took_mutex = mutex_trylock(&rdev->cs_mutex);
> +       if (took_mutex) {
> +               /* The mutex was unlocked before, so it's ours now */
> +               rdev->cs_mutex_owner = current;
> +               have_mutex = true;
> +       } else {
> +               /* Somebody already locked the mutex. Was it this process? */
> +               have_mutex = (rdev->cs_mutex_owner == current);
> +       }
> +       mutex_unlock(&rdev->cs_mutex_owner_mutex);
> +
> +       if (!have_mutex) {
> +               /* Another process locked the mutex, take it */
> +               cs_mutex_lock(rdev);
> +               took_mutex = true;
> +       }
> +
> +       return took_mutex;
> +}
> +
> +
>  int radeon_device_init(struct radeon_device *rdev,
>                 struct drm_device *ddev,
>                 struct pci_dev *pdev,
> diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
> index fae00c0..61e3063 100644
> --- a/drivers/gpu/drm/radeon/radeon_cs.c
> +++ b/drivers/gpu/drm/radeon/radeon_cs.c
> @@ -222,7 +222,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>         struct radeon_cs_chunk *ib_chunk;
>         int r;
>
> -       mutex_lock(&rdev->cs_mutex);
> +       cs_mutex_lock(rdev);
>         /* initialize parser */
>         memset(&parser, 0, sizeof(struct radeon_cs_parser));
>         parser.filp = filp;
> @@ -233,14 +233,14 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>         if (r) {
>                 DRM_ERROR("Failed to initialize parser !\n");
>                 radeon_cs_parser_fini(&parser, r);
> -               mutex_unlock(&rdev->cs_mutex);
> +               cs_mutex_unlock(rdev);
>                 return r;
>         }
>         r = radeon_ib_get(rdev, &parser.ib);
>         if (r) {
>                 DRM_ERROR("Failed to get ib !\n");
>                 radeon_cs_parser_fini(&parser, r);
> -               mutex_unlock(&rdev->cs_mutex);
> +               cs_mutex_unlock(rdev);
>                 return r;
>         }
>         r = radeon_cs_parser_relocs(&parser);
> @@ -248,7 +248,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>                 if (r != -ERESTARTSYS)
>                         DRM_ERROR("Failed to parse relocation %d!\n", r);
>                 radeon_cs_parser_fini(&parser, r);
> -               mutex_unlock(&rdev->cs_mutex);
> +               cs_mutex_unlock(rdev);
>                 return r;
>         }
>         /* Copy the packet into the IB, the parser will read from the
> @@ -260,14 +260,14 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>         if (r || parser.parser_error) {
>                 DRM_ERROR("Invalid command stream !\n");
>                 radeon_cs_parser_fini(&parser, r);
> -               mutex_unlock(&rdev->cs_mutex);
> +               cs_mutex_unlock(rdev);
>                 return r;
>         }
>         r = radeon_cs_finish_pages(&parser);
>         if (r) {
>                 DRM_ERROR("Invalid command stream !\n");
>                 radeon_cs_parser_fini(&parser, r);
> -               mutex_unlock(&rdev->cs_mutex);
> +               cs_mutex_unlock(rdev);
>                 return r;
>         }
>         r = radeon_ib_schedule(rdev, parser.ib);
> @@ -275,7 +275,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>                 DRM_ERROR("Failed to schedule IB !\n");
>         }
>         radeon_cs_parser_fini(&parser, r);
> -       mutex_unlock(&rdev->cs_mutex);
> +       cs_mutex_unlock(rdev);
>         return r;
>  }
>
> diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
> index 5f0fd85..5f3d02d 100644
> --- a/drivers/gpu/drm/radeon/radeon_device.c
> +++ b/drivers/gpu/drm/radeon/radeon_device.c
> @@ -712,6 +712,8 @@ int radeon_device_init(struct radeon_device *rdev,
>
>         /* mutex initialization are all done here so we
>          * can recall function without having locking issues */
> +       mutex_init(&rdev->cs_mutex_owner_mutex);
> +       rdev->cs_mutex_owner = NULL;
>         mutex_init(&rdev->cs_mutex);
>         mutex_init(&rdev->ib_pool.mutex);
>         mutex_init(&rdev->cp.mutex);
> @@ -949,6 +951,16 @@ int radeon_gpu_reset(struct radeon_device *rdev)
>  {
>         int r;
>         int resched;
> +       int took_cs_mutex;
> +
> +       /* Prevent CS ioctl from interfering */
> +       took_cs_mutex = cs_mutex_ensure_locked(rdev);
> +       if (!rdev->gpu_lockup) {
> +               DRM_DEBUG("GPU already reset by previous CS mutex holder\n");
> +               if (took_cs_mutex)
> +                       cs_mutex_unlock(rdev);
> +               return 0;
> +       }
>
>         radeon_save_bios_scratch_regs(rdev);
>         /* block TTM */
> @@ -962,8 +974,12 @@ int radeon_gpu_reset(struct radeon_device *rdev)
>                 radeon_restore_bios_scratch_regs(rdev);
>                 drm_helper_resume_force_mode(rdev->ddev);
>                 ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched);
> +               if (took_cs_mutex)
> +                       cs_mutex_unlock(rdev);
>                 return 0;
>         }
> +       if (took_cs_mutex)
> +               cs_mutex_unlock(rdev);
>         /* bad news, how to tell it to userspace ? */
>         dev_info(rdev->dev, "GPU reset failed\n");
>         return r;
> --
> 1.7.6.3
>
_______________________________________________
dri-devel mailing list
dri-devel@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/dri-devel
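
For readers following the thread, the ownership-tracking trick behind cs_mutex_ensure_locked() in the patch above can be reproduced outside the kernel. The sketch below is not the radeon code; it is a minimal userspace illustration of the same pattern built on POSIX threads, and every identifier in it (struct cs_lock, cs_lock_take(), cs_lock_release(), cs_lock_ensure_locked(), reset_path()) is hypothetical, invented only for this example.

/*
 * Standalone illustration of the "mutex plus owner tracking" pattern from
 * the patch above.  Userspace pthreads, not kernel code; all names here
 * are hypothetical and exist only for this sketch.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct cs_lock {
        pthread_mutex_t cs_mutex;       /* the lock the CS paths contend on */
        pthread_mutex_t owner_mutex;    /* protects the owner fields below */
        pthread_t owner;
        bool owner_valid;
};

static void cs_lock_take(struct cs_lock *l)
{
        pthread_mutex_lock(&l->cs_mutex);

        pthread_mutex_lock(&l->owner_mutex);
        l->owner = pthread_self();
        l->owner_valid = true;
        pthread_mutex_unlock(&l->owner_mutex);
}

static void cs_lock_release(struct cs_lock *l)
{
        pthread_mutex_lock(&l->owner_mutex);
        l->owner_valid = false;
        pthread_mutex_unlock(&l->owner_mutex);

        pthread_mutex_unlock(&l->cs_mutex);
}

/*
 * Returns true if this call took the lock (the caller must release it),
 * false if the calling thread already held it (the caller must not).
 */
static bool cs_lock_ensure_locked(struct cs_lock *l)
{
        bool took, have;

        pthread_mutex_lock(&l->owner_mutex);
        took = (pthread_mutex_trylock(&l->cs_mutex) == 0);
        if (took) {
                /* The lock was free, so it is ours now. */
                l->owner = pthread_self();
                l->owner_valid = true;
                have = true;
        } else {
                /* Somebody holds it already.  Is it this thread? */
                have = l->owner_valid &&
                       pthread_equal(l->owner, pthread_self());
        }
        pthread_mutex_unlock(&l->owner_mutex);

        if (!have) {
                /* Another thread holds the lock; wait for it. */
                cs_lock_take(l);
                took = true;
        }
        return took;
}

/* Stand-in for a reset path: callable with or without the lock held. */
static void reset_path(struct cs_lock *l)
{
        bool took = cs_lock_ensure_locked(l);

        printf("reset ran, lock taken here: %s\n", took ? "yes" : "no");
        if (took)
                cs_lock_release(l);
}

int main(void)
{
        struct cs_lock l = { .owner_valid = false };

        pthread_mutex_init(&l.cs_mutex, NULL);
        pthread_mutex_init(&l.owner_mutex, NULL);

        /* Reset triggered without holding the lock. */
        reset_path(&l);

        /* Reset triggered while already holding the lock, as the CS ioctl does. */
        cs_lock_take(&l);
        reset_path(&l);
        cs_lock_release(&l);

        return 0;
}

Built with cc -pthread, the first reset_path() call reports that it took the lock itself and releases it; the second call, made while the lock is already held (mirroring the CS ioctl case), reports that it did not take the lock and therefore leaves the unlock to its caller, which is the same contract cs_mutex_ensure_locked() establishes for radeon_gpu_reset() in the patch.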