Re: [PATCH] drm/radeon: avoid deadlock in pm path when waiting for fence

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Added to fixes and stable.  Thanks!

On Mon, Dec 17, 2012 at 11:41 AM, Christian König
<deathsimple@xxxxxxxxxxx> wrote:
> On 17.12.2012 17:04, j.glisse@xxxxxxxxx wrote:
>>
>> From: Jerome Glisse <jglisse@xxxxxxxxxx>
>>
>> radeon_fence_wait_empty_locked should not trigger GPU reset as no
>> place where it's call from would benefit from such thing and it
>> actually lead to a kernel deadlock in case the reset is triggered
>> from pm codepath. Instead force ring completion in place where it
>> makes sense or return early in others.
>>
>> Signed-off-by: Jerome Glisse <jglisse@xxxxxxxxxx>
>
>
> I wanted to stop losing GPU reset signals by moving the reset into
> radeon_fence_wait_empty locked, but it's true that in most cases it doesn't
> make much sense (suspend/finish) and I didn't know that it could cause a
> hang in the PM code.
>
> We should probably fix the PM code to properly signal this condition to it's
> caller and reset the GPU when it is possible to do so, but fixing the
> deadlock is of course more important.
>
> Also should probably go into the stable kernel as well, but anyway it is:
>
> Reviewed-by: Christian König <christian.koenig@xxxxxxx>
>
>
>> ---
>>   drivers/gpu/drm/radeon/radeon.h        |  2 +-
>>   drivers/gpu/drm/radeon/radeon_device.c | 13 +++++++++++--
>>   drivers/gpu/drm/radeon/radeon_fence.c  | 30
>> ++++++++++++++----------------
>>   drivers/gpu/drm/radeon/radeon_pm.c     | 15 ++++++++++++---
>>   4 files changed, 38 insertions(+), 22 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/radeon/radeon.h
>> b/drivers/gpu/drm/radeon/radeon.h
>> index 9c7625c..071b2d7 100644
>> --- a/drivers/gpu/drm/radeon/radeon.h
>> +++ b/drivers/gpu/drm/radeon/radeon.h
>> @@ -231,7 +231,7 @@ void radeon_fence_process(struct radeon_device *rdev,
>> int ring);
>>   bool radeon_fence_signaled(struct radeon_fence *fence);
>>   int radeon_fence_wait(struct radeon_fence *fence, bool interruptible);
>>   int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring);
>> -void radeon_fence_wait_empty_locked(struct radeon_device *rdev, int
>> ring);
>> +int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring);
>>   int radeon_fence_wait_any(struct radeon_device *rdev,
>>                           struct radeon_fence **fences,
>>                           bool intr);
>> diff --git a/drivers/gpu/drm/radeon/radeon_device.c
>> b/drivers/gpu/drm/radeon/radeon_device.c
>> index 774fae7..53a9223 100644
>> --- a/drivers/gpu/drm/radeon/radeon_device.c
>> +++ b/drivers/gpu/drm/radeon/radeon_device.c
>> @@ -1163,6 +1163,7 @@ int radeon_suspend_kms(struct drm_device *dev,
>> pm_message_t state)
>>         struct drm_crtc *crtc;
>>         struct drm_connector *connector;
>>         int i, r;
>> +       bool force_completion = false;
>>         if (dev == NULL || dev->dev_private == NULL) {
>>                 return -ENODEV;
>> @@ -1205,8 +1206,16 @@ int radeon_suspend_kms(struct drm_device *dev,
>> pm_message_t state)
>>         mutex_lock(&rdev->ring_lock);
>>         /* wait for gpu to finish processing current batch */
>> -       for (i = 0; i < RADEON_NUM_RINGS; i++)
>> -               radeon_fence_wait_empty_locked(rdev, i);
>> +       for (i = 0; i < RADEON_NUM_RINGS; i++) {
>> +               r = radeon_fence_wait_empty_locked(rdev, i);
>> +               if (r) {
>> +                       /* delay GPU reset to resume */
>> +                       force_completion = true;
>> +               }
>> +       }
>> +       if (force_completion) {
>> +               radeon_fence_driver_force_completion(rdev);
>> +       }
>>         mutex_unlock(&rdev->ring_lock);
>>         radeon_save_bios_scratch_regs(rdev);
>> diff --git a/drivers/gpu/drm/radeon/radeon_fence.c
>> b/drivers/gpu/drm/radeon/radeon_fence.c
>> index bf7b20e..28c09b6 100644
>> --- a/drivers/gpu/drm/radeon/radeon_fence.c
>> +++ b/drivers/gpu/drm/radeon/radeon_fence.c
>> @@ -609,26 +609,20 @@ int radeon_fence_wait_next_locked(struct
>> radeon_device *rdev, int ring)
>>    * Returns 0 if the fences have passed, error for all other cases.
>>    * Caller must hold ring lock.
>>    */
>> -void radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring)
>> +int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring)
>>   {
>>         uint64_t seq = rdev->fence_drv[ring].sync_seq[ring];
>> +       int r;
>>   -     while(1) {
>> -               int r;
>> -               r = radeon_fence_wait_seq(rdev, seq, ring, false, false);
>> +       r = radeon_fence_wait_seq(rdev, seq, ring, false, false);
>> +       if (r) {
>>                 if (r == -EDEADLK) {
>> -                       mutex_unlock(&rdev->ring_lock);
>> -                       r = radeon_gpu_reset(rdev);
>> -                       mutex_lock(&rdev->ring_lock);
>> -                       if (!r)
>> -                               continue;
>> -               }
>> -               if (r) {
>> -                       dev_err(rdev->dev, "error waiting for ring to
>> become"
>> -                               " idle (%d)\n", r);
>> +                       return -EDEADLK;
>>                 }
>> -               return;
>> +               dev_err(rdev->dev, "error waiting for ring[%d] to become
>> idle (%d)\n",
>> +                       ring, r);
>>         }
>> +       return 0;
>>   }
>>     /**
>> @@ -854,13 +848,17 @@ int radeon_fence_driver_init(struct radeon_device
>> *rdev)
>>    */
>>   void radeon_fence_driver_fini(struct radeon_device *rdev)
>>   {
>> -       int ring;
>> +       int ring, r;
>>         mutex_lock(&rdev->ring_lock);
>>         for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
>>                 if (!rdev->fence_drv[ring].initialized)
>>                         continue;
>> -               radeon_fence_wait_empty_locked(rdev, ring);
>> +               r = radeon_fence_wait_empty_locked(rdev, ring);
>> +               if (r) {
>> +                       /* no need to trigger GPU reset as we are
>> unloading */
>> +                       radeon_fence_driver_force_completion(rdev);
>> +               }
>>                 wake_up_all(&rdev->fence_queue);
>>                 radeon_scratch_free(rdev,
>> rdev->fence_drv[ring].scratch_reg);
>>                 rdev->fence_drv[ring].initialized = false;
>> diff --git a/drivers/gpu/drm/radeon/radeon_pm.c
>> b/drivers/gpu/drm/radeon/radeon_pm.c
>> index aa14dbb..0bfa656 100644
>> --- a/drivers/gpu/drm/radeon/radeon_pm.c
>> +++ b/drivers/gpu/drm/radeon/radeon_pm.c
>> @@ -234,7 +234,7 @@ static void radeon_set_power_state(struct
>> radeon_device *rdev)
>>     static void radeon_pm_set_clocks(struct radeon_device *rdev)
>>   {
>> -       int i;
>> +       int i, r;
>>         /* no need to take locks, etc. if nothing's going to change */
>>         if ((rdev->pm.requested_clock_mode_index ==
>> rdev->pm.current_clock_mode_index) &&
>> @@ -248,8 +248,17 @@ static void radeon_pm_set_clocks(struct radeon_device
>> *rdev)
>>         /* wait for the rings to drain */
>>         for (i = 0; i < RADEON_NUM_RINGS; i++) {
>>                 struct radeon_ring *ring = &rdev->ring[i];
>> -               if (ring->ready)
>> -                       radeon_fence_wait_empty_locked(rdev, i);
>> +               if (!ring->ready) {
>> +                       continue;
>> +               }
>> +               r = radeon_fence_wait_empty_locked(rdev, i);
>> +               if (r) {
>> +                       /* needs a GPU reset dont reset here */
>> +                       mutex_unlock(&rdev->ring_lock);
>> +                       up_write(&rdev->pm.mclk_lock);
>> +                       mutex_unlock(&rdev->ddev->struct_mutex);
>> +                       return;
>> +               }
>>         }
>>         radeon_unmap_vram_bos(rdev);
>
>
> _______________________________________________
> dri-devel mailing list
> dri-devel@xxxxxxxxxxxxxxxxxxxxx
> http://lists.freedesktop.org/mailman/listinfo/dri-devel
_______________________________________________
dri-devel mailing list
dri-devel@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/dri-devel



[Index of Archives]     [Linux DRI Users]     [Linux Intel Graphics]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [XFree86]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux