Added to fixes and stable. Thanks! On Mon, Dec 17, 2012 at 11:41 AM, Christian König <deathsimple@xxxxxxxxxxx> wrote: > On 17.12.2012 17:04, j.glisse@xxxxxxxxx wrote: >> >> From: Jerome Glisse <jglisse@xxxxxxxxxx> >> >> radeon_fence_wait_empty_locked should not trigger GPU reset as no >> place where it's call from would benefit from such thing and it >> actually lead to a kernel deadlock in case the reset is triggered >> from pm codepath. Instead force ring completion in place where it >> makes sense or return early in others. >> >> Signed-off-by: Jerome Glisse <jglisse@xxxxxxxxxx> > > > I wanted to stop losing GPU reset signals by moving the reset into > radeon_fence_wait_empty locked, but it's true that in most cases it doesn't > make much sense (suspend/finish) and I didn't know that it could cause a > hang in the PM code. > > We should probably fix the PM code to properly signal this condition to it's > caller and reset the GPU when it is possible to do so, but fixing the > deadlock is of course more important. > > Also should probably go into the stable kernel as well, but anyway it is: > > Reviewed-by: Christian König <christian.koenig@xxxxxxx> > > >> --- >> drivers/gpu/drm/radeon/radeon.h | 2 +- >> drivers/gpu/drm/radeon/radeon_device.c | 13 +++++++++++-- >> drivers/gpu/drm/radeon/radeon_fence.c | 30 >> ++++++++++++++---------------- >> drivers/gpu/drm/radeon/radeon_pm.c | 15 ++++++++++++--- >> 4 files changed, 38 insertions(+), 22 deletions(-) >> >> diff --git a/drivers/gpu/drm/radeon/radeon.h >> b/drivers/gpu/drm/radeon/radeon.h >> index 9c7625c..071b2d7 100644 >> --- a/drivers/gpu/drm/radeon/radeon.h >> +++ b/drivers/gpu/drm/radeon/radeon.h >> @@ -231,7 +231,7 @@ void radeon_fence_process(struct radeon_device *rdev, >> int ring); >> bool radeon_fence_signaled(struct radeon_fence *fence); >> int radeon_fence_wait(struct radeon_fence *fence, bool interruptible); >> int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring); >> -void radeon_fence_wait_empty_locked(struct radeon_device *rdev, int >> ring); >> +int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring); >> int radeon_fence_wait_any(struct radeon_device *rdev, >> struct radeon_fence **fences, >> bool intr); >> diff --git a/drivers/gpu/drm/radeon/radeon_device.c >> b/drivers/gpu/drm/radeon/radeon_device.c >> index 774fae7..53a9223 100644 >> --- a/drivers/gpu/drm/radeon/radeon_device.c >> +++ b/drivers/gpu/drm/radeon/radeon_device.c >> @@ -1163,6 +1163,7 @@ int radeon_suspend_kms(struct drm_device *dev, >> pm_message_t state) >> struct drm_crtc *crtc; >> struct drm_connector *connector; >> int i, r; >> + bool force_completion = false; >> if (dev == NULL || dev->dev_private == NULL) { >> return -ENODEV; >> @@ -1205,8 +1206,16 @@ int radeon_suspend_kms(struct drm_device *dev, >> pm_message_t state) >> mutex_lock(&rdev->ring_lock); >> /* wait for gpu to finish processing current batch */ >> - for (i = 0; i < RADEON_NUM_RINGS; i++) >> - radeon_fence_wait_empty_locked(rdev, i); >> + for (i = 0; i < RADEON_NUM_RINGS; i++) { >> + r = radeon_fence_wait_empty_locked(rdev, i); >> + if (r) { >> + /* delay GPU reset to resume */ >> + force_completion = true; >> + } >> + } >> + if (force_completion) { >> + radeon_fence_driver_force_completion(rdev); >> + } >> mutex_unlock(&rdev->ring_lock); >> radeon_save_bios_scratch_regs(rdev); >> diff --git a/drivers/gpu/drm/radeon/radeon_fence.c >> b/drivers/gpu/drm/radeon/radeon_fence.c >> index bf7b20e..28c09b6 100644 >> --- a/drivers/gpu/drm/radeon/radeon_fence.c >> +++ b/drivers/gpu/drm/radeon/radeon_fence.c >> @@ -609,26 +609,20 @@ int radeon_fence_wait_next_locked(struct >> radeon_device *rdev, int ring) >> * Returns 0 if the fences have passed, error for all other cases. >> * Caller must hold ring lock. >> */ >> -void radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring) >> +int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring) >> { >> uint64_t seq = rdev->fence_drv[ring].sync_seq[ring]; >> + int r; >> - while(1) { >> - int r; >> - r = radeon_fence_wait_seq(rdev, seq, ring, false, false); >> + r = radeon_fence_wait_seq(rdev, seq, ring, false, false); >> + if (r) { >> if (r == -EDEADLK) { >> - mutex_unlock(&rdev->ring_lock); >> - r = radeon_gpu_reset(rdev); >> - mutex_lock(&rdev->ring_lock); >> - if (!r) >> - continue; >> - } >> - if (r) { >> - dev_err(rdev->dev, "error waiting for ring to >> become" >> - " idle (%d)\n", r); >> + return -EDEADLK; >> } >> - return; >> + dev_err(rdev->dev, "error waiting for ring[%d] to become >> idle (%d)\n", >> + ring, r); >> } >> + return 0; >> } >> /** >> @@ -854,13 +848,17 @@ int radeon_fence_driver_init(struct radeon_device >> *rdev) >> */ >> void radeon_fence_driver_fini(struct radeon_device *rdev) >> { >> - int ring; >> + int ring, r; >> mutex_lock(&rdev->ring_lock); >> for (ring = 0; ring < RADEON_NUM_RINGS; ring++) { >> if (!rdev->fence_drv[ring].initialized) >> continue; >> - radeon_fence_wait_empty_locked(rdev, ring); >> + r = radeon_fence_wait_empty_locked(rdev, ring); >> + if (r) { >> + /* no need to trigger GPU reset as we are >> unloading */ >> + radeon_fence_driver_force_completion(rdev); >> + } >> wake_up_all(&rdev->fence_queue); >> radeon_scratch_free(rdev, >> rdev->fence_drv[ring].scratch_reg); >> rdev->fence_drv[ring].initialized = false; >> diff --git a/drivers/gpu/drm/radeon/radeon_pm.c >> b/drivers/gpu/drm/radeon/radeon_pm.c >> index aa14dbb..0bfa656 100644 >> --- a/drivers/gpu/drm/radeon/radeon_pm.c >> +++ b/drivers/gpu/drm/radeon/radeon_pm.c >> @@ -234,7 +234,7 @@ static void radeon_set_power_state(struct >> radeon_device *rdev) >> static void radeon_pm_set_clocks(struct radeon_device *rdev) >> { >> - int i; >> + int i, r; >> /* no need to take locks, etc. if nothing's going to change */ >> if ((rdev->pm.requested_clock_mode_index == >> rdev->pm.current_clock_mode_index) && >> @@ -248,8 +248,17 @@ static void radeon_pm_set_clocks(struct radeon_device >> *rdev) >> /* wait for the rings to drain */ >> for (i = 0; i < RADEON_NUM_RINGS; i++) { >> struct radeon_ring *ring = &rdev->ring[i]; >> - if (ring->ready) >> - radeon_fence_wait_empty_locked(rdev, i); >> + if (!ring->ready) { >> + continue; >> + } >> + r = radeon_fence_wait_empty_locked(rdev, i); >> + if (r) { >> + /* needs a GPU reset dont reset here */ >> + mutex_unlock(&rdev->ring_lock); >> + up_write(&rdev->pm.mclk_lock); >> + mutex_unlock(&rdev->ddev->struct_mutex); >> + return; >> + } >> } >> radeon_unmap_vram_bos(rdev); > > > _______________________________________________ > dri-devel mailing list > dri-devel@xxxxxxxxxxxxxxxxxxxxx > http://lists.freedesktop.org/mailman/listinfo/dri-devel _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/dri-devel