On Mon, Aug 18, 2014 at 11:02 AM, Christian König <deathsimple@xxxxxxxxxxx> wrote: > Am 18.08.2014 um 16:45 schrieb Maarten Lankhorst: > >> This is needed for the next commit, because the lockup detection >> will need the read lock to run. >> >> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@xxxxxxxxxxxxx> >> --- >> drivers/gpu/drm/radeon/radeon.h | 2 +- >> drivers/gpu/drm/radeon/radeon_device.c | 61 >> ++++++++++++++++++++-------------- >> 2 files changed, 37 insertions(+), 26 deletions(-) >> >> diff --git a/drivers/gpu/drm/radeon/radeon.h >> b/drivers/gpu/drm/radeon/radeon.h >> index 9e1732eb402c..1d806983ec7b 100644 >> --- a/drivers/gpu/drm/radeon/radeon.h >> +++ b/drivers/gpu/drm/radeon/radeon.h >> @@ -2312,7 +2312,7 @@ struct radeon_device { >> bool need_dma32; >> bool accel_working; >> bool fastfb_working; /* IGP feature*/ >> - bool needs_reset; >> + bool needs_reset, in_reset; >> struct radeon_surface_reg surface_regs[RADEON_GEM_MAX_SURFACES]; >> const struct firmware *me_fw; /* all family ME firmware */ >> const struct firmware *pfp_fw; /* r6/700 PFP firmware */ >> diff --git a/drivers/gpu/drm/radeon/radeon_device.c >> b/drivers/gpu/drm/radeon/radeon_device.c >> index c8ea050c8fa4..82633fdd399d 100644 >> --- a/drivers/gpu/drm/radeon/radeon_device.c >> +++ b/drivers/gpu/drm/radeon/radeon_device.c >> @@ -1671,29 +1671,34 @@ int radeon_gpu_reset(struct radeon_device *rdev) >> down_write(&rdev->exclusive_lock); >> if (!rdev->needs_reset) { >> + WARN_ON(rdev->in_reset); >> up_write(&rdev->exclusive_lock); >> return 0; >> } >> rdev->needs_reset = false; >> - >> - radeon_save_bios_scratch_regs(rdev); >> - /* block TTM */ >> resched = ttm_bo_lock_delayed_workqueue(&rdev->mman.bdev); >> - radeon_pm_suspend(rdev); >> - radeon_suspend(rdev); >> - for (i = 0; i < RADEON_NUM_RINGS; ++i) { >> - ring_sizes[i] = radeon_ring_backup(rdev, &rdev->ring[i], >> - &ring_data[i]); >> - if (ring_sizes[i]) { >> - saved = true; >> - dev_info(rdev->dev, "Saved %d dwords of commands " >> - "on ring %d.\n", ring_sizes[i], i); >> + if (!rdev->in_reset) { >> + rdev->in_reset = true; >> + >> + radeon_save_bios_scratch_regs(rdev); >> + /* block TTM */ >> + radeon_pm_suspend(rdev); >> + radeon_suspend(rdev); > > > That won't work correctly because you might end up with calling pm_resume > more often than suspend and that can only lead to a crash. Saying this we > probably already have a bug in the reset code at this point anyway, but see > below. > > >> + >> + for (i = 0; i < RADEON_NUM_RINGS; ++i) { >> + ring_sizes[i] = radeon_ring_backup(rdev, >> &rdev->ring[i], >> + &ring_data[i]); >> + if (ring_sizes[i]) { >> + saved = true; >> + dev_info(rdev->dev, "Saved %d dwords of >> commands " >> + "on ring %d.\n", ring_sizes[i], >> i); >> + } >> } >> - } >> + } else >> + memset(ring_data, 0, sizeof(ring_data)); >> -retry: >> r = radeon_asic_reset(rdev); >> if (!r) { >> dev_info(rdev->dev, "GPU reset succeeded, trying to >> resume\n"); >> @@ -1702,40 +1707,46 @@ retry: >> radeon_restore_bios_scratch_regs(rdev); > > > We should resume PM here as well. > > >> - if (!r) { >> + if (!r && saved) { >> for (i = 0; i < RADEON_NUM_RINGS; ++i) { >> radeon_ring_restore(rdev, &rdev->ring[i], >> ring_sizes[i], ring_data[i]); >> - ring_sizes[i] = 0; >> ring_data[i] = NULL; >> } >> + } else { >> + radeon_fence_driver_force_completion(rdev); >> + >> + for (i = 0; i < RADEON_NUM_RINGS; ++i) { >> + kfree(ring_data[i]); >> + } >> + } >> + downgrade_write(&rdev->exclusive_lock); >> + ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched); > > > I would unlock the delayed_workqueue first and then downgrade the readlock. > > >> + if (!r) { >> r = radeon_ib_ring_tests(rdev); >> if (r) { >> dev_err(rdev->dev, "ib ring test failed (%d).\n", >> r); >> if (saved) { >> - saved = false; >> + /* if reset fails, try without saving data >> */ >> + rdev->needs_reset = true; >> radeon_suspend(rdev); >> - goto retry; >> + up_read(&rdev->exclusive_lock); >> + return -EAGAIN; >> } >> } >> - } else { >> - radeon_fence_driver_force_completion(rdev); >> - for (i = 0; i < RADEON_NUM_RINGS; ++i) { >> - kfree(ring_data[i]); >> - } >> } >> > > >> radeon_pm_resume(rdev); > > > Move this more up. > > Alex is more into this, but it's probably a bug in the current reset code > that this is after the IB tests, cause the IB tests needs everything powered > up and with PM handling suspended it is possible that individual blocks are > powered down. Yeah, looks like a bug. I think the attached patch should fix it. Alex > > Thanks, > Christian. > > >> drm_helper_resume_force_mode(rdev->ddev); >> - ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched); >> if (r) { >> /* bad news, how to tell it to userspace ? */ >> dev_info(rdev->dev, "GPU reset failed\n"); >> } >> - up_write(&rdev->exclusive_lock); >> + rdev->in_reset = false; >> + up_read(&rdev->exclusive_lock); >> return r; >> } >> > > > _______________________________________________ > dri-devel mailing list > dri-devel@xxxxxxxxxxxxxxxxxxxxx > http://lists.freedesktop.org/mailman/listinfo/dri-devel
From 43021a83cf04064e9771964233487fe9f49fa3db Mon Sep 17 00:00:00 2001 From: Alex Deucher <alexander.deucher@xxxxxxx> Date: Mon, 18 Aug 2014 11:57:28 -0400 Subject: [PATCH] drm/radeon: fix pm handling in radeon_gpu_reset pm_suspend is handled in the radeon_suspend callbacks. pm_resume has special handling depending on whether dpm or legacy pm is enabled. Change radeon_gpu_reset to mirror the behavior in the suspend and resume pathes. Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx> Cc: stable@xxxxxxxxxxxxxxx --- drivers/gpu/drm/radeon/radeon_device.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index c8ea050..8e61870 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1680,7 +1680,6 @@ int radeon_gpu_reset(struct radeon_device *rdev) radeon_save_bios_scratch_regs(rdev); /* block TTM */ resched = ttm_bo_lock_delayed_workqueue(&rdev->mman.bdev); - radeon_pm_suspend(rdev); radeon_suspend(rdev); for (i = 0; i < RADEON_NUM_RINGS; ++i) { @@ -1726,9 +1725,24 @@ retry: } } - radeon_pm_resume(rdev); + if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { + /* do dpm late init */ + r = radeon_pm_late_init(rdev); + if (r) { + rdev->pm.dpm_enabled = false; + DRM_ERROR("radeon_pm_late_init failed, disabling dpm\n"); + } + } else { + /* resume old pm late */ + radeon_pm_resume(rdev); + } + drm_helper_resume_force_mode(rdev->ddev); + /* set the power state here in case we are a PX system or headless */ + if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) + radeon_pm_compute_clocks(rdev); + ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched); if (r) { /* bad news, how to tell it to userspace ? */ -- 1.8.3.1
_______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/dri-devel