From: Michel Dänzer <mdaenzer@xxxxxxxxxx>
schedule_delayed_work does not push back the work if it was already
scheduled before, so amdgpu_device_delay_enable_gfx_off ran ~100 ms
after the first time GFXOFF was disabled and re-enabled, even if
GFXOFF
was disabled and re-enabled again during those 100 ms.
This resulted in frame drops / stutter with the upcoming mutter 41
release on Navi 14, due to constantly enabling GFXOFF in the HW and
disabling it again (for getting the GPU clock counter).
To fix this, call cancel_delayed_work_sync when the disable count
transitions from 0 to 1, and only schedule the delayed work on the
reverse transition, not if the disable count was already 0. This
makes
sure the delayed work doesn't run at unexpected times, and
allows it to
be lock-free.
v2:
* Use cancel_delayed_work_sync & mutex_trylock instead of
mod_delayed_work.
v3:
* Make amdgpu_device_delay_enable_gfx_off lock-free (Christian
König)
v4:
* Fix race condition between amdgpu_gfx_off_ctrl incrementing
adev->gfx.gfx_off_req_count and
amdgpu_device_delay_enable_gfx_off
checking for it to be 0 (Evan Quan)
Cc: stable@xxxxxxxxxxxxxxx
Reviewed-by: Lijo Lazar <lijo.lazar@xxxxxxx> # v3
Acked-by: Christian König <christian.koenig@xxxxxxx> # v3
Signed-off-by: Michel Dänzer <mdaenzer@xxxxxxxxxx>
---
Alex, probably best to wait a bit longer before picking this up. :)
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 +++----
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 36
+++++++++++++++-------
2 files changed, 30 insertions(+), 17 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index f3fd5ec710b6..f944ed858f3e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2777,12 +2777,11 @@ static void
amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
struct amdgpu_device *adev =
container_of(work, struct amdgpu_device,
gfx.gfx_off_delay_work.work);
- mutex_lock(&adev->gfx.gfx_off_mutex);
- if (!adev->gfx.gfx_off_state &&
!adev->gfx.gfx_off_req_count) {
- if (!amdgpu_dpm_set_powergating_by_smu(adev,
AMD_IP_BLOCK_TYPE_GFX, true))
- adev->gfx.gfx_off_state = true;
- }
- mutex_unlock(&adev->gfx.gfx_off_mutex);
+ WARN_ON_ONCE(adev->gfx.gfx_off_state);
+ WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
+
+ if (!amdgpu_dpm_set_powergating_by_smu(adev,
AMD_IP_BLOCK_TYPE_GFX, true))
+ adev->gfx.gfx_off_state = true;
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index a0be0772c8b3..b4ced45301be 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -563,24 +563,38 @@ void amdgpu_gfx_off_ctrl(struct
amdgpu_device *adev, bool enable)
mutex_lock(&adev->gfx.gfx_off_mutex);
- if (!enable)
- adev->gfx.gfx_off_req_count++;
- else if (adev->gfx.gfx_off_req_count > 0)
+ if (enable) {
+ /* If the count is already 0, it means there's an
imbalance bug somewhere.
+ * Note that the bug may be in a different caller than
the one which triggers the
+ * WARN_ON_ONCE.
+ */
+ if (WARN_ON_ONCE(adev->gfx.gfx_off_req_count == 0))
+ goto unlock;
+
adev->gfx.gfx_off_req_count--;
- if (enable && !adev->gfx.gfx_off_state &&
!adev->gfx.gfx_off_req_count) {
- schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
GFX_OFF_DELAY_ENABLE);
- } else if (!enable && adev->gfx.gfx_off_state) {
- if (!amdgpu_dpm_set_powergating_by_smu(adev,
AMD_IP_BLOCK_TYPE_GFX, false)) {
- adev->gfx.gfx_off_state = false;
+ if (adev->gfx.gfx_off_req_count == 0 &&
!adev->gfx.gfx_off_state)
+
schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
GFX_OFF_DELAY_ENABLE);
+ } else {
+ if (adev->gfx.gfx_off_req_count == 0) {
+
cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
+
+ if (adev->gfx.gfx_off_state &&