On 2/17/2025 8:56 PM, Alex Deucher wrote:
> From: "chr[]" <chris@xxxxxxxxxxx>
>
> resume and irq handler happily races in set_power_state()
>
> * amdgpu_legacy_dpm_compute_clocks() needs lock
> * protect irq work handler
> * fix dpm_enabled usage
>
> v2: fix clang build, integrate Lijo's comments (Alex)
>
> Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/2524
> Fixes: 3712e7a49459 ("drm/amd/pm: unified lock protections in amdgpu_dpm.c")
> Tested-by: Maciej S. Szmigiero <mail@xxxxxxxxxxxxxxxxxxxxx> # on Oland PRO
> Signed-off-by: chr[] <chris@xxxxxxxxxxx>
> Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx>

Reviewed-by: Lijo Lazar <lijo.lazar@xxxxxxx>

Thanks,
Lijo

> ---
> drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c | 25 +++++++++++++-----
> .../gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c | 8 ++++--
> drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c | 26 ++++++++++++++-----
> 3 files changed, 45 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c
> index 67a8e22b1126d..e237ea1185a71 100644
> --- a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c
> +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c
> @@ -3042,6 +3042,7 @@ static int kv_dpm_hw_init(struct amdgpu_ip_block *ip_block)
> if (!amdgpu_dpm)
> return 0;
>
> + mutex_lock(&adev->pm.mutex);
> kv_dpm_setup_asic(adev);
> ret = kv_dpm_enable(adev);
> if (ret)
> @@ -3049,6 +3050,8 @@ static int kv_dpm_hw_init(struct amdgpu_ip_block *ip_block)
> else
> adev->pm.dpm_enabled = true;
> amdgpu_legacy_dpm_compute_clocks(adev);
> + mutex_unlock(&adev->pm.mutex);
> +
> return ret;
> }
>
> @@ -3066,32 +3069,42 @@ static int kv_dpm_suspend(struct amdgpu_ip_block *ip_block)
> {
> struct amdgpu_device *adev = ip_block->adev;
>
> + cancel_work_sync(&adev->pm.dpm.thermal.work);
> +
> if (adev->pm.dpm_enabled) {
> + mutex_lock(&adev->pm.mutex);
> + adev->pm.dpm_enabled = false;
> /* disable dpm */
> kv_dpm_disable(adev);
> /* reset the power state */
>
adev->pm.dpm.current_ps = adev->pm.dpm.requested_ps = adev->pm.dpm.boot_ps;
> + mutex_unlock(&adev->pm.mutex);
> }
> return 0;
> }
>
> static int kv_dpm_resume(struct amdgpu_ip_block *ip_block)
> {
> - int ret;
> + int ret = 0;
> struct amdgpu_device *adev = ip_block->adev;
>
> - if (adev->pm.dpm_enabled) {
> + if (!amdgpu_dpm)
> + return 0;
> +
> + if (!adev->pm.dpm_enabled) {
> + mutex_lock(&adev->pm.mutex);
> /* asic init will reset to the boot state */
> kv_dpm_setup_asic(adev);
> ret = kv_dpm_enable(adev);
> - if (ret)
> + if (ret) {
> adev->pm.dpm_enabled = false;
> - else
> + } else {
> adev->pm.dpm_enabled = true;
> - if (adev->pm.dpm_enabled)
> amdgpu_legacy_dpm_compute_clocks(adev);
> + }
> + mutex_unlock(&adev->pm.mutex);
> }
> - return 0;
> + return ret;
> }
>
> static bool kv_dpm_is_idle(void *handle)
> diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
> index e861355ebd75b..c7518b13e7879 100644
> --- a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
> +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
> @@ -1009,9 +1009,12 @@ void amdgpu_dpm_thermal_work_handler(struct work_struct *work)
> enum amd_pm_state_type dpm_state = POWER_STATE_TYPE_INTERNAL_THERMAL;
> int temp, size = sizeof(temp);
>
> - if (!adev->pm.dpm_enabled)
> - return;
> + mutex_lock(&adev->pm.mutex);
>
> + if (!adev->pm.dpm_enabled) {
> + mutex_unlock(&adev->pm.mutex);
> + return;
> + }
> if (!pp_funcs->read_sensor(adev->powerplay.pp_handle,
> AMDGPU_PP_SENSOR_GPU_TEMP,
> (void *)&temp,
> @@ -1033,4 +1036,5 @@ void amdgpu_dpm_thermal_work_handler(struct work_struct *work)
> adev->pm.dpm.state = dpm_state;
>
> amdgpu_legacy_dpm_compute_clocks(adev->powerplay.pp_handle);
> + mutex_unlock(&adev->pm.mutex);
> }
> diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
> index a87dcf0974bc1..d6dfe2599ebea 100644
> --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
> +++
b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
> @@ -7786,6 +7786,7 @@ static int si_dpm_hw_init(struct amdgpu_ip_block *ip_block)
> if (!amdgpu_dpm)
> return 0;
>
> + mutex_lock(&adev->pm.mutex);
> si_dpm_setup_asic(adev);
> ret = si_dpm_enable(adev);
> if (ret)
> @@ -7793,6 +7794,7 @@ static int si_dpm_hw_init(struct amdgpu_ip_block *ip_block)
> else
> adev->pm.dpm_enabled = true;
> amdgpu_legacy_dpm_compute_clocks(adev);
> + mutex_unlock(&adev->pm.mutex);
> return ret;
> }
>
> @@ -7810,32 +7812,44 @@ static int si_dpm_suspend(struct amdgpu_ip_block *ip_block)
> {
> struct amdgpu_device *adev = ip_block->adev;
>
> + cancel_work_sync(&adev->pm.dpm.thermal.work);
> +
> if (adev->pm.dpm_enabled) {
> + mutex_lock(&adev->pm.mutex);
> + adev->pm.dpm_enabled = false;
> /* disable dpm */
> si_dpm_disable(adev);
> /* reset the power state */
> adev->pm.dpm.current_ps = adev->pm.dpm.requested_ps = adev->pm.dpm.boot_ps;
> + mutex_unlock(&adev->pm.mutex);
> }
> +
> return 0;
> }
>
> static int si_dpm_resume(struct amdgpu_ip_block *ip_block)
> {
> - int ret;
> + int ret = 0;
> struct amdgpu_device *adev = ip_block->adev;
>
> - if (adev->pm.dpm_enabled) {
> + if (!amdgpu_dpm)
> + return 0;
> +
> + if (!adev->pm.dpm_enabled) {
> /* asic init will reset to the boot state */
> + mutex_lock(&adev->pm.mutex);
> si_dpm_setup_asic(adev);
> ret = si_dpm_enable(adev);
> - if (ret)
> + if (ret) {
> adev->pm.dpm_enabled = false;
> - else
> + } else {
> adev->pm.dpm_enabled = true;
> - if (adev->pm.dpm_enabled)
> amdgpu_legacy_dpm_compute_clocks(adev);
> + }
> + mutex_unlock(&adev->pm.mutex);
> }
> - return 0;
> + return ret;
> }
>
> static bool si_dpm_is_idle(void *handle)