Re: [PATCH] amdgpu/pm/legacy: fix suspend/resume issues

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 2/17/2025 3:26 PM, Lazar, Lijo wrote:
> 
> 
> On 2/12/2025 9:41 PM, Alex Deucher wrote:
>> From: "chr[]" <chris@xxxxxxxxxxx>
>>
>> resume and irq handler happily races in set_power_state()
>>
>> * amdgpu_legacy_dpm_compute_clocks() needs lock
>> * protect irq work handler
>> * fix dpm_enabled usage
>>
>> Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/2524
>> Fixes: 3712e7a49459 ("drm/amd/pm: unified lock protections in amdgpu_dpm.c")
>> Signed-off-by: chr[] <chris@xxxxxxxxxxx>
>> Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx>
>> ---
>>  drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c    | 19 ++++++++++++++----
>>  .../gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c    |  2 ++
>>  drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c    | 20 +++++++++++++++----
>>  3 files changed, 33 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c
>> index 67a8e22b1126d..db0d3d4dba685 100644
>> --- a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c
>> +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c
>> @@ -3042,6 +3042,7 @@ static int kv_dpm_hw_init(struct amdgpu_ip_block *ip_block)
>>  	if (!amdgpu_dpm)
>>  		return 0;
>>  
>> +	mutex_lock(&adev->pm.mutex);
>>  	kv_dpm_setup_asic(adev);
>>  	ret = kv_dpm_enable(adev);
>>  	if (ret)
>> @@ -3049,6 +3050,8 @@ static int kv_dpm_hw_init(struct amdgpu_ip_block *ip_block)
>>  	else
>>  		adev->pm.dpm_enabled = true;
>>  	amdgpu_legacy_dpm_compute_clocks(adev);
>> +	mutex_unlock(&adev->pm.mutex);
>> +
>>  	return ret;
>>  }
>>  
>> @@ -3067,10 +3070,13 @@ static int kv_dpm_suspend(struct amdgpu_ip_block *ip_block)
>>  	struct amdgpu_device *adev = ip_block->adev;
>>  
>>  	if (adev->pm.dpm_enabled) {
>> +		mutex_lock(&adev->pm.mutex);
>> +		adev->pm.dpm_enabled = false;
>>  		/* disable dpm */
>>  		kv_dpm_disable(adev);
>>  		/* reset the power state */
>>  		adev->pm.dpm.current_ps = adev->pm.dpm.requested_ps = adev->pm.dpm.boot_ps;
>> +		mutex_unlock(&adev->pm.mutex);
>>  	}
>>  	return 0;
>>  }
>> @@ -3080,18 +3086,23 @@ static int kv_dpm_resume(struct amdgpu_ip_block *ip_block)
>>  	int ret;
>>  	struct amdgpu_device *adev = ip_block->adev;
>>  
>> -	if (adev->pm.dpm_enabled) {
>> +	if (!amdgpu_dpm)
>> +		return 0;
>> +
>> +	if (!adev->pm.dpm_enabled) {
>> +		mutex_lock(&adev->pm.mutex);
>>  		/* asic init will reset to the boot state */
>>  		kv_dpm_setup_asic(adev);
>>  		ret = kv_dpm_enable(adev);
>>  		if (ret)
>>  			adev->pm.dpm_enabled = false;
> 
> Need braces here as well since else part is with braces.
>> -		else
>> +		else {
>>  			adev->pm.dpm_enabled = true;
>> -		if (adev->pm.dpm_enabled)
>>  			amdgpu_legacy_dpm_compute_clocks(adev);
>> +		}
>> +		mutex_unlock(&adev->pm.mutex);
>>  	}
>> -	return 0;
>> +	return ret;
> 
> Possibility of returning unintialized value.
> 
>>  }
>>  
>>  static bool kv_dpm_is_idle(void *handle)
>> diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
>> index e861355ebd75b..e06c25c945007 100644
>> --- a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
>> +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
>> @@ -1012,6 +1012,7 @@ void amdgpu_dpm_thermal_work_handler(struct work_struct *work)
>>  	if (!adev->pm.dpm_enabled)
>>  		return;
>>  
>> +	mutex_lock(&adev->pm.mutex);
> 
> I think taking this lock should happen before the dpm_enabled variable
> check. Otherwise, it could happen that part of this sequence might still
> go through after a kv/si_dpm_suspend.

One more comment - probably it's worthwhile to cancel any scheduled
amdgpu_dpm_thermal_work at the beginning of *_dpm_suspend.

Thanks,
Lijo

> 
>>  	if (!pp_funcs->read_sensor(adev->powerplay.pp_handle,
>>  				   AMDGPU_PP_SENSOR_GPU_TEMP,
>>  				   (void *)&temp,
>> @@ -1033,4 +1034,5 @@ void amdgpu_dpm_thermal_work_handler(struct work_struct *work)
>>  	adev->pm.dpm.state = dpm_state;
>>  
>>  	amdgpu_legacy_dpm_compute_clocks(adev->powerplay.pp_handle);
>> +	mutex_unlock(&adev->pm.mutex);
>>  }
>> diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
>> index a87dcf0974bc1..c40ec4f74afb0 100644
>> --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
>> +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
>> @@ -7786,6 +7786,7 @@ static int si_dpm_hw_init(struct amdgpu_ip_block *ip_block)
>>  	if (!amdgpu_dpm)
>>  		return 0;
>>  
>> +	mutex_lock(&adev->pm.mutex);
>>  	si_dpm_setup_asic(adev);
>>  	ret = si_dpm_enable(adev);
>>  	if (ret)
>> @@ -7793,6 +7794,7 @@ static int si_dpm_hw_init(struct amdgpu_ip_block *ip_block)
>>  	else
>>  		adev->pm.dpm_enabled = true;
>>  	amdgpu_legacy_dpm_compute_clocks(adev);
>> +	mutex_unlock(&adev->pm.mutex);
>>  	return ret;
>>  }
>>  
>> @@ -7811,11 +7813,15 @@ static int si_dpm_suspend(struct amdgpu_ip_block *ip_block)
>>  	struct amdgpu_device *adev = ip_block->adev;
>>  
>>  	if (adev->pm.dpm_enabled) {
>> +		mutex_lock(&adev->pm.mutex);
>> +		adev->pm.dpm_enabled = false;
>>  		/* disable dpm */
>>  		si_dpm_disable(adev);
>>  		/* reset the power state */
>>  		adev->pm.dpm.current_ps = adev->pm.dpm.requested_ps = adev->pm.dpm.boot_ps;
>> +		mutex_unlock(&adev->pm.mutex);
>>  	}
>> +
>>  	return 0;
>>  }
>>  
>> @@ -7824,18 +7830,24 @@ static int si_dpm_resume(struct amdgpu_ip_block *ip_block)
>>  	int ret;
>>  	struct amdgpu_device *adev = ip_block->adev;
>>  
>> -	if (adev->pm.dpm_enabled) {
>> +	if (!amdgpu_dpm)
>> +		return 0;
>> +
>> +	if (!adev->pm.dpm_enabled) {
>>  		/* asic init will reset to the boot state */
>> +		mutex_lock(&adev->pm.mutex);
>>  		si_dpm_setup_asic(adev);
>>  		ret = si_dpm_enable(adev);
>>  		if (ret)
>>  			adev->pm.dpm_enabled = false;
> 
> Same 'braces' comment.
> 
>> -		else
>> +		else {
>>  			adev->pm.dpm_enabled = true;
>> -		if (adev->pm.dpm_enabled)
>>  			amdgpu_legacy_dpm_compute_clocks(adev);
>> +		}
>> +		mutex_unlock(&adev->pm.mutex);
>>  	}
>> -	return 0;
>> +
>> +	return ret;
> 
> Possibility of returning unintialized value.
> 
> Thanks,
> Lijo
> 
>>  }
>>  
>>  static bool si_dpm_is_idle(void *handle)
> 




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux