On 11/13/2024 1:58 AM, Alex Deucher wrote: > smu->workload_mask is IP specific and should not be messed with in > the common code. The mask bits vary across SMU versions. > > Move all handling of smu->workload_mask in to the backends and > simplify the code. Store the user's preference in smu->power_profile_mode > which will be reflected in sysfs. For internal driver profile > switches for KFD or VCN, just update the workload mask so that the > user's preference is retained. Remove all of the extra now unused > workload related elements in the smu structure. > > v2: use refcounts for workload profiles > > Fixes: 8cc438be5d49 ("drm/amd/pm: correct the workload setting") > Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx> > Cc: Kenneth Feng <kenneth.feng@xxxxxxx> > Cc: Lijo Lazar <lijo.lazar@xxxxxxx> > --- > drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 128 +++++++++--------- > drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 13 +- > .../gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c | 20 +-- > .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c | 20 +-- > .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 21 +-- > .../gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 17 +-- > .../gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c | 17 +-- > .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 33 ++--- > .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 21 +-- > .../drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c | 33 ++--- > drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c | 8 -- > drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h | 2 - > 12 files changed, 162 insertions(+), 171 deletions(-) > > diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c > index c3a6b6f20455..41b591ecfb64 100644 > --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c > +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c > @@ -72,6 +72,10 @@ static int smu_set_power_limit(void *handle, uint32_t limit); > static int smu_set_fan_speed_rpm(void *handle, uint32_t speed); > static int smu_set_gfx_cgpg(struct smu_context *smu, 
bool enabled); > static int smu_set_mp1_state(void *handle, enum pp_mp1_state mp1_state); > +static void smu_power_profile_mode_get(struct smu_context *smu, > + enum PP_SMC_POWER_PROFILE profile_mode); > +static void smu_power_profile_mode_put(struct smu_context *smu, > + enum PP_SMC_POWER_PROFILE profile_mode); > > static int smu_sys_get_pp_feature_mask(void *handle, > char *buf) > @@ -765,6 +769,7 @@ static int smu_early_init(struct amdgpu_ip_block *ip_block) > smu->user_dpm_profile.fan_mode = -1; > > mutex_init(&smu->message_lock); > + mutex_init(&smu->workload_lock); > > adev->powerplay.pp_handle = smu; > adev->powerplay.pp_funcs = &swsmu_pm_funcs; > @@ -1268,9 +1273,6 @@ static int smu_sw_init(struct amdgpu_ip_block *ip_block) > INIT_WORK(&smu->interrupt_work, smu_interrupt_work_fn); > atomic64_set(&smu->throttle_int_counter, 0); > smu->watermarks_bitmap = 0; > - smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; > - smu->default_power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; > - smu->user_dpm_profile.user_workload_mask = 0; > > for (i = 0; i < adev->vcn.num_vcn_inst; i++) > atomic_set(&smu->smu_power.power_gate.vcn_gated[i], 1); > @@ -1278,33 +1280,13 @@ static int smu_sw_init(struct amdgpu_ip_block *ip_block) > atomic_set(&smu->smu_power.power_gate.vpe_gated, 1); > atomic_set(&smu->smu_power.power_gate.umsch_mm_gated, 1); > > - smu->workload_priority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT] = 0; > - smu->workload_priority[PP_SMC_POWER_PROFILE_FULLSCREEN3D] = 1; > - smu->workload_priority[PP_SMC_POWER_PROFILE_POWERSAVING] = 2; > - smu->workload_priority[PP_SMC_POWER_PROFILE_VIDEO] = 3; > - smu->workload_priority[PP_SMC_POWER_PROFILE_VR] = 4; > - smu->workload_priority[PP_SMC_POWER_PROFILE_COMPUTE] = 5; > - smu->workload_priority[PP_SMC_POWER_PROFILE_CUSTOM] = 6; > - > if (smu->is_apu || > - !smu_is_workload_profile_available(smu, PP_SMC_POWER_PROFILE_FULLSCREEN3D)) { > - smu->driver_workload_mask = > - 1 << 
smu->workload_priority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT]; > - } else { > - smu->driver_workload_mask = > - 1 << smu->workload_priority[PP_SMC_POWER_PROFILE_FULLSCREEN3D]; > - smu->default_power_profile_mode = PP_SMC_POWER_PROFILE_FULLSCREEN3D; > - } > - > - smu->workload_mask = smu->driver_workload_mask | > - smu->user_dpm_profile.user_workload_mask; > - smu->workload_setting[0] = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; > - smu->workload_setting[1] = PP_SMC_POWER_PROFILE_FULLSCREEN3D; > - smu->workload_setting[2] = PP_SMC_POWER_PROFILE_POWERSAVING; > - smu->workload_setting[3] = PP_SMC_POWER_PROFILE_VIDEO; > - smu->workload_setting[4] = PP_SMC_POWER_PROFILE_VR; > - smu->workload_setting[5] = PP_SMC_POWER_PROFILE_COMPUTE; > - smu->workload_setting[6] = PP_SMC_POWER_PROFILE_CUSTOM; > + !smu_is_workload_profile_available(smu, PP_SMC_POWER_PROFILE_FULLSCREEN3D)) > + smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; > + else > + smu->power_profile_mode = PP_SMC_POWER_PROFILE_FULLSCREEN3D; > + smu_power_profile_mode_get(smu, smu->power_profile_mode); > + > smu->display_config = &adev->pm.pm_display_cfg; > > smu->smu_dpm.dpm_level = AMD_DPM_FORCED_LEVEL_AUTO; > @@ -2252,24 +2234,41 @@ static int smu_enable_umd_pstate(void *handle, > } > > static int smu_bump_power_profile_mode(struct smu_context *smu, > - long *param, > - uint32_t param_size) > + long *param, > + uint32_t param_size, > + bool enable) > { > int ret = 0; > > if (smu->ppt_funcs->set_power_profile_mode) > - ret = smu->ppt_funcs->set_power_profile_mode(smu, param, param_size); > + ret = smu->ppt_funcs->set_power_profile_mode(smu, param, param_size, enable); Have a different expectation with refcount; not expecting to see enable/disable. I think only switch power_profile_mode is required. Workload mask is then created based on non-zero refcounts in this array - smu->workload_refcount[]. If it is different from the current mask, then it's applied. 
> > return ret; > } > > +static void smu_power_profile_mode_get(struct smu_context *smu, > + enum PP_SMC_POWER_PROFILE profile_mode) > +{ > + mutex_lock(&smu->workload_lock); I think this is not needed. DPM calls are already under lock, not seeing a case where it could do toggle get/put at the same time. > + smu->workload_refcount[profile_mode]++; > + mutex_unlock(&smu->workload_lock); > +} > + > +static void smu_power_profile_mode_put(struct smu_context *smu, > + enum PP_SMC_POWER_PROFILE profile_mode) > +{ > + mutex_lock(&smu->workload_lock); > + if (smu->workload_refcount[profile_mode]) > + smu->workload_refcount[profile_mode]--; > + mutex_unlock(&smu->workload_lock); > +} > + > static int smu_adjust_power_state_dynamic(struct smu_context *smu, > enum amd_dpm_forced_level level, > bool skip_display_settings, > bool init) > { > int ret = 0; > - int index = 0; > long workload[1]; > struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm); > > @@ -2307,13 +2306,11 @@ static int smu_adjust_power_state_dynamic(struct smu_context *smu, > } > > if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL && > - smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) { > - index = fls(smu->workload_mask); > - index = index > 0 && index <= WORKLOAD_POLICY_MAX ? index - 1 : 0; > - workload[0] = smu->workload_setting[index]; > + smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) { > + workload[0] = smu->power_profile_mode; > > - if (init || smu->power_profile_mode != workload[0]) > - smu_bump_power_profile_mode(smu, workload, 0); > + if (init) > + smu_bump_power_profile_mode(smu, workload, 0, true); Same here - not expecting to have init check here. Since workload_mask is 0 during init and workload_refcount is changed, it will set the right mask on smu_bump_power_profile_mode(). 
> } > > return ret; > @@ -2361,12 +2358,11 @@ static int smu_handle_dpm_task(void *handle, > > static int smu_switch_power_profile(void *handle, > enum PP_SMC_POWER_PROFILE type, > - bool en) > + bool enable) > { > struct smu_context *smu = handle; > struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm); > long workload[1]; > - uint32_t index; > > if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled) > return -EOPNOTSUPP; > @@ -2374,24 +2370,16 @@ static int smu_switch_power_profile(void *handle, > if (!(type < PP_SMC_POWER_PROFILE_CUSTOM)) > return -EINVAL; > > - if (!en) { > - smu->driver_workload_mask &= ~(1 << smu->workload_priority[type]); > - index = fls(smu->workload_mask); > - index = index > 0 && index <= WORKLOAD_POLICY_MAX ? index - 1 : 0; > - workload[0] = smu->workload_setting[index]; > - } else { > - smu->driver_workload_mask |= (1 << smu->workload_priority[type]); > - index = fls(smu->workload_mask); > - index = index <= WORKLOAD_POLICY_MAX ? index - 1 : 0; > - workload[0] = smu->workload_setting[index]; > - } > - > - smu->workload_mask = smu->driver_workload_mask | > - smu->user_dpm_profile.user_workload_mask; > + workload[0] = type; > > if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL && > - smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) > - smu_bump_power_profile_mode(smu, workload, 0); > + smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) { > + if (enable) > + smu_power_profile_mode_get(smu, type); > + else > + smu_power_profile_mode_put(smu, type); > + smu_bump_power_profile_mode(smu, workload, 0, enable); > + } > > return 0; > } > @@ -3090,21 +3078,27 @@ static int smu_set_power_profile_mode(void *handle, > uint32_t param_size) > { > struct smu_context *smu = handle; > - int ret; > + long workload[1]; > + int ret = 0; > > if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled || > !smu->ppt_funcs->set_power_profile_mode) > return -EOPNOTSUPP; > > - if (smu->user_dpm_profile.user_workload_mask & > - (1 << 
smu->workload_priority[param[param_size]])) > - return 0; > - > - smu->user_dpm_profile.user_workload_mask = > - (1 << smu->workload_priority[param[param_size]]); > - smu->workload_mask = smu->user_dpm_profile.user_workload_mask | > - smu->driver_workload_mask; > - ret = smu_bump_power_profile_mode(smu, param, param_size); > + if (param[param_size] != smu->power_profile_mode) { > + /* clear the old user preference */ > + workload[0] = smu->power_profile_mode; > + smu_power_profile_mode_put(smu, smu->power_profile_mode); > + ret = smu_bump_power_profile_mode(smu, workload, 0, false); > + if (ret) > + return ret; Here as well - no need to call twice with false/true. Put the existing one and get the new one. If smu_bump_power_profile_mode call fails, then we have to reverse the operation though - this is true for other cases also. Thanks, Lijo > + /* set the new user preference */ > + smu_power_profile_mode_get(smu, param[param_size]); > + ret = smu_bump_power_profile_mode(smu, param, param_size, true); > + if (!ret) > + /* store the user's preference */ > + smu->power_profile_mode = param[param_size]; > + } > > return ret; > } > diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h > index fa93a8879113..da7558a65c09 100644 > --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h > +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h > @@ -240,7 +240,6 @@ struct smu_user_dpm_profile { > /* user clock state information */ > uint32_t clk_mask[SMU_CLK_COUNT]; > uint32_t clk_dependency; > - uint32_t user_workload_mask; > }; > > #define SMU_TABLE_INIT(tables, table_id, s, a, d) \ > @@ -557,12 +556,12 @@ struct smu_context { > uint32_t hard_min_uclk_req_from_dal; > bool disable_uclk_switch; > > + /* backend specific workload mask */ > uint32_t workload_mask; > - uint32_t driver_workload_mask; > - uint32_t workload_priority[WORKLOAD_POLICY_MAX]; > - uint32_t workload_setting[WORKLOAD_POLICY_MAX]; > + /* default/user workload 
preference */ > uint32_t power_profile_mode; > - uint32_t default_power_profile_mode; > + uint32_t workload_refcount[PP_SMC_POWER_PROFILE_COUNT]; > + struct mutex workload_lock; > bool pm_enabled; > bool is_apu; > > @@ -734,8 +733,10 @@ struct pptable_funcs { > * create/set custom power profile modes. > * &input: Power profile mode parameters. > * &size: Size of &input. > + * &enable: enable/disable the profile > */ > - int (*set_power_profile_mode)(struct smu_context *smu, long *input, uint32_t size); > + int (*set_power_profile_mode)(struct smu_context *smu, long *input, > + uint32_t size, bool enable); > > /** > * @dpm_set_vcn_enable: Enable/disable VCN engine dynamic power > diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c > index 4b36c230e43a..1e44cf6fec4b 100644 > --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c > +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c > @@ -1443,7 +1443,8 @@ static int arcturus_get_power_profile_mode(struct smu_context *smu, > > static int arcturus_set_power_profile_mode(struct smu_context *smu, > long *input, > - uint32_t size) > + uint32_t size, > + bool enable) > { > DpmActivityMonitorCoeffInt_t activity_monitor; > int workload_type = 0; > @@ -1455,8 +1456,9 @@ static int arcturus_set_power_profile_mode(struct smu_context *smu, > return -EINVAL; > } > > - if ((profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) && > - (smu->smc_fw_version >= 0x360d00)) { > + if (enable && > + (profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) && > + (smu->smc_fw_version >= 0x360d00)) { > if (size != 10) > return -EINVAL; > > @@ -1520,18 +1522,18 @@ static int arcturus_set_power_profile_mode(struct smu_context *smu, > return -EINVAL; > } > > + if (enable) > + smu->workload_mask |= (1 << workload_type); > + else > + smu->workload_mask &= ~(1 << workload_type); > ret = smu_cmn_send_smc_msg_with_param(smu, > SMU_MSG_SetWorkloadMask, > smu->workload_mask, > NULL); > - if (ret) { > + 
if (ret) > dev_err(smu->adev->dev, "Fail to set workload type %d\n", workload_type); > - return ret; > - } > - > - smu_cmn_assign_power_profile(smu); > > - return 0; > + return ret; > } > > static int arcturus_set_performance_level(struct smu_context *smu, > diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c > index 211635dabed8..d944a9f954d0 100644 > --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c > +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c > @@ -2006,19 +2006,19 @@ static int navi10_get_power_profile_mode(struct smu_context *smu, char *buf) > return size; > } > > -static int navi10_set_power_profile_mode(struct smu_context *smu, long *input, uint32_t size) > +static int navi10_set_power_profile_mode(struct smu_context *smu, long *input, > + uint32_t size, bool enable) > { > DpmActivityMonitorCoeffInt_t activity_monitor; > int workload_type, ret = 0; > + uint32_t profile_mode = input[size]; > > - smu->power_profile_mode = input[size]; > - > - if (smu->power_profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) { > - dev_err(smu->adev->dev, "Invalid power profile mode %d\n", smu->power_profile_mode); > + if (profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) { > + dev_err(smu->adev->dev, "Invalid power profile mode %d\n", profile_mode); > return -EINVAL; > } > > - if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) { > + if (enable && profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) { > if (size != 10) > return -EINVAL; > > @@ -2080,16 +2080,18 @@ static int navi10_set_power_profile_mode(struct smu_context *smu, long *input, u > /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */ > workload_type = smu_cmn_to_asic_specific_index(smu, > CMN2ASIC_MAPPING_WORKLOAD, > - smu->power_profile_mode); > + profile_mode); > if (workload_type < 0) > return -EINVAL; > > + if (enable) > + smu->workload_mask |= (1 << workload_type); > + else > + smu->workload_mask &= ~(1 << workload_type); > ret = 
smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, > smu->workload_mask, NULL); > if (ret) > dev_err(smu->adev->dev, "[%s] Failed to set work load mask!", __func__); > - else > - smu_cmn_assign_power_profile(smu); > > return ret; > } > diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c > index 844532a9b641..4967e087088b 100644 > --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c > +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c > @@ -1704,22 +1704,23 @@ static int sienna_cichlid_get_power_profile_mode(struct smu_context *smu, char * > return size; > } > > -static int sienna_cichlid_set_power_profile_mode(struct smu_context *smu, long *input, uint32_t size) > +static int sienna_cichlid_set_power_profile_mode(struct smu_context *smu, > + long *input, uint32_t size, > + bool enable) > { > > DpmActivityMonitorCoeffIntExternal_t activity_monitor_external; > DpmActivityMonitorCoeffInt_t *activity_monitor = > &(activity_monitor_external.DpmActivityMonitorCoeffInt); > + uint32_t profile_mode = input[size]; > int workload_type, ret = 0; > > - smu->power_profile_mode = input[size]; > - > - if (smu->power_profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) { > - dev_err(smu->adev->dev, "Invalid power profile mode %d\n", smu->power_profile_mode); > + if (profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) { > + dev_err(smu->adev->dev, "Invalid power profile mode %d\n", profile_mode); > return -EINVAL; > } > > - if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) { > + if (enable && profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) { > if (size != 10) > return -EINVAL; > > @@ -1781,16 +1782,18 @@ static int sienna_cichlid_set_power_profile_mode(struct smu_context *smu, long * > /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */ > workload_type = smu_cmn_to_asic_specific_index(smu, > CMN2ASIC_MAPPING_WORKLOAD, > - smu->power_profile_mode); > + profile_mode); > if 
(workload_type < 0) > return -EINVAL; > > + if (enable) > + smu->workload_mask |= (1 << workload_type); > + else > + smu->workload_mask &= ~(1 << workload_type); > ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, > smu->workload_mask, NULL); > if (ret) > dev_err(smu->adev->dev, "[%s] Failed to set work load mask!", __func__); > - else > - smu_cmn_assign_power_profile(smu); > > return ret; > } > diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c > index f89c487dce72..b5dba4826f81 100644 > --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c > +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c > @@ -1056,7 +1056,8 @@ static int vangogh_get_power_profile_mode(struct smu_context *smu, > return size; > } > > -static int vangogh_set_power_profile_mode(struct smu_context *smu, long *input, uint32_t size) > +static int vangogh_set_power_profile_mode(struct smu_context *smu, long *input, > + uint32_t size, bool enable) > { > int workload_type, ret; > uint32_t profile_mode = input[size]; > @@ -1067,7 +1068,7 @@ static int vangogh_set_power_profile_mode(struct smu_context *smu, long *input, > } > > if (profile_mode == PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT || > - profile_mode == PP_SMC_POWER_PROFILE_POWERSAVING) > + profile_mode == PP_SMC_POWER_PROFILE_POWERSAVING) > return 0; > > /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */ > @@ -1080,18 +1081,18 @@ static int vangogh_set_power_profile_mode(struct smu_context *smu, long *input, > return -EINVAL; > } > > + if (enable) > + smu->workload_mask |= (1 << workload_type); > + else > + smu->workload_mask &= ~(1 << workload_type); > ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ActiveProcessNotify, > smu->workload_mask, > NULL); > - if (ret) { > + if (ret) > dev_err_once(smu->adev->dev, "Fail to set workload type %d\n", > workload_type); > - return ret; > - } > - > - smu_cmn_assign_power_profile(smu); > > - return 0; > + return ret; > 
} > > static int vangogh_set_soft_freq_limited_range(struct smu_context *smu, > diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c > index 75a9ea87f419..2d1eae79ab9d 100644 > --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c > +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c > @@ -864,7 +864,8 @@ static int renoir_force_clk_levels(struct smu_context *smu, > return ret; > } > > -static int renoir_set_power_profile_mode(struct smu_context *smu, long *input, uint32_t size) > +static int renoir_set_power_profile_mode(struct smu_context *smu, long *input, > + uint32_t size, bool enable) > { > int workload_type, ret; > uint32_t profile_mode = input[size]; > @@ -875,7 +876,7 @@ static int renoir_set_power_profile_mode(struct smu_context *smu, long *input, u > } > > if (profile_mode == PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT || > - profile_mode == PP_SMC_POWER_PROFILE_POWERSAVING) > + profile_mode == PP_SMC_POWER_PROFILE_POWERSAVING) > return 0; > > /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */ > @@ -891,17 +892,17 @@ static int renoir_set_power_profile_mode(struct smu_context *smu, long *input, u > return -EINVAL; > } > > + if (enable) > + smu->workload_mask |= (1 << workload_type); > + else > + smu->workload_mask &= ~(1 << workload_type); > ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ActiveProcessNotify, > smu->workload_mask, > NULL); > - if (ret) { > + if (ret) > dev_err_once(smu->adev->dev, "Fail to set workload type %d\n", workload_type); > - return ret; > - } > > - smu_cmn_assign_power_profile(smu); > - > - return 0; > + return ret; > } > > static int renoir_set_peak_clock_by_device(struct smu_context *smu) > diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c > index 80c6b1e523aa..3cc734331891 100644 > --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c > +++ 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c > @@ -2573,22 +2573,22 @@ static int smu_v13_0_0_get_power_profile_mode(struct smu_context *smu, > > static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, > long *input, > - uint32_t size) > + uint32_t size, > + bool enable) > { > DpmActivityMonitorCoeffIntExternal_t activity_monitor_external; > DpmActivityMonitorCoeffInt_t *activity_monitor = > &(activity_monitor_external.DpmActivityMonitorCoeffInt); > + uint32_t profile_mode = input[size]; > int workload_type, ret = 0; > u32 workload_mask; > > - smu->power_profile_mode = input[size]; > - > - if (smu->power_profile_mode >= PP_SMC_POWER_PROFILE_COUNT) { > - dev_err(smu->adev->dev, "Invalid power profile mode %d\n", smu->power_profile_mode); > + if (profile_mode >= PP_SMC_POWER_PROFILE_COUNT) { > + dev_err(smu->adev->dev, "Invalid power profile mode %d\n", profile_mode); > return -EINVAL; > } > > - if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) { > + if (enable && profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) { > if (size != 9) > return -EINVAL; > > @@ -2641,13 +2641,18 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, > /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */ > workload_type = smu_cmn_to_asic_specific_index(smu, > CMN2ASIC_MAPPING_WORKLOAD, > - smu->power_profile_mode); > + profile_mode); > > if (workload_type < 0) > return -EINVAL; > > workload_mask = 1 << workload_type; > > + if (enable) > + smu->workload_mask |= workload_mask; > + else > + smu->workload_mask &= ~workload_mask; > + > /* Add optimizations for SMU13.0.0/10. 
Reuse the power saving profile */ > if ((amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 0) && > ((smu->adev->pm.fw_version == 0x004e6601) || > @@ -2658,25 +2663,13 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, > CMN2ASIC_MAPPING_WORKLOAD, > PP_SMC_POWER_PROFILE_POWERSAVING); > if (workload_type >= 0) > - workload_mask |= 1 << workload_type; > + smu->workload_mask |= 1 << workload_type; > } > > - smu->workload_mask |= workload_mask; > ret = smu_cmn_send_smc_msg_with_param(smu, > SMU_MSG_SetWorkloadMask, > smu->workload_mask, > NULL); > - if (!ret) { > - smu_cmn_assign_power_profile(smu); > - if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_POWERSAVING) { > - workload_type = smu_cmn_to_asic_specific_index(smu, > - CMN2ASIC_MAPPING_WORKLOAD, > - PP_SMC_POWER_PROFILE_FULLSCREEN3D); > - smu->power_profile_mode = smu->workload_mask & (1 << workload_type) > - ? PP_SMC_POWER_PROFILE_FULLSCREEN3D > - : PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; > - } > - } > > return ret; > } > diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c > index c5d3e25cc967..1aafd23857f0 100644 > --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c > +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c > @@ -2528,22 +2528,23 @@ do { \ > return result; > } > > -static int smu_v13_0_7_set_power_profile_mode(struct smu_context *smu, long *input, uint32_t size) > +static int smu_v13_0_7_set_power_profile_mode(struct smu_context *smu, > + long *input, uint32_t size, > + bool enable) > { > > DpmActivityMonitorCoeffIntExternal_t activity_monitor_external; > DpmActivityMonitorCoeffInt_t *activity_monitor = > &(activity_monitor_external.DpmActivityMonitorCoeffInt); > + uint32_t profile_mode = input[size]; > int workload_type, ret = 0; > > - smu->power_profile_mode = input[size]; > - > - if (smu->power_profile_mode > PP_SMC_POWER_PROFILE_WINDOW3D) { > - dev_err(smu->adev->dev, "Invalid 
power profile mode %d\n", smu->power_profile_mode); > + if (profile_mode > PP_SMC_POWER_PROFILE_WINDOW3D) { > + dev_err(smu->adev->dev, "Invalid power profile mode %d\n", profile_mode); > return -EINVAL; > } > > - if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) { > + if (enable && profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) { > if (size != 8) > return -EINVAL; > > @@ -2590,17 +2591,19 @@ static int smu_v13_0_7_set_power_profile_mode(struct smu_context *smu, long *inp > /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */ > workload_type = smu_cmn_to_asic_specific_index(smu, > CMN2ASIC_MAPPING_WORKLOAD, > - smu->power_profile_mode); > + profile_mode); > if (workload_type < 0) > return -EINVAL; > > + if (enable) > + smu->workload_mask |= (1 << workload_type); > + else > + smu->workload_mask &= ~(1 << workload_type); > ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, > smu->workload_mask, NULL); > > if (ret) > dev_err(smu->adev->dev, "[%s] Failed to set work load mask!", __func__); > - else > - smu_cmn_assign_power_profile(smu); > > return ret; > } > diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c > index 59b369eff30f..b64490bcfd62 100644 > --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c > +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c > @@ -1719,21 +1719,21 @@ static int smu_v14_0_2_get_power_profile_mode(struct smu_context *smu, > > static int smu_v14_0_2_set_power_profile_mode(struct smu_context *smu, > long *input, > - uint32_t size) > + uint32_t size, > + bool enable) > { > DpmActivityMonitorCoeffIntExternal_t activity_monitor_external; > DpmActivityMonitorCoeffInt_t *activity_monitor = > &(activity_monitor_external.DpmActivityMonitorCoeffInt); > + uint32_t profile_mode = input[size]; > int workload_type, ret = 0; > - uint32_t current_profile_mode = smu->power_profile_mode; > - smu->power_profile_mode = input[size]; > > - if 
(smu->power_profile_mode >= PP_SMC_POWER_PROFILE_COUNT) { > - dev_err(smu->adev->dev, "Invalid power profile mode %d\n", smu->power_profile_mode); > + if (profile_mode >= PP_SMC_POWER_PROFILE_COUNT) { > + dev_err(smu->adev->dev, "Invalid power profile mode %d\n", profile_mode); > return -EINVAL; > } > > - if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) { > + if (enable && profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) { > if (size != 9) > return -EINVAL; > > @@ -1783,23 +1783,24 @@ static int smu_v14_0_2_set_power_profile_mode(struct smu_context *smu, > } > } > > - if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_COMPUTE) > - smu_v14_0_deep_sleep_control(smu, false); > - else if (current_profile_mode == PP_SMC_POWER_PROFILE_COMPUTE) > - smu_v14_0_deep_sleep_control(smu, true); > - > /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */ > workload_type = smu_cmn_to_asic_specific_index(smu, > CMN2ASIC_MAPPING_WORKLOAD, > - smu->power_profile_mode); > + profile_mode); > if (workload_type < 0) > return -EINVAL; > > - ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, > - smu->workload_mask, NULL); > + if (enable) > + smu->workload_mask |= (1 << workload_type); > + else > + smu->workload_mask &= ~(1 << workload_type); > > - if (!ret) > - smu_cmn_assign_power_profile(smu); > + /* disable deep sleep if compute is enabled */ > + if (profile_mode == PP_SMC_POWER_PROFILE_COMPUTE) > + smu_v14_0_deep_sleep_control(smu, !enable); > + > + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, > + smu->workload_mask, NULL); > > return ret; > } > diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c > index fd2aa949538e..63c4f75fa118 100644 > --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c > +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c > @@ -1141,14 +1141,6 @@ int smu_cmn_set_mp1_state(struct smu_context *smu, > return ret; > } > > -void smu_cmn_assign_power_profile(struct smu_context *smu) > -{ > - 
uint32_t index; > - index = fls(smu->workload_mask); > - index = index > 0 && index <= WORKLOAD_POLICY_MAX ? index - 1 : 0; > - smu->power_profile_mode = smu->workload_setting[index]; > -} > - > bool smu_cmn_is_audio_func_enabled(struct amdgpu_device *adev) > { > struct pci_dev *p = NULL; > diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h > index 8a801e389659..1de685defe85 100644 > --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h > +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h > @@ -130,8 +130,6 @@ void smu_cmn_init_soft_gpu_metrics(void *table, uint8_t frev, uint8_t crev); > int smu_cmn_set_mp1_state(struct smu_context *smu, > enum pp_mp1_state mp1_state); > > -void smu_cmn_assign_power_profile(struct smu_context *smu); > - > /* > * Helper function to make sysfs_emit_at() happy. Align buf to > * the current page boundary and record the offset.