Re: [PATCH 1/2] drm/amd/pm: fix and simplify workload handling

"Lazar, Lijo" <lijo.lazar@xxxxxxx> · Wed, 13 Nov 2024 13:00:52 +0530

On 11/13/2024 11:27 AM, Alex Deucher wrote:
> On Tue, Nov 12, 2024 at 11:59 PM Lazar, Lijo <lijo.lazar@xxxxxxx> wrote:
>>
>>
>>
>> On 11/13/2024 1:58 AM, Alex Deucher wrote:
>>> smu->workload_mask is IP specific and should not be messed with in
>>> the common code. The mask bits vary across SMU versions.
>>>
>>> Move all handling of smu->workload_mask in to the backends and
>>> simplify the code.  Store the user's preference in smu->power_profile_mode
>>> which will be reflected in sysfs.  For internal driver profile
>>> switches for KFD or VCN, just update the workload mask so that the
>>> user's preference is retained.  Remove all of the extra now unused
>>> workload related elements in the smu structure.
>>>
>>> v2: use refcounts for workload profiles
>>>
>>> Fixes: 8cc438be5d49 ("drm/amd/pm: correct the workload setting")
>>> Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx>
>>> Cc: Kenneth Feng <kenneth.feng@xxxxxxx>
>>> Cc: Lijo Lazar <lijo.lazar@xxxxxxx>
>>> ---
>>>  drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c     | 128 +++++++++---------
>>>  drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  13 +-
>>>  .../gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c |  20 +--
>>>  .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c   |  20 +--
>>>  .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c   |  21 +--
>>>  .../gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c  |  17 +--
>>>  .../gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c   |  17 +--
>>>  .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c  |  33 ++---
>>>  .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c  |  21 +--
>>>  .../drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c  |  33 ++---
>>>  drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c        |   8 --
>>>  drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h        |   2 -
>>>  12 files changed, 162 insertions(+), 171 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
>>> index c3a6b6f20455..41b591ecfb64 100644
>>> --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
>>> +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
>>> @@ -72,6 +72,10 @@ static int smu_set_power_limit(void *handle, uint32_t limit);
>>>  static int smu_set_fan_speed_rpm(void *handle, uint32_t speed);
>>>  static int smu_set_gfx_cgpg(struct smu_context *smu, bool enabled);
>>>  static int smu_set_mp1_state(void *handle, enum pp_mp1_state mp1_state);
>>> +static void smu_power_profile_mode_get(struct smu_context *smu,
>>> +                                    enum PP_SMC_POWER_PROFILE profile_mode);
>>> +static void smu_power_profile_mode_put(struct smu_context *smu,
>>> +                                    enum PP_SMC_POWER_PROFILE profile_mode);
>>>
>>>  static int smu_sys_get_pp_feature_mask(void *handle,
>>>                                      char *buf)
>>> @@ -765,6 +769,7 @@ static int smu_early_init(struct amdgpu_ip_block *ip_block)
>>>       smu->user_dpm_profile.fan_mode = -1;
>>>
>>>       mutex_init(&smu->message_lock);
>>> +     mutex_init(&smu->workload_lock);
>>>
>>>       adev->powerplay.pp_handle = smu;
>>>       adev->powerplay.pp_funcs = &swsmu_pm_funcs;
>>> @@ -1268,9 +1273,6 @@ static int smu_sw_init(struct amdgpu_ip_block *ip_block)
>>>       INIT_WORK(&smu->interrupt_work, smu_interrupt_work_fn);
>>>       atomic64_set(&smu->throttle_int_counter, 0);
>>>       smu->watermarks_bitmap = 0;
>>> -     smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
>>> -     smu->default_power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
>>> -     smu->user_dpm_profile.user_workload_mask = 0;
>>>
>>>       for (i = 0; i < adev->vcn.num_vcn_inst; i++)
>>>               atomic_set(&smu->smu_power.power_gate.vcn_gated[i], 1);
>>> @@ -1278,33 +1280,13 @@ static int smu_sw_init(struct amdgpu_ip_block *ip_block)
>>>       atomic_set(&smu->smu_power.power_gate.vpe_gated, 1);
>>>       atomic_set(&smu->smu_power.power_gate.umsch_mm_gated, 1);
>>>
>>> -     smu->workload_priority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT] = 0;
>>> -     smu->workload_priority[PP_SMC_POWER_PROFILE_FULLSCREEN3D] = 1;
>>> -     smu->workload_priority[PP_SMC_POWER_PROFILE_POWERSAVING] = 2;
>>> -     smu->workload_priority[PP_SMC_POWER_PROFILE_VIDEO] = 3;
>>> -     smu->workload_priority[PP_SMC_POWER_PROFILE_VR] = 4;
>>> -     smu->workload_priority[PP_SMC_POWER_PROFILE_COMPUTE] = 5;
>>> -     smu->workload_priority[PP_SMC_POWER_PROFILE_CUSTOM] = 6;
>>> -
>>>       if (smu->is_apu ||
>>> -         !smu_is_workload_profile_available(smu, PP_SMC_POWER_PROFILE_FULLSCREEN3D)) {
>>> -             smu->driver_workload_mask =
>>> -                     1 << smu->workload_priority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT];
>>> -     } else {
>>> -             smu->driver_workload_mask =
>>> -                     1 << smu->workload_priority[PP_SMC_POWER_PROFILE_FULLSCREEN3D];
>>> -             smu->default_power_profile_mode = PP_SMC_POWER_PROFILE_FULLSCREEN3D;
>>> -     }
>>> -
>>> -     smu->workload_mask = smu->driver_workload_mask |
>>> -                                                     smu->user_dpm_profile.user_workload_mask;
>>> -     smu->workload_setting[0] = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
>>> -     smu->workload_setting[1] = PP_SMC_POWER_PROFILE_FULLSCREEN3D;
>>> -     smu->workload_setting[2] = PP_SMC_POWER_PROFILE_POWERSAVING;
>>> -     smu->workload_setting[3] = PP_SMC_POWER_PROFILE_VIDEO;
>>> -     smu->workload_setting[4] = PP_SMC_POWER_PROFILE_VR;
>>> -     smu->workload_setting[5] = PP_SMC_POWER_PROFILE_COMPUTE;
>>> -     smu->workload_setting[6] = PP_SMC_POWER_PROFILE_CUSTOM;
>>> +         !smu_is_workload_profile_available(smu, PP_SMC_POWER_PROFILE_FULLSCREEN3D))
>>> +             smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
>>> +     else
>>> +             smu->power_profile_mode = PP_SMC_POWER_PROFILE_FULLSCREEN3D;
>>> +     smu_power_profile_mode_get(smu, smu->power_profile_mode);
>>> +
>>>       smu->display_config = &adev->pm.pm_display_cfg;
>>>
>>>       smu->smu_dpm.dpm_level = AMD_DPM_FORCED_LEVEL_AUTO;
>>> @@ -2252,24 +2234,41 @@ static int smu_enable_umd_pstate(void *handle,
>>>  }
>>>
>>>  static int smu_bump_power_profile_mode(struct smu_context *smu,
>>> -                                        long *param,
>>> -                                        uint32_t param_size)
>>> +                                    long *param,
>>> +                                    uint32_t param_size,
>>> +                                    bool enable)
>>>  {
>>>       int ret = 0;
>>>
>>>       if (smu->ppt_funcs->set_power_profile_mode)
>>> -             ret = smu->ppt_funcs->set_power_profile_mode(smu, param, param_size);
>>> +             ret = smu->ppt_funcs->set_power_profile_mode(smu, param, param_size, enable);
>>
>> Have a different expectation with refcount; not expecting to see
>> enable/disable. I think only switch power_profile_mode is required.
>>
>> Workload mask is then created based on non-zero refcounts in this array
>> - smu->workload_refcount[]. If it is different from the current mask,
>> then it's applied.
> 
> I tried that originally, but the problem was the custom profiles.
> They need a bunch of extra parameters so it seemed easier to just
> leave the set_power_profile backend API as is.  Although thinking
> about it more I can save them off in amdgpu_smu.c when I update the
> ref count and then just pass the mask to the backend set_power_profile
> API.  

Does the requirement translate to always do a force update if
smu->power_profile_mode == custom? If so, this can be checked/done in
backend.

Thanks,
Lijo

Additionally for resume, we need to make sure the current
> profile gets sent to PMFW.  Once again, thinking about it more, I can
> just clear the backend workload_mask on suspend and then on resume, it
> won't match and will update.
> 
>>
>>>
>>>       return ret;
>>>  }
>>>
>>> +static void smu_power_profile_mode_get(struct smu_context *smu,
>>> +                                    enum PP_SMC_POWER_PROFILE profile_mode)
>>> +{
>>> +     mutex_lock(&smu->workload_lock);
>>
>> I think this is not needed. DPM calls are already under lock, not seeing
>> a case where it could do toggle get/put at the sametime.
> 
> Will drop.
> 
>>
>>> +     smu->workload_refcount[profile_mode]++;
>>> +     mutex_unlock(&smu->workload_lock);
>>> +}
>>> +
>>> +static void smu_power_profile_mode_put(struct smu_context *smu,
>>> +                                    enum PP_SMC_POWER_PROFILE profile_mode)
>>> +{
>>> +     mutex_lock(&smu->workload_lock);
>>> +     if (smu->workload_refcount[profile_mode])
>>> +             smu->workload_refcount[profile_mode]--;
>>> +     mutex_unlock(&smu->workload_lock);
>>> +}
>>> +
>>>  static int smu_adjust_power_state_dynamic(struct smu_context *smu,
>>>                                         enum amd_dpm_forced_level level,
>>>                                         bool skip_display_settings,
>>>                                         bool init)
>>>  {
>>>       int ret = 0;
>>> -     int index = 0;
>>>       long workload[1];
>>>       struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm);
>>>
>>> @@ -2307,13 +2306,11 @@ static int smu_adjust_power_state_dynamic(struct smu_context *smu,
>>>       }
>>>
>>>       if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL &&
>>> -             smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) {
>>> -             index = fls(smu->workload_mask);
>>> -             index = index > 0 && index <= WORKLOAD_POLICY_MAX ? index - 1 : 0;
>>> -             workload[0] = smu->workload_setting[index];
>>> +         smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) {
>>> +             workload[0] = smu->power_profile_mode;
>>>
>>> -             if (init || smu->power_profile_mode != workload[0])
>>> -                     smu_bump_power_profile_mode(smu, workload, 0);
>>> +             if (init)
>>> +                     smu_bump_power_profile_mode(smu, workload, 0, true);
>>
>> Same here - not expecting to have init check here. Since workload_mask
>> is 0 during init and workload_refcount is changed, it will set the right
>> mask on smu_bump_power_profile_mode().
> 
> This was to cover resume so we apply the current state to PMFW on
> resume.  Otherwise, the driver would skip the state update because it
> thinks the state is already up to date.
> 
>>
>>>       }
>>>
>>>       return ret;
>>> @@ -2361,12 +2358,11 @@ static int smu_handle_dpm_task(void *handle,
>>>
>>>  static int smu_switch_power_profile(void *handle,
>>>                                   enum PP_SMC_POWER_PROFILE type,
>>> -                                 bool en)
>>> +                                 bool enable)
>>>  {
>>>       struct smu_context *smu = handle;
>>>       struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm);
>>>       long workload[1];
>>> -     uint32_t index;
>>>
>>>       if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled)
>>>               return -EOPNOTSUPP;
>>> @@ -2374,24 +2370,16 @@ static int smu_switch_power_profile(void *handle,
>>>       if (!(type < PP_SMC_POWER_PROFILE_CUSTOM))
>>>               return -EINVAL;
>>>
>>> -     if (!en) {
>>> -             smu->driver_workload_mask &= ~(1 << smu->workload_priority[type]);
>>> -             index = fls(smu->workload_mask);
>>> -             index = index > 0 && index <= WORKLOAD_POLICY_MAX ? index - 1 : 0;
>>> -             workload[0] = smu->workload_setting[index];
>>> -     } else {
>>> -             smu->driver_workload_mask |= (1 << smu->workload_priority[type]);
>>> -             index = fls(smu->workload_mask);
>>> -             index = index <= WORKLOAD_POLICY_MAX ? index - 1 : 0;
>>> -             workload[0] = smu->workload_setting[index];
>>> -     }
>>> -
>>> -     smu->workload_mask = smu->driver_workload_mask |
>>> -                                              smu->user_dpm_profile.user_workload_mask;
>>> +     workload[0] = type;
>>>
>>>       if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL &&
>>> -             smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM)
>>> -             smu_bump_power_profile_mode(smu, workload, 0);
>>> +         smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) {
>>> +             if (enable)
>>> +                     smu_power_profile_mode_get(smu, type);
>>> +             else
>>> +                     smu_power_profile_mode_put(smu, type);
>>> +             smu_bump_power_profile_mode(smu, workload, 0, enable);
>>> +     }
>>>
>>>       return 0;
>>>  }
>>> @@ -3090,21 +3078,27 @@ static int smu_set_power_profile_mode(void *handle,
>>>                                     uint32_t param_size)
>>>  {
>>>       struct smu_context *smu = handle;
>>> -     int ret;
>>> +     long workload[1];
>>> +     int ret = 0;
>>>
>>>       if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled ||
>>>           !smu->ppt_funcs->set_power_profile_mode)
>>>               return -EOPNOTSUPP;
>>>
>>> -     if (smu->user_dpm_profile.user_workload_mask &
>>> -        (1 << smu->workload_priority[param[param_size]]))
>>> -        return 0;
>>> -
>>> -     smu->user_dpm_profile.user_workload_mask =
>>> -             (1 << smu->workload_priority[param[param_size]]);
>>> -     smu->workload_mask = smu->user_dpm_profile.user_workload_mask |
>>> -             smu->driver_workload_mask;
>>> -     ret = smu_bump_power_profile_mode(smu, param, param_size);
>>> +     if (param[param_size] != smu->power_profile_mode) {
>>> +             /* clear the old user preference */
>>> +             workload[0] = smu->power_profile_mode;
>>> +             smu_power_profile_mode_put(smu, smu->power_profile_mode);
>>> +             ret = smu_bump_power_profile_mode(smu, workload, 0, false);
>>> +             if (ret)
>>> +                     return ret;
>>
>> Here as well - no need to call twice with false/true. Put the existing
>> one and get the new one. If smu_bump_power_profile_mode call fails, then
>> we have to reverse the operation though - this is true for other cases also.
> 
> Good catch.  Will fix.
> 
> Alex
> 
>>
>> Thanks,
>> Lijo
>>
>>> +             /* set the new user preference */
>>> +             smu_power_profile_mode_get(smu, param[param_size]);
>>> +             ret = smu_bump_power_profile_mode(smu, param, param_size, true);
>>> +             if (!ret)
>>> +                     /* store the user's preference */
>>> +                     smu->power_profile_mode = param[param_size];
>>> +     }
>>>
>>>       return ret;
>>>  }
>>> diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
>>> index fa93a8879113..da7558a65c09 100644
>>> --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
>>> +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
>>> @@ -240,7 +240,6 @@ struct smu_user_dpm_profile {
>>>       /* user clock state information */
>>>       uint32_t clk_mask[SMU_CLK_COUNT];
>>>       uint32_t clk_dependency;
>>> -     uint32_t user_workload_mask;
>>>  };
>>>
>>>  #define SMU_TABLE_INIT(tables, table_id, s, a, d)    \
>>> @@ -557,12 +556,12 @@ struct smu_context {
>>>       uint32_t hard_min_uclk_req_from_dal;
>>>       bool disable_uclk_switch;
>>>
>>> +     /* backend specific workload mask */
>>>       uint32_t workload_mask;
>>> -     uint32_t driver_workload_mask;
>>> -     uint32_t workload_priority[WORKLOAD_POLICY_MAX];
>>> -     uint32_t workload_setting[WORKLOAD_POLICY_MAX];
>>> +     /* default/user workload preference */
>>>       uint32_t power_profile_mode;
>>> -     uint32_t default_power_profile_mode;
>>> +     uint32_t workload_refcount[PP_SMC_POWER_PROFILE_COUNT];
>>> +     struct mutex workload_lock;
>>>       bool pm_enabled;
>>>       bool is_apu;
>>>
>>> @@ -734,8 +733,10 @@ struct pptable_funcs {
>>>        *                          create/set custom power profile modes.
>>>        * &input: Power profile mode parameters.
>>>        * &size: Size of &input.
>>> +      * &enable: enable/disable the profile
>>>        */
>>> -     int (*set_power_profile_mode)(struct smu_context *smu, long *input, uint32_t size);
>>> +     int (*set_power_profile_mode)(struct smu_context *smu, long *input,
>>> +                                   uint32_t size, bool enable);
>>>
>>>       /**
>>>        * @dpm_set_vcn_enable: Enable/disable VCN engine dynamic power
>>> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
>>> index 4b36c230e43a..1e44cf6fec4b 100644
>>> --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
>>> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
>>> @@ -1443,7 +1443,8 @@ static int arcturus_get_power_profile_mode(struct smu_context *smu,
>>>
>>>  static int arcturus_set_power_profile_mode(struct smu_context *smu,
>>>                                          long *input,
>>> -                                        uint32_t size)
>>> +                                        uint32_t size,
>>> +                                        bool enable)
>>>  {
>>>       DpmActivityMonitorCoeffInt_t activity_monitor;
>>>       int workload_type = 0;
>>> @@ -1455,8 +1456,9 @@ static int arcturus_set_power_profile_mode(struct smu_context *smu,
>>>               return -EINVAL;
>>>       }
>>>
>>> -     if ((profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) &&
>>> -          (smu->smc_fw_version >= 0x360d00)) {
>>> +     if (enable &&
>>> +         (profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) &&
>>> +         (smu->smc_fw_version >= 0x360d00)) {
>>>               if (size != 10)
>>>                       return -EINVAL;
>>>
>>> @@ -1520,18 +1522,18 @@ static int arcturus_set_power_profile_mode(struct smu_context *smu,
>>>               return -EINVAL;
>>>       }
>>>
>>> +     if (enable)
>>> +             smu->workload_mask |= (1 << workload_type);
>>> +     else
>>> +             smu->workload_mask &= ~(1 << workload_type);
>>>       ret = smu_cmn_send_smc_msg_with_param(smu,
>>>                                         SMU_MSG_SetWorkloadMask,
>>>                                         smu->workload_mask,
>>>                                         NULL);
>>> -     if (ret) {
>>> +     if (ret)
>>>               dev_err(smu->adev->dev, "Fail to set workload type %d\n", workload_type);
>>> -             return ret;
>>> -     }
>>> -
>>> -     smu_cmn_assign_power_profile(smu);
>>>
>>> -     return 0;
>>> +     return ret;
>>>  }
>>>
>>>  static int arcturus_set_performance_level(struct smu_context *smu,
>>> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
>>> index 211635dabed8..d944a9f954d0 100644
>>> --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
>>> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
>>> @@ -2006,19 +2006,19 @@ static int navi10_get_power_profile_mode(struct smu_context *smu, char *buf)
>>>       return size;
>>>  }
>>>
>>> -static int navi10_set_power_profile_mode(struct smu_context *smu, long *input, uint32_t size)
>>> +static int navi10_set_power_profile_mode(struct smu_context *smu, long *input,
>>> +                                      uint32_t size, bool enable)
>>>  {
>>>       DpmActivityMonitorCoeffInt_t activity_monitor;
>>>       int workload_type, ret = 0;
>>> +     uint32_t profile_mode = input[size];
>>>
>>> -     smu->power_profile_mode = input[size];
>>> -
>>> -     if (smu->power_profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) {
>>> -             dev_err(smu->adev->dev, "Invalid power profile mode %d\n", smu->power_profile_mode);
>>> +     if (profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) {
>>> +             dev_err(smu->adev->dev, "Invalid power profile mode %d\n", profile_mode);
>>>               return -EINVAL;
>>>       }
>>>
>>> -     if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) {
>>> +     if (enable && profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) {
>>>               if (size != 10)
>>>                       return -EINVAL;
>>>
>>> @@ -2080,16 +2080,18 @@ static int navi10_set_power_profile_mode(struct smu_context *smu, long *input, u
>>>       /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */
>>>       workload_type = smu_cmn_to_asic_specific_index(smu,
>>>                                                      CMN2ASIC_MAPPING_WORKLOAD,
>>> -                                                    smu->power_profile_mode);
>>> +                                                    profile_mode);
>>>       if (workload_type < 0)
>>>               return -EINVAL;
>>>
>>> +     if (enable)
>>> +             smu->workload_mask |= (1 << workload_type);
>>> +     else
>>> +             smu->workload_mask &= ~(1 << workload_type);
>>>       ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask,
>>>                                   smu->workload_mask, NULL);
>>>       if (ret)
>>>               dev_err(smu->adev->dev, "[%s] Failed to set work load mask!", __func__);
>>> -     else
>>> -             smu_cmn_assign_power_profile(smu);
>>>
>>>       return ret;
>>>  }
>>> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
>>> index 844532a9b641..4967e087088b 100644
>>> --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
>>> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
>>> @@ -1704,22 +1704,23 @@ static int sienna_cichlid_get_power_profile_mode(struct smu_context *smu, char *
>>>       return size;
>>>  }
>>>
>>> -static int sienna_cichlid_set_power_profile_mode(struct smu_context *smu, long *input, uint32_t size)
>>> +static int sienna_cichlid_set_power_profile_mode(struct smu_context *smu,
>>> +                                              long *input, uint32_t size,
>>> +                                              bool enable)
>>>  {
>>>
>>>       DpmActivityMonitorCoeffIntExternal_t activity_monitor_external;
>>>       DpmActivityMonitorCoeffInt_t *activity_monitor =
>>>               &(activity_monitor_external.DpmActivityMonitorCoeffInt);
>>> +     uint32_t profile_mode = input[size];
>>>       int workload_type, ret = 0;
>>>
>>> -     smu->power_profile_mode = input[size];
>>> -
>>> -     if (smu->power_profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) {
>>> -             dev_err(smu->adev->dev, "Invalid power profile mode %d\n", smu->power_profile_mode);
>>> +     if (profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) {
>>> +             dev_err(smu->adev->dev, "Invalid power profile mode %d\n", profile_mode);
>>>               return -EINVAL;
>>>       }
>>>
>>> -     if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) {
>>> +     if (enable && profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) {
>>>               if (size != 10)
>>>                       return -EINVAL;
>>>
>>> @@ -1781,16 +1782,18 @@ static int sienna_cichlid_set_power_profile_mode(struct smu_context *smu, long *
>>>       /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */
>>>       workload_type = smu_cmn_to_asic_specific_index(smu,
>>>                                                      CMN2ASIC_MAPPING_WORKLOAD,
>>> -                                                    smu->power_profile_mode);
>>> +                                                    profile_mode);
>>>       if (workload_type < 0)
>>>               return -EINVAL;
>>>
>>> +     if (enable)
>>> +             smu->workload_mask |= (1 << workload_type);
>>> +     else
>>> +             smu->workload_mask &= ~(1 << workload_type);
>>>       ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask,
>>>                                   smu->workload_mask, NULL);
>>>       if (ret)
>>>               dev_err(smu->adev->dev, "[%s] Failed to set work load mask!", __func__);
>>> -     else
>>> -             smu_cmn_assign_power_profile(smu);
>>>
>>>       return ret;
>>>  }
>>> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
>>> index f89c487dce72..b5dba4826f81 100644
>>> --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
>>> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
>>> @@ -1056,7 +1056,8 @@ static int vangogh_get_power_profile_mode(struct smu_context *smu,
>>>       return size;
>>>  }
>>>
>>> -static int vangogh_set_power_profile_mode(struct smu_context *smu, long *input, uint32_t size)
>>> +static int vangogh_set_power_profile_mode(struct smu_context *smu, long *input,
>>> +                                       uint32_t size, bool enable)
>>>  {
>>>       int workload_type, ret;
>>>       uint32_t profile_mode = input[size];
>>> @@ -1067,7 +1068,7 @@ static int vangogh_set_power_profile_mode(struct smu_context *smu, long *input,
>>>       }
>>>
>>>       if (profile_mode == PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT ||
>>> -                     profile_mode == PP_SMC_POWER_PROFILE_POWERSAVING)
>>> +         profile_mode == PP_SMC_POWER_PROFILE_POWERSAVING)
>>>               return 0;
>>>
>>>       /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */
>>> @@ -1080,18 +1081,18 @@ static int vangogh_set_power_profile_mode(struct smu_context *smu, long *input,
>>>               return -EINVAL;
>>>       }
>>>
>>> +     if (enable)
>>> +             smu->workload_mask |= (1 << workload_type);
>>> +     else
>>> +             smu->workload_mask &= ~(1 << workload_type);
>>>       ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ActiveProcessNotify,
>>>                                   smu->workload_mask,
>>>                                   NULL);
>>> -     if (ret) {
>>> +     if (ret)
>>>               dev_err_once(smu->adev->dev, "Fail to set workload type %d\n",
>>>                                       workload_type);
>>> -             return ret;
>>> -     }
>>> -
>>> -     smu_cmn_assign_power_profile(smu);
>>>
>>> -     return 0;
>>> +     return ret;
>>>  }
>>>
>>>  static int vangogh_set_soft_freq_limited_range(struct smu_context *smu,
>>> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
>>> index 75a9ea87f419..2d1eae79ab9d 100644
>>> --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
>>> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
>>> @@ -864,7 +864,8 @@ static int renoir_force_clk_levels(struct smu_context *smu,
>>>       return ret;
>>>  }
>>>
>>> -static int renoir_set_power_profile_mode(struct smu_context *smu, long *input, uint32_t size)
>>> +static int renoir_set_power_profile_mode(struct smu_context *smu, long *input,
>>> +                                      uint32_t size, bool enable)
>>>  {
>>>       int workload_type, ret;
>>>       uint32_t profile_mode = input[size];
>>> @@ -875,7 +876,7 @@ static int renoir_set_power_profile_mode(struct smu_context *smu, long *input, u
>>>       }
>>>
>>>       if (profile_mode == PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT ||
>>> -                     profile_mode == PP_SMC_POWER_PROFILE_POWERSAVING)
>>> +         profile_mode == PP_SMC_POWER_PROFILE_POWERSAVING)
>>>               return 0;
>>>
>>>       /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */
>>> @@ -891,17 +892,17 @@ static int renoir_set_power_profile_mode(struct smu_context *smu, long *input, u
>>>               return -EINVAL;
>>>       }
>>>
>>> +     if (enable)
>>> +             smu->workload_mask |= (1 << workload_type);
>>> +     else
>>> +             smu->workload_mask &= ~(1 << workload_type);
>>>       ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ActiveProcessNotify,
>>>                                   smu->workload_mask,
>>>                                   NULL);
>>> -     if (ret) {
>>> +     if (ret)
>>>               dev_err_once(smu->adev->dev, "Fail to set workload type %d\n", workload_type);
>>> -             return ret;
>>> -     }
>>>
>>> -     smu_cmn_assign_power_profile(smu);
>>> -
>>> -     return 0;
>>> +     return ret;
>>>  }
>>>
>>>  static int renoir_set_peak_clock_by_device(struct smu_context *smu)
>>> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
>>> index 80c6b1e523aa..3cc734331891 100644
>>> --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
>>> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
>>> @@ -2573,22 +2573,22 @@ static int smu_v13_0_0_get_power_profile_mode(struct smu_context *smu,
>>>
>>>  static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu,
>>>                                             long *input,
>>> -                                           uint32_t size)
>>> +                                           uint32_t size,
>>> +                                           bool enable)
>>>  {
>>>       DpmActivityMonitorCoeffIntExternal_t activity_monitor_external;
>>>       DpmActivityMonitorCoeffInt_t *activity_monitor =
>>>               &(activity_monitor_external.DpmActivityMonitorCoeffInt);
>>> +     uint32_t profile_mode = input[size];
>>>       int workload_type, ret = 0;
>>>       u32 workload_mask;
>>>
>>> -     smu->power_profile_mode = input[size];
>>> -
>>> -     if (smu->power_profile_mode >= PP_SMC_POWER_PROFILE_COUNT) {
>>> -             dev_err(smu->adev->dev, "Invalid power profile mode %d\n", smu->power_profile_mode);
>>> +     if (profile_mode >= PP_SMC_POWER_PROFILE_COUNT) {
>>> +             dev_err(smu->adev->dev, "Invalid power profile mode %d\n", profile_mode);
>>>               return -EINVAL;
>>>       }
>>>
>>> -     if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) {
>>> +     if (enable && profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) {
>>>               if (size != 9)
>>>                       return -EINVAL;
>>>
>>> @@ -2641,13 +2641,18 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu,
>>>       /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */
>>>       workload_type = smu_cmn_to_asic_specific_index(smu,
>>>                                                      CMN2ASIC_MAPPING_WORKLOAD,
>>> -                                                    smu->power_profile_mode);
>>> +                                                    profile_mode);
>>>
>>>       if (workload_type < 0)
>>>               return -EINVAL;
>>>
>>>       workload_mask = 1 << workload_type;
>>>
>>> +     if (enable)
>>> +             smu->workload_mask |= workload_mask;
>>> +     else
>>> +             smu->workload_mask &= ~workload_mask;
>>> +
>>>       /* Add optimizations for SMU13.0.0/10.  Reuse the power saving profile */
>>>       if ((amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 0) &&
>>>            ((smu->adev->pm.fw_version == 0x004e6601) ||
>>> @@ -2658,25 +2663,13 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu,
>>>                                                              CMN2ASIC_MAPPING_WORKLOAD,
>>>                                                              PP_SMC_POWER_PROFILE_POWERSAVING);
>>>               if (workload_type >= 0)
>>> -                     workload_mask |= 1 << workload_type;
>>> +                     smu->workload_mask |= 1 << workload_type;
>>>       }
>>>
>>> -     smu->workload_mask |= workload_mask;
>>>       ret = smu_cmn_send_smc_msg_with_param(smu,
>>>                                              SMU_MSG_SetWorkloadMask,
>>>                                              smu->workload_mask,
>>>                                              NULL);
>>> -     if (!ret) {
>>> -             smu_cmn_assign_power_profile(smu);
>>> -             if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_POWERSAVING) {
>>> -                     workload_type = smu_cmn_to_asic_specific_index(smu,
>>> -                                                            CMN2ASIC_MAPPING_WORKLOAD,
>>> -                                                            PP_SMC_POWER_PROFILE_FULLSCREEN3D);
>>> -                     smu->power_profile_mode = smu->workload_mask & (1 << workload_type)
>>> -                                                                             ? PP_SMC_POWER_PROFILE_FULLSCREEN3D
>>> -                                                                             : PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
>>> -             }
>>> -     }
>>>
>>>       return ret;
>>>  }
>>> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
>>> index c5d3e25cc967..1aafd23857f0 100644
>>> --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
>>> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
>>> @@ -2528,22 +2528,23 @@ do {                                                                                                  \
>>>       return result;
>>>  }
>>>
>>> -static int smu_v13_0_7_set_power_profile_mode(struct smu_context *smu, long *input, uint32_t size)
>>> +static int smu_v13_0_7_set_power_profile_mode(struct smu_context *smu,
>>> +                                           long *input, uint32_t size,
>>> +                                           bool enable)
>>>  {
>>>
>>>       DpmActivityMonitorCoeffIntExternal_t activity_monitor_external;
>>>       DpmActivityMonitorCoeffInt_t *activity_monitor =
>>>               &(activity_monitor_external.DpmActivityMonitorCoeffInt);
>>> +     uint32_t profile_mode = input[size];
>>>       int workload_type, ret = 0;
>>>
>>> -     smu->power_profile_mode = input[size];
>>> -
>>> -     if (smu->power_profile_mode > PP_SMC_POWER_PROFILE_WINDOW3D) {
>>> -             dev_err(smu->adev->dev, "Invalid power profile mode %d\n", smu->power_profile_mode);
>>> +     if (profile_mode > PP_SMC_POWER_PROFILE_WINDOW3D) {
>>> +             dev_err(smu->adev->dev, "Invalid power profile mode %d\n", profile_mode);
>>>               return -EINVAL;
>>>       }
>>>
>>> -     if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) {
>>> +     if (enable && profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) {
>>>               if (size != 8)
>>>                       return -EINVAL;
>>>
>>> @@ -2590,17 +2591,19 @@ static int smu_v13_0_7_set_power_profile_mode(struct smu_context *smu, long *inp
>>>       /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */
>>>       workload_type = smu_cmn_to_asic_specific_index(smu,
>>>                                                      CMN2ASIC_MAPPING_WORKLOAD,
>>> -                                                    smu->power_profile_mode);
>>> +                                                    profile_mode);
>>>       if (workload_type < 0)
>>>               return -EINVAL;
>>>
>>> +     if (enable)
>>> +             smu->workload_mask |= (1 << workload_type);
>>> +     else
>>> +             smu->workload_mask &= ~(1 << workload_type);
>>>       ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask,
>>>                                   smu->workload_mask, NULL);
>>>
>>>       if (ret)
>>>               dev_err(smu->adev->dev, "[%s] Failed to set work load mask!", __func__);
>>> -     else
>>> -             smu_cmn_assign_power_profile(smu);
>>>
>>>       return ret;
>>>  }
>>> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
>>> index 59b369eff30f..b64490bcfd62 100644
>>> --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
>>> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
>>> @@ -1719,21 +1719,21 @@ static int smu_v14_0_2_get_power_profile_mode(struct smu_context *smu,
>>>
>>>  static int smu_v14_0_2_set_power_profile_mode(struct smu_context *smu,
>>>                                             long *input,
>>> -                                           uint32_t size)
>>> +                                           uint32_t size,
>>> +                                           bool enable)
>>>  {
>>>       DpmActivityMonitorCoeffIntExternal_t activity_monitor_external;
>>>       DpmActivityMonitorCoeffInt_t *activity_monitor =
>>>               &(activity_monitor_external.DpmActivityMonitorCoeffInt);
>>> +     uint32_t profile_mode = input[size];
>>>       int workload_type, ret = 0;
>>> -     uint32_t current_profile_mode = smu->power_profile_mode;
>>> -     smu->power_profile_mode = input[size];
>>>
>>> -     if (smu->power_profile_mode >= PP_SMC_POWER_PROFILE_COUNT) {
>>> -             dev_err(smu->adev->dev, "Invalid power profile mode %d\n", smu->power_profile_mode);
>>> +     if (profile_mode >= PP_SMC_POWER_PROFILE_COUNT) {
>>> +             dev_err(smu->adev->dev, "Invalid power profile mode %d\n", profile_mode);
>>>               return -EINVAL;
>>>       }
>>>
>>> -     if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) {
>>> +     if (enable && profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) {
>>>               if (size != 9)
>>>                       return -EINVAL;
>>>
>>> @@ -1783,23 +1783,24 @@ static int smu_v14_0_2_set_power_profile_mode(struct smu_context *smu,
>>>               }
>>>       }
>>>
>>> -     if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_COMPUTE)
>>> -             smu_v14_0_deep_sleep_control(smu, false);
>>> -     else if (current_profile_mode == PP_SMC_POWER_PROFILE_COMPUTE)
>>> -             smu_v14_0_deep_sleep_control(smu, true);
>>> -
>>>       /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */
>>>       workload_type = smu_cmn_to_asic_specific_index(smu,
>>>                                                      CMN2ASIC_MAPPING_WORKLOAD,
>>> -                                                    smu->power_profile_mode);
>>> +                                                    profile_mode);
>>>       if (workload_type < 0)
>>>               return -EINVAL;
>>>
>>> -     ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask,
>>> -                                                                               smu->workload_mask, NULL);
>>> +     if (enable)
>>> +             smu->workload_mask |= (1 << workload_type);
>>> +     else
>>> +             smu->workload_mask &= ~(1 << workload_type);
>>>
>>> -     if (!ret)
>>> -             smu_cmn_assign_power_profile(smu);
>>> +     /* disable deep sleep if compute is enabled */
>>> +     if (profile_mode == PP_SMC_POWER_PROFILE_COMPUTE)
>>> +             smu_v14_0_deep_sleep_control(smu, !enable);
>>> +
>>> +     ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask,
>>> +                                           smu->workload_mask, NULL);
>>>
>>>       return ret;
>>>  }
>>> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
>>> index fd2aa949538e..63c4f75fa118 100644
>>> --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
>>> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
>>> @@ -1141,14 +1141,6 @@ int smu_cmn_set_mp1_state(struct smu_context *smu,
>>>       return ret;
>>>  }
>>>
>>> -void smu_cmn_assign_power_profile(struct smu_context *smu)
>>> -{
>>> -     uint32_t index;
>>> -     index = fls(smu->workload_mask);
>>> -     index = index > 0 && index <= WORKLOAD_POLICY_MAX ? index - 1 : 0;
>>> -     smu->power_profile_mode = smu->workload_setting[index];
>>> -}
>>> -
>>>  bool smu_cmn_is_audio_func_enabled(struct amdgpu_device *adev)
>>>  {
>>>       struct pci_dev *p = NULL;
>>> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
>>> index 8a801e389659..1de685defe85 100644
>>> --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
>>> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
>>> @@ -130,8 +130,6 @@ void smu_cmn_init_soft_gpu_metrics(void *table, uint8_t frev, uint8_t crev);
>>>  int smu_cmn_set_mp1_state(struct smu_context *smu,
>>>                         enum pp_mp1_state mp1_state);
>>>
>>> -void smu_cmn_assign_power_profile(struct smu_context *smu);
>>> -
>>>  /*
>>>   * Helper function to make sysfs_emit_at() happy. Align buf to
>>>   * the current page boundary and record the offset.