[AMD Official Use Only] > -----Original Message----- > From: Lazar, Lijo <Lijo.Lazar@xxxxxxx> > Sent: Monday, January 10, 2022 3:36 PM > To: Quan, Evan <Evan.Quan@xxxxxxx>; amd-gfx@xxxxxxxxxxxxxxxxxxxxx > Cc: Deucher, Alexander <Alexander.Deucher@xxxxxxx> > Subject: Re: [PATCH] drm/amd/pm: correct the checks for fan attributes > support > > > > On 1/10/2022 11:30 AM, Evan Quan wrote: > > Before we relied on the return values from the corresponding interfaces. > > That is with low efficiency. And the wrong intermediate variable used > > makes the fan mode stuck at manual mode which then causes overheating > in > > 3D graphics tests. > > > > Signed-off-by: Evan Quan <evan.quan@xxxxxxx> > > Change-Id: Ia93ccf3b929c12e6d10b50c8f3596783ac63f0e3 > > --- > > drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 23 > +++++++++++++++++++++++ > > drivers/gpu/drm/amd/pm/amdgpu_pm.c | 20 ++++++++++---------- > > drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 12 ++++++++++++ > > 3 files changed, 45 insertions(+), 10 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c > b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c > > index 68d2e80a673b..e732418a9558 100644 > > --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c > > +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c > > @@ -1547,3 +1547,26 @@ int amdgpu_dpm_get_dpm_clock_table(struct > amdgpu_device *adev, > > > > return ret; > > } > > + > > +int amdgpu_dpm_is_fan_operation_supported(struct amdgpu_device > *adev, > > + enum fan_operation_id id) > > +{ > > + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; > > + > > + switch (id) { > > + case FAN_CONTROL_MODE_RETRIEVING: > > + return pp_funcs->get_fan_control_mode ? 1 : 0; > > + case FAN_CONTROL_MODE_SETTING: > > + return pp_funcs->set_fan_control_mode ? 1 : 0; > > + case FAN_SPEED_PWM_RETRIEVING: > > + return pp_funcs->get_fan_speed_pwm ? 1 : 0; > > + case FAN_SPEED_PWM_SETTING: > > + return pp_funcs->set_fan_speed_pwm ? 1 : 0; > > + case FAN_SPEED_RPM_RETRIEVING: > > + return pp_funcs->get_fan_speed_rpm ? 1 : 0; > > + case FAN_SPEED_RPM_SETTING: > > + return pp_funcs->set_fan_speed_rpm ? 1 : 0; > > + default: > > + return 0; > > + } > > +} > > diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c > b/drivers/gpu/drm/amd/pm/amdgpu_pm.c > > index d3eab245e0fe..57721750c51a 100644 > > --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c > > +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c > > @@ -3263,15 +3263,15 @@ static umode_t > hwmon_attributes_visible(struct kobject *kobj, > > return 0; > > > > /* mask fan attributes if we have no bindings for this asic to expose > */ > > - if (((amdgpu_dpm_get_fan_speed_pwm(adev, &speed) == -EINVAL) > && > > + if ((!amdgpu_dpm_is_fan_operation_supported(adev, > FAN_SPEED_PWM_RETRIEVING) && > > As per the current logic, it's really checking the hardware registers. [Quan, Evan] I probably should mention the "current" version you see now is actually a regression introduced by the commit below: 801771de0331 drm/amd/pm: do not expose power implementation details to amdgpu_pm.c The very early version(which works good) is something like below: - if (!is_support_sw_smu(adev)) { - /* mask fan attributes if we have no bindings for this asic to expose */ - if ((!adev->powerplay.pp_funcs->get_fan_speed_pwm && - attr == &sensor_dev_attr_pwm1.dev_attr.attr) || /* can't query fan */ - (!adev->powerplay.pp_funcs->get_fan_control_mode && - attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr)) /* can't query state */ - effective_mode &= ~S_IRUGO; So, the changes here are really just back to old working version. It aims to provide a quick fix for the failures reported by CQE. > For ex: we could have some SKUs that have PMFW based fan control and for > some other SKUs, AIBs could be having a different cooling solution which > doesn't make use of PMFW. > > > > attr == &sensor_dev_attr_pwm1.dev_attr.attr) || /* can't query > fan */ > > - ((amdgpu_dpm_get_fan_control_mode(adev, &speed) == - > EOPNOTSUPP) && > > + (!amdgpu_dpm_is_fan_operation_supported(adev, > FAN_CONTROL_MODE_RETRIEVING) && > > attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr)) /* can't > query state */ > > effective_mode &= ~S_IRUGO; > > > > - if (((amdgpu_dpm_set_fan_speed_pwm(adev, speed) == -EINVAL) > && > > + if ((!amdgpu_dpm_is_fan_operation_supported(adev, > FAN_SPEED_PWM_SETTING) && > > attr == &sensor_dev_attr_pwm1.dev_attr.attr) || /* can't > manage fan */ > > - ((amdgpu_dpm_set_fan_control_mode(adev, speed) == - > EOPNOTSUPP) && > > + (!amdgpu_dpm_is_fan_operation_supported(adev, > FAN_CONTROL_MODE_SETTING) && > > attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr)) /* can't > manage state */ > > effective_mode &= ~S_IWUSR; > > > > @@ -3291,16 +3291,16 @@ static umode_t > hwmon_attributes_visible(struct kobject *kobj, > > return 0; > > > > /* hide max/min values if we can't both query and manage the fan */ > > - if (((amdgpu_dpm_set_fan_speed_pwm(adev, speed) == -EINVAL) > && > > - (amdgpu_dpm_get_fan_speed_pwm(adev, &speed) == -EINVAL) > && > > - (amdgpu_dpm_set_fan_speed_rpm(adev, speed) == -EINVAL) > && > > - (amdgpu_dpm_get_fan_speed_rpm(adev, &speed) == -EINVAL)) > && > > + if ((!amdgpu_dpm_is_fan_operation_supported(adev, > FAN_SPEED_PWM_SETTING) && > > + !amdgpu_dpm_is_fan_operation_supported(adev, > FAN_SPEED_PWM_RETRIEVING) && > > + !amdgpu_dpm_is_fan_operation_supported(adev, > FAN_SPEED_RPM_SETTING) && > > + !amdgpu_dpm_is_fan_operation_supported(adev, > FAN_SPEED_RPM_RETRIEVING)) && > > If this is the case, I think we should set pm.no_fan since nothing is > possible. [Quan, Evan] Yep, I agree a more optimized version should be something like that. Let's take this a quick solution and do further optimizations later. BR Evan > > Thanks, > Lijo > > > (attr == &sensor_dev_attr_pwm1_max.dev_attr.attr || > > attr == &sensor_dev_attr_pwm1_min.dev_attr.attr)) > > return 0; > > > > - if ((amdgpu_dpm_set_fan_speed_rpm(adev, speed) == -EINVAL) > && > > - (amdgpu_dpm_get_fan_speed_rpm(adev, &speed) == -EINVAL) > && > > + if ((!amdgpu_dpm_is_fan_operation_supported(adev, > FAN_SPEED_RPM_SETTING) && > > + !amdgpu_dpm_is_fan_operation_supported(adev, > FAN_SPEED_RPM_RETRIEVING)) && > > (attr == &sensor_dev_attr_fan1_max.dev_attr.attr || > > attr == &sensor_dev_attr_fan1_min.dev_attr.attr)) > > return 0; > > diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h > b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h > > index ba857ca75392..9e18151a3c46 100644 > > --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h > > +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h > > @@ -330,6 +330,16 @@ struct amdgpu_pm { > > bool pp_force_state_enabled; > > }; > > > > +enum fan_operation_id > > +{ > > + FAN_CONTROL_MODE_RETRIEVING = 0, > > + FAN_CONTROL_MODE_SETTING = 1, > > + FAN_SPEED_PWM_RETRIEVING = 2, > > + FAN_SPEED_PWM_SETTING = 3, > > + FAN_SPEED_RPM_RETRIEVING = 4, > > + FAN_SPEED_RPM_SETTING = 5, > > +}; > > + > > u32 amdgpu_dpm_get_vblank_time(struct amdgpu_device *adev); > > int amdgpu_dpm_read_sensor(struct amdgpu_device *adev, enum > amd_pp_sensors sensor, > > void *data, uint32_t *size); > > @@ -510,4 +520,6 @@ enum pp_smu_status > amdgpu_dpm_get_uclk_dpm_states(struct amdgpu_device *adev, > > unsigned int *num_states); > > int amdgpu_dpm_get_dpm_clock_table(struct amdgpu_device *adev, > > struct dpm_clocks *clock_table); > > +int amdgpu_dpm_is_fan_operation_supported(struct amdgpu_device > *adev, > > + enum fan_operation_id id); > > #endif > >