Re: [PATCH] drm/amd: Don't wake dGPUs while reading sensors

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 8/23/2024 09:09, Alex Deucher wrote:
On Mon, Aug 19, 2024 at 10:30 PM Mario Limonciello <superm1@xxxxxxxxxx> wrote:

From: Mario Limonciello <mario.limonciello@xxxxxxx>

If the dGPU is off, then reading the sysfs files with a sensor monitoring
application will wake it. Change the behavior to return an error when the
dGPU is in D3cold.

I'm a little concerned that this will generate a flurry of bug reports
if this now reports an error.  One more comment below.


Do you have a particular app you're worried about, or just a general worry? I've had a lot of people reach out to me complaining about battery life on A+A systems, and it comes down to the use of sensor monitoring software waking the dGPU which people don't seem to expect.

I did double check that software like 'sensors', 'mission center' and 'nvtop' don't freak out from this change.

Here is what 'sensors' shows on my local workstation with this change.

amdgpu-pci-6100
Adapter: PCI adapter
vddgfx:           N/A
ERROR: Can't get value of subfeature fan1_min: Can't read
ERROR: Can't get value of subfeature fan1_max: Can't read
fan1:             N/A  (min =    0 RPM, max =    0 RPM)
edge:             N/A  (crit = +97.0°C, hyst = -273.1°C)
ERROR: Can't get value of subfeature power1_cap: Can't read
PPT:              N/A  (cap =   0.00 W)


Signed-off-by: Mario Limonciello <mario.limonciello@xxxxxxx>
---
  drivers/gpu/drm/amd/pm/amdgpu_pm.c | 90 +++++++++++++++---------------
  1 file changed, 45 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index c11952a4389bc..d6e38466fbb82 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -142,7 +142,7 @@ static ssize_t amdgpu_get_power_dpm_state(struct device *dev,

         if (amdgpu_in_reset(adev))
                 return -EPERM;
-       if (adev->in_suspend && !adev->in_runpm)
+       if (adev->in_suspend || adev->in_runpm)
                 return -EPERM;

         ret = pm_runtime_get_sync(ddev->dev);
@@ -173,7 +173,7 @@ static ssize_t amdgpu_set_power_dpm_state(struct device *dev,

         if (amdgpu_in_reset(adev))
                 return -EPERM;
-       if (adev->in_suspend && !adev->in_runpm)
+       if (adev->in_suspend || adev->in_runpm)
                 return -EPERM;

         if (strncmp("battery", buf, strlen("battery")) == 0)
@@ -270,7 +270,7 @@ static ssize_t amdgpu_get_power_dpm_force_performance_level(struct device *dev,

         if (amdgpu_in_reset(adev))
                 return -EPERM;
-       if (adev->in_suspend && !adev->in_runpm)
+       if (adev->in_suspend || adev->in_runpm)
                 return -EPERM;

         ret = pm_runtime_get_sync(ddev->dev);
@@ -309,7 +309,7 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev,

         if (amdgpu_in_reset(adev))
                 return -EPERM;
-       if (adev->in_suspend && !adev->in_runpm)
+       if (adev->in_suspend || adev->in_runpm)
                 return -EPERM;

         if (strncmp("low", buf, strlen("low")) == 0) {
@@ -371,7 +371,7 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev,

         if (amdgpu_in_reset(adev))
                 return -EPERM;
-       if (adev->in_suspend && !adev->in_runpm)
+       if (adev->in_suspend || adev->in_runpm)
                 return -EPERM;

         ret = pm_runtime_get_sync(ddev->dev);
@@ -409,7 +409,7 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev,

         if (amdgpu_in_reset(adev))
                 return -EPERM;
-       if (adev->in_suspend && !adev->in_runpm)
+       if (adev->in_suspend || adev->in_runpm)
                 return -EPERM;

         ret = pm_runtime_get_sync(ddev->dev);
@@ -448,7 +448,7 @@ static ssize_t amdgpu_get_pp_force_state(struct device *dev,

         if (amdgpu_in_reset(adev))
                 return -EPERM;
-       if (adev->in_suspend && !adev->in_runpm)
+       if (adev->in_suspend || adev->in_runpm)
                 return -EPERM;

         if (adev->pm.pp_force_state_enabled)
@@ -471,7 +471,7 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,

         if (amdgpu_in_reset(adev))
                 return -EPERM;
-       if (adev->in_suspend && !adev->in_runpm)
+       if (adev->in_suspend || adev->in_runpm)
                 return -EPERM;

         adev->pm.pp_force_state_enabled = false;
@@ -541,7 +541,7 @@ static ssize_t amdgpu_get_pp_table(struct device *dev,

         if (amdgpu_in_reset(adev))
                 return -EPERM;
-       if (adev->in_suspend && !adev->in_runpm)
+       if (adev->in_suspend || adev->in_runpm)
                 return -EPERM;

         ret = pm_runtime_get_sync(ddev->dev);
@@ -577,7 +577,7 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,

         if (amdgpu_in_reset(adev))
                 return -EPERM;
-       if (adev->in_suspend && !adev->in_runpm)
+       if (adev->in_suspend || adev->in_runpm)
                 return -EPERM;

         ret = pm_runtime_get_sync(ddev->dev);
@@ -760,7 +760,7 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,

         if (amdgpu_in_reset(adev))
                 return -EPERM;
-       if (adev->in_suspend && !adev->in_runpm)
+       if (adev->in_suspend || adev->in_runpm)
                 return -EPERM;

         if (count > 127 || count == 0)
@@ -862,7 +862,7 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,

         if (amdgpu_in_reset(adev))
                 return -EPERM;
-       if (adev->in_suspend && !adev->in_runpm)
+       if (adev->in_suspend || adev->in_runpm)
                 return -EPERM;

         ret = pm_runtime_get_sync(ddev->dev);
@@ -922,7 +922,7 @@ static ssize_t amdgpu_set_pp_features(struct device *dev,

         if (amdgpu_in_reset(adev))
                 return -EPERM;
-       if (adev->in_suspend && !adev->in_runpm)
+       if (adev->in_suspend || adev->in_runpm)
                 return -EPERM;

         ret = kstrtou64(buf, 0, &featuremask);
@@ -957,7 +957,7 @@ static ssize_t amdgpu_get_pp_features(struct device *dev,

         if (amdgpu_in_reset(adev))
                 return -EPERM;
-       if (adev->in_suspend && !adev->in_runpm)
+       if (adev->in_suspend || adev->in_runpm)
                 return -EPERM;

         ret = pm_runtime_get_sync(ddev->dev);
@@ -1026,7 +1026,7 @@ static ssize_t amdgpu_get_pp_dpm_clock(struct device *dev,

         if (amdgpu_in_reset(adev))
                 return -EPERM;
-       if (adev->in_suspend && !adev->in_runpm)
+       if (adev->in_suspend || adev->in_runpm)
                 return -EPERM;

         ret = pm_runtime_get_sync(ddev->dev);
@@ -1095,7 +1095,7 @@ static ssize_t amdgpu_set_pp_dpm_clock(struct device *dev,

         if (amdgpu_in_reset(adev))
                 return -EPERM;
-       if (adev->in_suspend && !adev->in_runpm)
+       if (adev->in_suspend || adev->in_runpm)
                 return -EPERM;

         ret = amdgpu_read_mask(buf, count, &mask);
@@ -1280,7 +1280,7 @@ static ssize_t amdgpu_get_pp_sclk_od(struct device *dev,

         if (amdgpu_in_reset(adev))
                 return -EPERM;
-       if (adev->in_suspend && !adev->in_runpm)
+       if (adev->in_suspend || adev->in_runpm)
                 return -EPERM;

         ret = pm_runtime_get_sync(ddev->dev);
@@ -1309,7 +1309,7 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,

         if (amdgpu_in_reset(adev))
                 return -EPERM;
-       if (adev->in_suspend && !adev->in_runpm)
+       if (adev->in_suspend || adev->in_runpm)
                 return -EPERM;

         ret = kstrtol(buf, 0, &value);
@@ -1342,7 +1342,7 @@ static ssize_t amdgpu_get_pp_mclk_od(struct device *dev,

         if (amdgpu_in_reset(adev))
                 return -EPERM;
-       if (adev->in_suspend && !adev->in_runpm)
+       if (adev->in_suspend || adev->in_runpm)
                 return -EPERM;

         ret = pm_runtime_get_sync(ddev->dev);
@@ -1371,7 +1371,7 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,

         if (amdgpu_in_reset(adev))
                 return -EPERM;
-       if (adev->in_suspend && !adev->in_runpm)
+       if (adev->in_suspend || adev->in_runpm)
                 return -EPERM;

         ret = kstrtol(buf, 0, &value);
@@ -1424,7 +1424,7 @@ static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev,

         if (amdgpu_in_reset(adev))
                 return -EPERM;
-       if (adev->in_suspend && !adev->in_runpm)
+       if (adev->in_suspend || adev->in_runpm)
                 return -EPERM;

         ret = pm_runtime_get_sync(ddev->dev);
@@ -1463,7 +1463,7 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev,

         if (amdgpu_in_reset(adev))
                 return -EPERM;
-       if (adev->in_suspend && !adev->in_runpm)
+       if (adev->in_suspend || adev->in_runpm)
                 return -EPERM;

         tmp[0] = *(buf);
@@ -1517,7 +1517,7 @@ static int amdgpu_hwmon_get_sensor_generic(struct amdgpu_device *adev,

         if (amdgpu_in_reset(adev))
                 return -EPERM;
-       if (adev->in_suspend && !adev->in_runpm)
+       if (adev->in_suspend || adev->in_runpm)
                 return -EPERM;

         r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
@@ -1630,7 +1630,7 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev,

         if (amdgpu_in_reset(adev))
                 return -EPERM;
-       if (adev->in_suspend && !adev->in_runpm)
+       if (adev->in_suspend || adev->in_runpm)
                 return -EPERM;

         if (adev->flags & AMD_IS_APU)
@@ -1673,7 +1673,7 @@ static ssize_t amdgpu_get_unique_id(struct device *dev,

         if (amdgpu_in_reset(adev))
                 return -EPERM;
-       if (adev->in_suspend && !adev->in_runpm)
+       if (adev->in_suspend || adev->in_runpm)
                 return -EPERM;

         if (adev->unique_id)
@@ -1846,7 +1846,7 @@ static ssize_t amdgpu_get_pm_metrics(struct device *dev,

         if (amdgpu_in_reset(adev))
                 return -EPERM;
-       if (adev->in_suspend && !adev->in_runpm)
+       if (adev->in_suspend || adev->in_runpm)
                 return -EPERM;

         ret = pm_runtime_get_sync(ddev->dev);
@@ -1887,7 +1887,7 @@ static ssize_t amdgpu_get_gpu_metrics(struct device *dev,

         if (amdgpu_in_reset(adev))
                 return -EPERM;
-       if (adev->in_suspend && !adev->in_runpm)
+       if (adev->in_suspend || adev->in_runpm)
                 return -EPERM;

         ret = pm_runtime_get_sync(ddev->dev);
@@ -2005,7 +2005,7 @@ static ssize_t amdgpu_set_smartshift_bias(struct device *dev,

         if (amdgpu_in_reset(adev))
                 return -EPERM;
-       if (adev->in_suspend && !adev->in_runpm)
+       if (adev->in_suspend || adev->in_runpm)
                 return -EPERM;

         r = pm_runtime_get_sync(ddev->dev);
@@ -2227,7 +2227,7 @@ static ssize_t amdgpu_get_xgmi_plpd_policy(struct device *dev,

         if (amdgpu_in_reset(adev))
                 return -EPERM;
-       if (adev->in_suspend && !adev->in_runpm)
+       if (adev->in_suspend || adev->in_runpm)
                 return -EPERM;

         mode = amdgpu_dpm_get_xgmi_plpd_mode(adev, &mode_desc);
@@ -2250,7 +2250,7 @@ static ssize_t amdgpu_set_xgmi_plpd_policy(struct device *dev,

         if (amdgpu_in_reset(adev))
                 return -EPERM;
-       if (adev->in_suspend && !adev->in_runpm)
+       if (adev->in_suspend || adev->in_runpm)
                 return -EPERM;

         ret = kstrtos32(buf, 0, &mode);
@@ -2652,7 +2652,7 @@ static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,

         if (amdgpu_in_reset(adev))
                 return -EPERM;
-       if (adev->in_suspend && !adev->in_runpm)
+       if (adev->in_suspend || adev->in_runpm)
                 return -EPERM;

         ret = pm_runtime_get_sync(adev_to_drm(adev)->dev);
@@ -2684,7 +2684,7 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev,

         if (amdgpu_in_reset(adev))
                 return -EPERM;
-       if (adev->in_suspend && !adev->in_runpm)
+       if (adev->in_suspend || adev->in_runpm)
                 return -EPERM;

         err = kstrtoint(buf, 10, &value);
@@ -2742,7 +2742,7 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev,

         if (amdgpu_in_reset(adev))
                 return -EPERM;
-       if (adev->in_suspend && !adev->in_runpm)
+       if (adev->in_suspend || adev->in_runpm)
                 return -EPERM;

         err = kstrtou32(buf, 10, &value);
@@ -2787,7 +2787,7 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev,

         if (amdgpu_in_reset(adev))
                 return -EPERM;
-       if (adev->in_suspend && !adev->in_runpm)
+       if (adev->in_suspend || adev->in_runpm)
                 return -EPERM;

         err = pm_runtime_get_sync(adev_to_drm(adev)->dev);
@@ -2817,7 +2817,7 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev,

         if (amdgpu_in_reset(adev))
                 return -EPERM;
-       if (adev->in_suspend && !adev->in_runpm)
+       if (adev->in_suspend || adev->in_runpm)
                 return -EPERM;

         err = pm_runtime_get_sync(adev_to_drm(adev)->dev);
@@ -2881,7 +2881,7 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev,

         if (amdgpu_in_reset(adev))
                 return -EPERM;
-       if (adev->in_suspend && !adev->in_runpm)
+       if (adev->in_suspend || adev->in_runpm)
                 return -EPERM;

         err = pm_runtime_get_sync(adev_to_drm(adev)->dev);
@@ -2912,7 +2912,7 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev,

         if (amdgpu_in_reset(adev))
                 return -EPERM;
-       if (adev->in_suspend && !adev->in_runpm)
+       if (adev->in_suspend || adev->in_runpm)
                 return -EPERM;

         err = kstrtou32(buf, 10, &value);
@@ -2956,7 +2956,7 @@ static ssize_t amdgpu_hwmon_get_fan1_enable(struct device *dev,

         if (amdgpu_in_reset(adev))
                 return -EPERM;
-       if (adev->in_suspend && !adev->in_runpm)
+       if (adev->in_suspend || adev->in_runpm)
                 return -EPERM;

         ret = pm_runtime_get_sync(adev_to_drm(adev)->dev);
@@ -2988,7 +2988,7 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev,

         if (amdgpu_in_reset(adev))
                 return -EPERM;
-       if (adev->in_suspend && !adev->in_runpm)
+       if (adev->in_suspend || adev->in_runpm)
                 return -EPERM;

         err = kstrtoint(buf, 10, &value);
@@ -3128,7 +3128,7 @@ static ssize_t amdgpu_hwmon_show_power_cap_generic(struct device *dev,

         if (amdgpu_in_reset(adev))
                 return -EPERM;
-       if (adev->in_suspend && !adev->in_runpm)
+       if (adev->in_suspend || adev->in_runpm)
                 return -EPERM;

         r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
@@ -3209,7 +3209,7 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,

         if (amdgpu_in_reset(adev))
                 return -EPERM;
-       if (adev->in_suspend && !adev->in_runpm)
+       if (adev->in_suspend || adev->in_runpm)
                 return -EPERM;

         if (amdgpu_sriov_vf(adev))
@@ -3663,7 +3663,7 @@ static int amdgpu_retrieve_od_settings(struct amdgpu_device *adev,

         if (amdgpu_in_reset(adev))
                 return -EPERM;
-       if (adev->in_suspend && !adev->in_runpm)
+       if (adev->in_suspend || adev->in_runpm)
                 return -EPERM;

         ret = pm_runtime_get_sync(adev->dev);
@@ -3747,7 +3747,7 @@ amdgpu_distribute_custom_od_settings(struct amdgpu_device *adev,

         if (amdgpu_in_reset(adev))
                 return -EPERM;
-       if (adev->in_suspend && !adev->in_runpm)
+       if (adev->in_suspend || adev->in_runpm)
                 return -EPERM;

         ret = parse_input_od_command_lines(in_buf,
@@ -4626,7 +4626,7 @@ static int amdgpu_debugfs_pm_info_show(struct seq_file *m, void *unused)

         if (amdgpu_in_reset(adev))
                 return -EPERM;
-       if (adev->in_suspend && !adev->in_runpm)
+       if (adev->in_suspend || adev->in_runpm)
                 return -EPERM;

I'd prefer to keep the current behavior for debugfs.

OK.  I'll exclude it for debugfs in the next spin.


Alex


         r = pm_runtime_get_sync(dev->dev);
@@ -4671,7 +4671,7 @@ static ssize_t amdgpu_pm_prv_buffer_read(struct file *f, char __user *buf,

         if (amdgpu_in_reset(adev))
                 return -EPERM;
-       if (adev->in_suspend && !adev->in_runpm)
+       if (adev->in_suspend || adev->in_runpm)
                 return -EPERM;

         ret = amdgpu_dpm_get_smu_prv_buf_details(adev, &smu_prv_buf, &smu_prv_buf_size);
--
2.43.0





[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux