-----Original Message-----
From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of Alex Deucher
Sent: Thursday, March 25, 2021 5:18 AM
To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx
Cc: Deucher, Alexander <Alexander.Deucher@xxxxxxx>
Subject: [PATCH] drm/amdgpu/pm: bail on sysfs/debugfs queries during platform suspend
The GPU is in the process of being shutdown. Spurious queries during
suspend and resume can put the SMU into a bad state. Runtime PM is
handled dynamically so we check if we are in non-runtime suspend.
Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx>
---
drivers/gpu/drm/amd/pm/amdgpu_pm.c | 98 ++++++++++++++++++++++++++++++
1 file changed, 98 insertions(+)
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index 2627870a786e..3c1b5483688b 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -129,6 +129,8 @@ static ssize_t amdgpu_get_power_dpm_state(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0) {
@@ -162,6 +164,8 @@ static ssize_t amdgpu_set_power_dpm_state(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
if (strncmp("battery", buf, strlen("battery")) == 0)
state = POWER_STATE_TYPE_BATTERY;
@@ -268,6 +272,8 @@ static ssize_t amdgpu_get_power_dpm_force_performance_level(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0) {
@@ -310,6 +316,8 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
if (strncmp("low", buf, strlen("low")) == 0) {
level = AMD_DPM_FORCED_LEVEL_LOW;
@@ -408,6 +416,8 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0) {
@@ -448,6 +458,8 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0) {
@@ -484,6 +496,8 @@ static ssize_t amdgpu_get_pp_force_state(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
if (adev->pp_force_state_enabled)
return amdgpu_get_pp_cur_state(dev, attr, buf);
@@ -504,6 +518,8 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
if (strlen(buf) == 1)
adev->pp_force_state_enabled = false;
@@ -564,6 +580,8 @@ static ssize_t amdgpu_get_pp_table(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0) {
@@ -602,6 +620,8 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0) {
@@ -764,6 +784,8 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
if (count > 127)
return -EINVAL;
@@ -865,6 +887,8 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0) {
@@ -916,6 +940,8 @@ static ssize_t amdgpu_set_pp_features(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
ret = kstrtou64(buf, 0, &featuremask);
if (ret)
@@ -959,6 +985,8 @@ static ssize_t amdgpu_get_pp_features(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0) {
@@ -1018,6 +1046,8 @@ static ssize_t amdgpu_get_pp_dpm_clock(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0) {
@@ -1083,6 +1113,8 @@ static ssize_t amdgpu_set_pp_dpm_clock(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
ret = amdgpu_read_mask(buf, count, &mask);
if (ret)
@@ -1239,6 +1271,8 @@ static ssize_t amdgpu_get_pp_sclk_od(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0) {
@@ -1269,6 +1303,8 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
ret = kstrtol(buf, 0, &value);
@@ -1312,6 +1348,8 @@ static ssize_t amdgpu_get_pp_mclk_od(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0) {
@@ -1342,6 +1380,8 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
ret = kstrtol(buf, 0, &value);
@@ -1405,6 +1445,8 @@ static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0) {
@@ -1443,6 +1485,8 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
tmp[0] = *(buf);
tmp[1] = '\0';
@@ -1506,6 +1550,8 @@ static ssize_t amdgpu_get_gpu_busy_percent(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
r = pm_runtime_get_sync(ddev->dev);
if (r < 0) {
@@ -1544,6 +1590,8 @@ static ssize_t amdgpu_get_mem_busy_percent(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
r = pm_runtime_get_sync(ddev->dev);
if (r < 0) {
@@ -1587,6 +1635,8 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
if (adev->flags & AMD_IS_APU)
return -ENODATA;
@@ -1628,6 +1678,8 @@ static ssize_t amdgpu_get_unique_id(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
if (adev->unique_id)
return snprintf(buf, PAGE_SIZE, "%016llx\n", adev->unique_id);
@@ -1726,6 +1778,8 @@ static ssize_t amdgpu_get_gpu_metrics(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
if (ret < 0) {
@@ -1954,6 +2008,8 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
if (channel >= PP_TEMP_MAX)
return -EINVAL;
@@ -2090,6 +2146,8 @@ static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
ret = pm_runtime_get_sync(adev_to_drm(adev)->dev);
if (ret < 0) {
@@ -2122,6 +2180,8 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
err = kstrtoint(buf, 10, &value);
if (err)
@@ -2172,6 +2232,8 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
err = pm_runtime_get_sync(adev_to_drm(adev)->dev);
if (err < 0) {
@@ -2220,6 +2282,8 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
err = pm_runtime_get_sync(adev_to_drm(adev)->dev);
if (err < 0) {
@@ -2253,6 +2317,8 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
err = pm_runtime_get_sync(adev_to_drm(adev)->dev);
if (err < 0) {
@@ -2285,6 +2351,8 @@ static ssize_t amdgpu_hwmon_get_fan1_min(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
if (r < 0) {
@@ -2315,6 +2383,8 @@ static ssize_t amdgpu_hwmon_get_fan1_max(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
if (r < 0) {
@@ -2344,6 +2414,8 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
err = pm_runtime_get_sync(adev_to_drm(adev)->dev);
if (err < 0) {
@@ -2376,6 +2448,8 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
err = pm_runtime_get_sync(adev_to_drm(adev)->dev);
if (err < 0) {
@@ -2422,6 +2496,8 @@ static ssize_t amdgpu_hwmon_get_fan1_enable(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
ret = pm_runtime_get_sync(adev_to_drm(adev)->dev);
if (ret < 0) {
@@ -2455,6 +2531,8 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
err = kstrtoint(buf, 10, &value);
if (err)
@@ -2496,6 +2574,8 @@ static ssize_t amdgpu_hwmon_show_vddgfx(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
if (r < 0) {
@@ -2533,6 +2613,8 @@ static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
/* only APUs have vddnb */
if (!(adev->flags & AMD_IS_APU))
@@ -2575,6 +2657,8 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
if (r < 0) {
@@ -2619,6 +2703,8 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
if (r < 0) {
@@ -2656,6 +2742,8 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
if (r < 0) {
@@ -2693,6 +2781,8 @@ static ssize_t amdgpu_hwmon_show_power_cap_default(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
if (r < 0) {
@@ -2739,6 +2829,8 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
if (amdgpu_sriov_vf(adev))
return -EINVAL;
@@ -2780,6 +2872,8 @@ static ssize_t amdgpu_hwmon_show_sclk(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
if (r < 0) {
@@ -2817,6 +2911,8 @@ static ssize_t amdgpu_hwmon_show_mclk(struct device *dev,
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
if (r < 0) {
@@ -3390,6 +3486,8 @@ static int amdgpu_debugfs_pm_info_show(struct seq_file *m, void *unused)
if (amdgpu_in_reset(adev))
return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
r = pm_runtime_get_sync(dev->dev);
if (r < 0) {