On 9/25/2024 8:02 PM, Pierre-Eric Pelloux-Prayer wrote: > > > Le 25/09/2024 à 15:35, Lazar, Lijo a écrit : >> >> >> On 9/25/2024 1:24 PM, Pierre-Eric Pelloux-Prayer wrote: >>> Don't wake up the GPU for reading pm values. Instead, take a runtime >>> powermanagement ref when trying to read it iff the GPU is already >>> awake. >>> >>> This avoids spurious wake ups (eg: from applets). >>> >>> We use pm_runtime_get_if_in_active(ign_usage_count=true) because we care >>> about "is the GPU awake?" not about "is the GPU awake and something else >>> prevents suspend?". >>> >>> Tested-by: Mario Limonciello <mario.limonciello@xxxxxxx> >>> Signed-off-by: Pierre-Eric Pelloux-Prayer >>> <pierre-eric.pelloux-prayer@xxxxxxx> >>> --- >>> drivers/gpu/drm/amd/pm/amdgpu_pm.c | 138 ++++++++++++++--------------- >>> 1 file changed, 69 insertions(+), 69 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c >>> b/drivers/gpu/drm/amd/pm/amdgpu_pm.c >>> index c8f34b1a4d8e..f1f339b75380 100644 >>> --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c >>> +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c >>> @@ -145,9 +145,9 @@ static ssize_t amdgpu_get_power_dpm_state(struct >>> device *dev, >>> if (adev->in_suspend && !adev->in_runpm) >>> return -EPERM; >>> - ret = pm_runtime_resume_and_get(ddev->dev); >>> - if (ret < 0) >>> - return ret; >>> + ret = pm_runtime_get_if_active(ddev->dev, true); >>> + if (ret <= 0) >>> + return ret ?: -EPERM; >> >> Not all our devices support runtime PM. >> >> As per the API documentation - >> >> "-EINVAL is returned if runtime PM is disabled for the >> device, in which case also the usage_count will remain unmodified." > > This doc is about "dev->power.disable_depth > 0", not about runtime PM > being disabled (like you would get by using pm_runtime_forbid). > > When runtime PM is disabled, usage_count is always >= 1 and the status > is RPM_ACTIVE so pm_runtime_get_if_active will succeed. > > I tested on 2 dGPUs without runtime PM, and both seemed to work fine. > Thanks for clarifying. Found that default enablement is taken care by pci base driver. https://elixir.bootlin.com/linux/v6.11/source/drivers/pci/pci.c#L3173 Thanks, Lijo > Pierre-Eric > >> >> If that's true, this won't be working on devices without runtime PM >> support. >> >> Thanks, >> Lijo >>> amdgpu_dpm_get_current_power_state(adev, &pm); >>> @@ -268,9 +268,9 @@ static ssize_t >>> amdgpu_get_power_dpm_force_performance_level(struct device *dev, >>> if (adev->in_suspend && !adev->in_runpm) >>> return -EPERM; >>> - ret = pm_runtime_resume_and_get(ddev->dev); >>> - if (ret < 0) >>> - return ret; >>> + ret = pm_runtime_get_if_active(ddev->dev, true); >>> + if (ret <= 0) >>> + return ret ?: -EPERM; >>> level = amdgpu_dpm_get_performance_level(adev); >>> @@ -364,9 +364,9 @@ static ssize_t amdgpu_get_pp_num_states(struct >>> device *dev, >>> if (adev->in_suspend && !adev->in_runpm) >>> return -EPERM; >>> - ret = pm_runtime_resume_and_get(ddev->dev); >>> - if (ret < 0) >>> - return ret; >>> + ret = pm_runtime_get_if_active(ddev->dev, true); >>> + if (ret <= 0) >>> + return ret ?: -EPERM; >>> if (amdgpu_dpm_get_pp_num_states(adev, &data)) >>> memset(&data, 0, sizeof(data)); >>> @@ -399,9 +399,9 @@ static ssize_t amdgpu_get_pp_cur_state(struct >>> device *dev, >>> if (adev->in_suspend && !adev->in_runpm) >>> return -EPERM; >>> - ret = pm_runtime_resume_and_get(ddev->dev); >>> - if (ret < 0) >>> - return ret; >>> + ret = pm_runtime_get_if_active(ddev->dev, true); >>> + if (ret <= 0) >>> + return ret ?: -EPERM; >>> amdgpu_dpm_get_current_power_state(adev, &pm); >>> @@ -526,9 +526,9 @@ static ssize_t amdgpu_get_pp_table(struct >>> device *dev, >>> if (adev->in_suspend && !adev->in_runpm) >>> return -EPERM; >>> - ret = pm_runtime_resume_and_get(ddev->dev); >>> - if (ret < 0) >>> - return ret; >>> + ret = pm_runtime_get_if_active(ddev->dev, true); >>> + if (ret <= 0) >>> + return ret ?: -EPERM; >>> size = amdgpu_dpm_get_pp_table(adev, &table); >>> @@ -840,9 +840,9 @@ static ssize_t >>> amdgpu_get_pp_od_clk_voltage(struct device *dev, >>> if (adev->in_suspend && !adev->in_runpm) >>> return -EPERM; >>> - ret = pm_runtime_resume_and_get(ddev->dev); >>> - if (ret < 0) >>> - return ret; >>> + ret = pm_runtime_get_if_active(ddev->dev, true); >>> + if (ret <= 0) >>> + return ret ?: -EPERM; >>> for (clk_index = 0 ; clk_index < 6 ; clk_index++) { >>> ret = amdgpu_dpm_emit_clock_levels(adev, >>> od_clocks[clk_index], buf, &size); >>> @@ -930,9 +930,9 @@ static ssize_t amdgpu_get_pp_features(struct >>> device *dev, >>> if (adev->in_suspend && !adev->in_runpm) >>> return -EPERM; >>> - ret = pm_runtime_resume_and_get(ddev->dev); >>> - if (ret < 0) >>> - return ret; >>> + ret = pm_runtime_get_if_active(ddev->dev, true); >>> + if (ret <= 0) >>> + return ret ?: -EPERM; >>> size = amdgpu_dpm_get_ppfeature_status(adev, buf); >>> if (size <= 0) >>> @@ -996,9 +996,9 @@ static ssize_t amdgpu_get_pp_dpm_clock(struct >>> device *dev, >>> if (adev->in_suspend && !adev->in_runpm) >>> return -EPERM; >>> - ret = pm_runtime_resume_and_get(ddev->dev); >>> - if (ret < 0) >>> - return ret; >>> + ret = pm_runtime_get_if_active(ddev->dev, true); >>> + if (ret <= 0) >>> + return ret ?: -EPERM; >>> ret = amdgpu_dpm_emit_clock_levels(adev, type, buf, &size); >>> if (ret == -ENOENT) >>> @@ -1245,9 +1245,9 @@ static ssize_t amdgpu_get_pp_sclk_od(struct >>> device *dev, >>> if (adev->in_suspend && !adev->in_runpm) >>> return -EPERM; >>> - ret = pm_runtime_resume_and_get(ddev->dev); >>> - if (ret < 0) >>> - return ret; >>> + ret = pm_runtime_get_if_active(ddev->dev, true); >>> + if (ret <= 0) >>> + return ret ?: -EPERM; >>> value = amdgpu_dpm_get_sclk_od(adev); >>> @@ -1302,9 +1302,9 @@ static ssize_t amdgpu_get_pp_mclk_od(struct >>> device *dev, >>> if (adev->in_suspend && !adev->in_runpm) >>> return -EPERM; >>> - ret = pm_runtime_resume_and_get(ddev->dev); >>> - if (ret < 0) >>> - return ret; >>> + ret = pm_runtime_get_if_active(ddev->dev, true); >>> + if (ret <= 0) >>> + return ret ?: -EPERM; >>> value = amdgpu_dpm_get_mclk_od(adev); >>> @@ -1379,9 +1379,9 @@ static ssize_t >>> amdgpu_get_pp_power_profile_mode(struct device *dev, >>> if (adev->in_suspend && !adev->in_runpm) >>> return -EPERM; >>> - ret = pm_runtime_resume_and_get(ddev->dev); >>> - if (ret < 0) >>> - return ret; >>> + ret = pm_runtime_get_if_active(ddev->dev, true); >>> + if (ret <= 0) >>> + return ret ?: -EPERM; >>> size = amdgpu_dpm_get_power_profile_mode(adev, buf); >>> if (size <= 0) >>> @@ -1467,9 +1467,9 @@ static int >>> amdgpu_hwmon_get_sensor_generic(struct amdgpu_device *adev, >>> if (adev->in_suspend && !adev->in_runpm) >>> return -EPERM; >>> - r = pm_runtime_resume_and_get(adev->dev); >>> - if (r < 0) >>> - return r; >>> + r = pm_runtime_get_if_active(adev->dev, true); >>> + if (r <= 0) >>> + return r ?: -EPERM; >>> /* get the sensor value */ >>> r = amdgpu_dpm_read_sensor(adev, sensor, query, &size); >>> @@ -1583,9 +1583,9 @@ static ssize_t amdgpu_get_pcie_bw(struct device >>> *dev, >>> if (!adev->asic_funcs->get_pcie_usage) >>> return -ENODATA; >>> - ret = pm_runtime_resume_and_get(ddev->dev); >>> - if (ret < 0) >>> - return ret; >>> + ret = pm_runtime_get_if_active(ddev->dev, true); >>> + if (ret <= 0) >>> + return ret ?: -EPERM; >>> amdgpu_asic_get_pcie_usage(adev, &count0, &count1); >>> @@ -1711,9 +1711,9 @@ static ssize_t >>> amdgpu_get_apu_thermal_cap(struct device *dev, >>> struct drm_device *ddev = dev_get_drvdata(dev); >>> struct amdgpu_device *adev = drm_to_adev(ddev); >>> - ret = pm_runtime_resume_and_get(ddev->dev); >>> - if (ret < 0) >>> - return ret; >>> + ret = pm_runtime_get_if_active(ddev->dev, true); >>> + if (ret <= 0) >>> + return ret ?: -EPERM; >>> ret = amdgpu_dpm_get_apu_thermal_limit(adev, &limit); >>> if (!ret) >>> @@ -1787,9 +1787,9 @@ static ssize_t amdgpu_get_pm_metrics(struct >>> device *dev, >>> if (adev->in_suspend && !adev->in_runpm) >>> return -EPERM; >>> - ret = pm_runtime_resume_and_get(ddev->dev); >>> - if (ret < 0) >>> - return ret; >>> + ret = pm_runtime_get_if_active(ddev->dev, true); >>> + if (ret <= 0) >>> + return ret ?: -EPERM; >>> size = amdgpu_dpm_get_pm_metrics(adev, buf, PAGE_SIZE); >>> @@ -1825,9 +1825,9 @@ static ssize_t amdgpu_get_gpu_metrics(struct >>> device *dev, >>> if (adev->in_suspend && !adev->in_runpm) >>> return -EPERM; >>> - ret = pm_runtime_resume_and_get(ddev->dev); >>> - if (ret < 0) >>> - return ret; >>> + ret = pm_runtime_get_if_active(ddev->dev, true); >>> + if (ret <= 0) >>> + return ret ?: -EPERM; >>> size = amdgpu_dpm_get_gpu_metrics(adev, &gpu_metrics); >>> if (size <= 0) >>> @@ -2700,9 +2700,9 @@ static ssize_t >>> amdgpu_hwmon_get_pwm1_enable(struct device *dev, >>> if (adev->in_suspend && !adev->in_runpm) >>> return -EPERM; >>> - ret = pm_runtime_resume_and_get(adev->dev); >>> - if (ret < 0) >>> - return ret; >>> + ret = pm_runtime_get_if_active(adev->dev, true); >>> + if (ret <= 0) >>> + return ret ?: -EPERM; >>> ret = amdgpu_dpm_get_fan_control_mode(adev, &pwm_mode); >>> @@ -2828,9 +2828,9 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct >>> device *dev, >>> if (adev->in_suspend && !adev->in_runpm) >>> return -EPERM; >>> - err = pm_runtime_resume_and_get(adev->dev); >>> - if (err < 0) >>> - return err; >>> + err = pm_runtime_get_if_active(adev->dev, true); >>> + if (err <= 0) >>> + return err ?: -EPERM; >>> err = amdgpu_dpm_get_fan_speed_pwm(adev, &speed); >>> @@ -2855,9 +2855,9 @@ static ssize_t >>> amdgpu_hwmon_get_fan1_input(struct device *dev, >>> if (adev->in_suspend && !adev->in_runpm) >>> return -EPERM; >>> - err = pm_runtime_resume_and_get(adev->dev); >>> - if (err < 0) >>> - return err; >>> + err = pm_runtime_get_if_active(adev->dev, true); >>> + if (err <= 0) >>> + return err ?: -EPERM; >>> err = amdgpu_dpm_get_fan_speed_rpm(adev, &speed); >>> @@ -2916,9 +2916,9 @@ static ssize_t >>> amdgpu_hwmon_get_fan1_target(struct device *dev, >>> if (adev->in_suspend && !adev->in_runpm) >>> return -EPERM; >>> - err = pm_runtime_resume_and_get(adev->dev); >>> - if (err < 0) >>> - return err; >>> + err = pm_runtime_get_if_active(adev->dev, true); >>> + if (err <= 0) >>> + return err ?: -EPERM; >>> err = amdgpu_dpm_get_fan_speed_rpm(adev, &rpm); >>> @@ -2986,9 +2986,9 @@ static ssize_t >>> amdgpu_hwmon_get_fan1_enable(struct device *dev, >>> if (adev->in_suspend && !adev->in_runpm) >>> return -EPERM; >>> - ret = pm_runtime_resume_and_get(adev->dev); >>> - if (ret < 0) >>> - return ret; >>> + ret = pm_runtime_get_if_active(adev->dev, true); >>> + if (ret <= 0) >>> + return ret ?: -EPERM; >>> ret = amdgpu_dpm_get_fan_control_mode(adev, &pwm_mode); >>> @@ -3152,9 +3152,9 @@ static ssize_t >>> amdgpu_hwmon_show_power_cap_generic(struct device *dev, >>> if (adev->in_suspend && !adev->in_runpm) >>> return -EPERM; >>> - r = pm_runtime_resume_and_get(adev->dev); >>> - if (r < 0) >>> - return r; >>> + r = pm_runtime_get_if_active(adev->dev, true); >>> + if (r <= 0) >>> + return r ?: -EPERM; >>> r = amdgpu_dpm_get_power_limit(adev, &limit, >>> pp_limit_level, power_type); >>> @@ -3685,9 +3685,9 @@ static int amdgpu_retrieve_od_settings(struct >>> amdgpu_device *adev, >>> if (adev->in_suspend && !adev->in_runpm) >>> return -EPERM; >>> - ret = pm_runtime_resume_and_get(adev->dev); >>> - if (ret < 0) >>> - return ret; >>> + ret = pm_runtime_get_if_active(adev->dev, true); >>> + if (ret <= 0) >>> + return ret ?: -EPERM; >>> size = amdgpu_dpm_print_clock_levels(adev, od_type, buf); >>> if (size == 0)