User can check and set the enablement of throttling logging and the interval between each logging. V2: simplify the sysfs interface(no string parsing) V3: add proper lock protection on updating throttling_logging_rs.interval Change-Id: Id37710e1e7fe0aaf7cc858a554db60e583be611d Signed-off-by: Evan Quan <evan.quan@xxxxxxx> Reviewed-by: Christian König <christian.koenig@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 ++++ drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 71 ++++++++++++++++++++++ drivers/gpu/drm/amd/powerplay/smu_v11_0.c | 10 ++- 4 files changed, 89 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 894fe58276ae..6e74ff9990e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -996,6 +996,9 @@ struct amdgpu_device { char serial[16]; struct amdgpu_autodump autodump; + + atomic_t throttling_logging_enabled; + struct ratelimit_state throttling_logging_rs; }; static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 2f0e8da7bacf..f1bfdba8bd6e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3035,6 +3035,17 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->gfx.gfx_off_req_count = 1; adev->pm.ac_power = power_supply_is_system_supplied() > 0; + atomic_set(&adev->throttling_logging_enabled, 1); + /* + * if the throttling continues, the logging will be performed every + * minute to avoid log flooding. "-1" applied here due to the thermal + * throttling interrupt comes every 1 second. So, the total logging + * interval is 59 seconds(retelimited printk interval) + 1(waiting + * for throttling interrup) = 60 seconds. + */ + ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1); + ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE); + /* Registers mapping */ /* TODO: block userspace mapping of io register */ if (adev->asic_type >= CHIP_BONAIRE) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 808884aaf36d..37fc54c7f586 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -1808,6 +1808,76 @@ static ssize_t amdgpu_get_unique_id(struct device *dev, return 0; } +/** + * DOC: thermal_throttling_logging + * + * Thermal throttling will pull down the clock frequency and thus the performance. + * It's an useful mechanism to protect the chip from overheat. And due to performace + * impact, it will be good to prompt(log) user those thermal throttling events. + * + * Reading back the file will show you the status(enabled or disabled) of the thermal + * throttling logging and the current interval(in seconds) between each logging. + * + * By writing the file with a new logging "interval", you can manually adjust these + * settings. The "interval" specifies the interval(in seconds) between each logging. + * With "interval > 0 && interval <= 3600", the logging interval will be updated + * on request. And the logging feature will be reenabled if it was disabled. + * With "interval <= 0", the thermal throttling logging feature will be disabled. + * And for other setting("interval > 3600"), it is invalid and will be ignored + * by driver. + */ +static ssize_t amdgpu_get_thermal_throttling_logging(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + + return snprintf(buf, PAGE_SIZE, "%s: thermal throttling logging %s, with interval %d seconds\n", + adev->ddev->unique, + atomic_read(&adev->throttling_logging_enabled) ? "enabled" : "disabled", + adev->throttling_logging_rs.interval / HZ + 1); +} + +static ssize_t amdgpu_set_thermal_throttling_logging(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + long throttling_logging_interval; + unsigned long flags; + int ret = 0; + + ret = kstrtol(buf, 0, &throttling_logging_interval); + if (ret) + return ret; + + if (throttling_logging_interval > 3600) + return -EINVAL; + + if (throttling_logging_interval > 0) { + raw_spin_lock_irqsave(&adev->throttling_logging_rs.lock, flags); + /* + * Reset the ratelimit timer internals. + * This can effectively restart the timer. + */ + adev->throttling_logging_rs.interval = + (throttling_logging_interval - 1) * HZ; + adev->throttling_logging_rs.begin = 0; + adev->throttling_logging_rs.printed = 0; + adev->throttling_logging_rs.missed = 0; + raw_spin_unlock_irqrestore(&adev->throttling_logging_rs.lock, flags); + + atomic_set(&adev->throttling_logging_enabled, 1); + } else { + atomic_set(&adev->throttling_logging_enabled, 0); + } + + return count; +} + static struct amdgpu_device_attr amdgpu_device_attrs[] = { AMDGPU_DEVICE_ATTR_RW(power_dpm_state, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF), AMDGPU_DEVICE_ATTR_RW(power_dpm_force_performance_level, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF), @@ -1830,6 +1900,7 @@ static struct amdgpu_device_attr amdgpu_device_attrs[] = { AMDGPU_DEVICE_ATTR_RO(pcie_bw, ATTR_FLAG_BASIC), AMDGPU_DEVICE_ATTR_RW(pp_features, ATTR_FLAG_BASIC), AMDGPU_DEVICE_ATTR_RO(unique_id, ATTR_FLAG_BASIC), + AMDGPU_DEVICE_ATTR_RW(thermal_throttling_logging, ATTR_FLAG_BASIC), }; static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_attr *attr, diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c index 7a2e855608de..edc9782743d2 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c @@ -1533,11 +1533,6 @@ static int smu_v11_0_irq_process(struct amdgpu_device *adev, */ uint32_t ctxid = entry->src_data[0]; uint32_t data; - /* - * if the throttling continues, the logging will be performed every - * minute to avoid log flooding. - */ - static DEFINE_RATELIMIT_STATE(ratelimit_state, 60 * HZ, 1); if (client_id == SOC15_IH_CLIENTID_THM) { switch (src_id) { @@ -1582,7 +1577,10 @@ static int smu_v11_0_irq_process(struct amdgpu_device *adev, smu_v11_0_ack_ac_dc_interrupt(&adev->smu); break; case 0x7: - if (__ratelimit(&ratelimit_state)) + if (!atomic_read(&adev->throttling_logging_enabled)) + return 0; + + if (__ratelimit(&adev->throttling_logging_rs)) smu_log_thermal_throttling(smu); break; -- 2.26.2 _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx