This is a note to let you know that I've just added the patch titled drm/amdgpu: add the fan abnormal detection feature to the 6.1-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary The filename of the patch is: drm-amdgpu-add-the-fan-abnormal-detection-feature.patch and it can be found in the queue-6.1 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable@xxxxxxxxxxxxxxx> know about it. >From ef5fca9f7294509ee5013af9e879edc5837c1d6c Mon Sep 17 00:00:00 2001 From: lyndonli <Lyndon.Li@xxxxxxx> Date: Mon, 21 Nov 2022 09:10:20 +0800 Subject: drm/amdgpu: add the fan abnormal detection feature From: lyndonli <Lyndon.Li@xxxxxxx> commit ef5fca9f7294509ee5013af9e879edc5837c1d6c upstream. Update the SW CTF limit from existing register when there's a fan failure detected via SMU interrupt. Signed-off-by: lyndonli <Lyndon.Li@xxxxxxx> Reviewed-by: Hawking Zhang <Hawking.Zhang@xxxxxxx> Reviewed-by: Kenneth Feng <kenneth.feng@xxxxxxx> Reviewed-by: Evan Quan <evan.quan@xxxxxxx> Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx> Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> --- drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 1 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 28 +++++++++++++++++++ drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 1 3 files changed, 30 insertions(+) --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -168,6 +168,7 @@ struct smu_temperature_range { int mem_crit_max; int mem_emergency_max; int software_shutdown_temp; + int software_shutdown_temp_offset; }; struct smu_state_validation_block { --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -1381,6 +1381,7 @@ static int smu_v13_0_irq_process(struct */ uint32_t ctxid = entry->src_data[0]; uint32_t data; + uint32_t high; if (client_id == SOC15_IH_CLIENTID_THM) { switch (src_id) { @@ -1437,6 +1438,33 @@ static int smu_v13_0_irq_process(struct schedule_work(&smu->throttling_logging_work); break; + case 0x8: + high = smu->thermal_range.software_shutdown_temp + + smu->thermal_range.software_shutdown_temp_offset; + high = min(SMU_THERMAL_MAXIMUM_ALERT_TEMP, high); + dev_emerg(adev->dev, "Reduce soft CTF limit to %d (by an offset %d)\n", + high, + smu->thermal_range.software_shutdown_temp_offset); + + data = RREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL); + data = REG_SET_FIELD(data, THM_THERMAL_INT_CTRL, + DIG_THERM_INTH, + (high & 0xff)); + data = data & (~THM_THERMAL_INT_CTRL__THERM_TRIGGER_MASK_MASK); + WREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL, data); + break; + case 0x9: + high = min(SMU_THERMAL_MAXIMUM_ALERT_TEMP, + smu->thermal_range.software_shutdown_temp); + dev_emerg(adev->dev, "Recover soft CTF limit to %d\n", high); + + data = RREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL); + data = REG_SET_FIELD(data, THM_THERMAL_INT_CTRL, + DIG_THERM_INTH, + (high & 0xff)); + data = data & (~THM_THERMAL_INT_CTRL__THERM_TRIGGER_MASK_MASK); + WREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL, data); + break; } } } --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -1288,6 +1288,7 @@ static int smu_v13_0_7_get_thermal_tempe range->mem_emergency_max = (pptable->SkuTable.TemperatureLimit[TEMP_MEM] + CTF_OFFSET_MEM)* SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; range->software_shutdown_temp = powerplay_table->software_shutdown_temp; + range->software_shutdown_temp_offset = pptable->SkuTable.FanAbnormalTempLimitOffset; return 0; } Patches currently in stable-queue which might be from Lyndon.Li@xxxxxxx are queue-6.1/drm-amdgpu-add-the-fan-abnormal-detection-feature.patch