[AMD Official Use Only - AMD Internal Distribution Only] The series was Reviewed-by: Likun Gao <Likun.Gao@xxxxxxx>. Regards, Likun -----Original Message----- From: Kenneth Feng <kenneth.feng@xxxxxxx> Sent: Thursday, May 16, 2024 9:11 AM To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx Cc: Gao, Likun <Likun.Gao@xxxxxxx>; Feng, Kenneth <Kenneth.Feng@xxxxxxx> Subject: [PATCH 2/2] drm/amd/pm: enable thermal alert on smu 14.0.2/3 enable thermal alert on smu 14.0.2/3 Signed-off-by: Kenneth Feng <kenneth.feng@xxxxxxx> --- drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h | 8 +++ .../gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c | 68 ++++++++++++++++++- .../drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c | 35 +++++++++- 3 files changed, 108 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h index 3d34b3869df6..9b97a4e95c0f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h @@ -48,6 +48,10 @@ #define SMU14_TOOL_SIZE 0x19000 +#define CTF_OFFSET_EDGE 5 +#define CTF_OFFSET_HOTSPOT 5 +#define CTF_OFFSET_MEM 5 + extern const int decoded_link_speed[5]; extern const int decoded_link_width[7]; @@ -236,5 +240,9 @@ int smu_v14_0_od_edit_dpm_table(struct smu_context *smu, void smu_v14_0_set_smu_mailbox_registers(struct smu_context *smu); +int smu_v14_0_enable_thermal_alert(struct smu_context *smu); + +int smu_v14_0_disable_thermal_alert(struct smu_context *smu); + #endif #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c index fbfe9cae0e05..8cce17d1f230 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c @@ -38,6 +38,8 @@ #include "amdgpu_ras.h" #include "smu_cmn.h" +#include "asic_reg/thm/thm_14_0_2_offset.h" +#include "asic_reg/thm/thm_14_0_2_sh_mask.h" #include "asic_reg/mp/mp_14_0_2_offset.h" #include "asic_reg/mp/mp_14_0_2_sh_mask.h" @@ -853,12 +855,19 @@ static int smu_v14_0_set_irq_state(struct amdgpu_device *adev, unsigned tyep, enum amdgpu_interrupt_state state) { + struct smu_context *smu = adev->powerplay.pp_handle; + uint32_t low, high; uint32_t val = 0; switch (state) { case AMDGPU_IRQ_STATE_DISABLE: /* For THM irqs */ - // TODO + val = RREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL); + val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_INTH_MASK, 1); + val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_INTL_MASK, 1); + WREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL, val); + + WREG32_SOC15(THM, 0, regTHM_THERMAL_INT_ENA, 0); /* For MP1 SW irqs */ if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0) || @@ -875,7 +884,24 @@ static int smu_v14_0_set_irq_state(struct amdgpu_device *adev, break; case AMDGPU_IRQ_STATE_ENABLE: /* For THM irqs */ - // TODO + low = max(SMU_THERMAL_MINIMUM_ALERT_TEMP, + smu->thermal_range.min / SMU_TEMPERATURE_UNITS_PER_CENTIGRADES); + high = min(SMU_THERMAL_MAXIMUM_ALERT_TEMP, + smu->thermal_range.software_shutdown_temp); + val = RREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL); + val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, MAX_IH_CREDIT, 5); + val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_IH_HW_ENA, 1); + val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_INTH_MASK, 0); + val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_INTL_MASK, 0); + val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTH, (high & 0xff)); + val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTL, (low & 0xff)); + val = val & (~THM_THERMAL_INT_CTRL__THERM_TRIGGER_MASK_MASK); + WREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL, val); + + val = (1 << THM_THERMAL_INT_ENA__THERM_INTH_CLR__SHIFT); + val |= (1 << THM_THERMAL_INT_ENA__THERM_INTL_CLR__SHIFT); + val |= (1 << THM_THERMAL_INT_ENA__THERM_TRIGGER_CLR__SHIFT); + WREG32_SOC15(THM, 0, regTHM_THERMAL_INT_ENA, val); /* For MP1 SW irqs */ if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0) || @@ -1849,3 +1875,41 @@ int smu_v14_0_od_edit_dpm_table(struct smu_context *smu, return ret; } +static int smu_v14_0_allow_ih_interrupt(struct smu_context *smu) { + return smu_cmn_send_smc_msg(smu, + SMU_MSG_AllowIHHostInterrupt, + NULL); +} + +static int smu_v14_0_process_pending_interrupt(struct smu_context *smu) +{ + int ret = 0; + + if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_ACDC_BIT)) + ret = smu_v14_0_allow_ih_interrupt(smu); + + return ret; +} + +int smu_v14_0_enable_thermal_alert(struct smu_context *smu) { + int ret = 0; + + if (!smu->irq_source.num_types) + return 0; + + ret = amdgpu_irq_get(smu->adev, &smu->irq_source, 0); + if (ret) + return ret; + + return smu_v14_0_process_pending_interrupt(smu); +} + +int smu_v14_0_disable_thermal_alert(struct smu_context *smu) { + if (!smu->irq_source.num_types) + return 0; + + return amdgpu_irq_put(smu->adev, &smu->irq_source, 0); } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index 1fbceb85d319..2b874e1ba4ea 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -1276,10 +1276,41 @@ static int smu_v14_0_2_update_pcie_parameters(struct smu_context *smu, return 0; } +static const struct smu_temperature_range smu14_thermal_policy[] = { + {-273150, 99000, 99000, -273150, 99000, 99000, -273150, 99000, 99000}, + { 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000, +120000}, }; + static int smu_v14_0_2_get_thermal_temperature_range(struct smu_context *smu, struct smu_temperature_range *range) { - // TODO + struct smu_table_context *table_context = &smu->smu_table; + struct smu_14_0_2_powerplay_table *powerplay_table = + table_context->power_play_table; + PPTable_t *pptable = smu->smu_table.driver_pptable; + + if (amdgpu_sriov_vf(smu->adev)) + return 0; + + if (!range) + return -EINVAL; + + memcpy(range, &smu14_thermal_policy[0], sizeof(struct +smu_temperature_range)); + + range->max = pptable->CustomSkuTable.TemperatureLimit[TEMP_EDGE] * + SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; + range->edge_emergency_max = (pptable->CustomSkuTable.TemperatureLimit[TEMP_EDGE] + CTF_OFFSET_EDGE) * + SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; + range->hotspot_crit_max = pptable->CustomSkuTable.TemperatureLimit[TEMP_HOTSPOT] * + SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; + range->hotspot_emergency_max = (pptable->CustomSkuTable.TemperatureLimit[TEMP_HOTSPOT] + CTF_OFFSET_HOTSPOT) * + SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; + range->mem_crit_max = pptable->CustomSkuTable.TemperatureLimit[TEMP_MEM] * + SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; + range->mem_emergency_max = (pptable->CustomSkuTable.TemperatureLimit[TEMP_MEM] + CTF_OFFSET_MEM)* + SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; + range->software_shutdown_temp = powerplay_table->software_shutdown_temp; + range->software_shutdown_temp_offset = +pptable->CustomSkuTable.FanAbnormalTempLimitOffset; return 0; } @@ -1866,6 +1897,8 @@ static const struct pptable_funcs smu_v14_0_2_ppt_funcs = { .update_pcie_parameters = smu_v14_0_2_update_pcie_parameters, .get_thermal_temperature_range = smu_v14_0_2_get_thermal_temperature_range, .register_irq_handler = smu_v14_0_register_irq_handler, + .enable_thermal_alert = smu_v14_0_enable_thermal_alert, + .disable_thermal_alert = smu_v14_0_disable_thermal_alert, .notify_memory_pool_location = smu_v14_0_notify_memory_pool_location, .set_soft_freq_limited_range = smu_v14_0_set_soft_freq_limited_range, .init_pptable_microcode = smu_v14_0_init_pptable_microcode, -- 2.34.1