[AMD Official Use Only - General] Series is Reviewed-by: Hawking Zhang <Hawking.Zhang@xxxxxxx> Regards, Hawking -----Original Message----- From: Kamal, Asad <Asad.Kamal@xxxxxxx> Sent: Wednesday, August 16, 2023 20:32 To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx Cc: Lazar, Lijo <Lijo.Lazar@xxxxxxx>; Zhang, Morris <Shiwu.Zhang@xxxxxxx>; Kamal, Asad <Asad.Kamal@xxxxxxx>; Ma, Le <Le.Ma@xxxxxxx>; Zhang, Hawking <Hawking.Zhang@xxxxxxx>; Lazar, Lijo <Lijo.Lazar@xxxxxxx> Subject: [PATCH v2 2/2] drm/amd/pm: Add critical temp for GC v9.4.3 Add critical temperature message support function for smu v13.0.6 and expose critical temperature as part of hwmon attributes for GC v9.4.3 v2: Added comment for pmfw version requirement and moved the check to get_thermal_temperature_range function Signed-off-by: Asad Kamal <asad.kamal@xxxxxxx> Reviewed-by: Lijo Lazar <lijo.lazar@xxxxxxx> --- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 8 +-- drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h | 1 + .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 50 +++++++++++++++++++ 3 files changed, 55 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 5b1d73b00ef7..f03647fa3df6 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -3311,8 +3311,10 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, (gc_ver != IP_VERSION(9, 4, 3)) && (attr == &sensor_dev_attr_temp2_input.dev_attr.attr || attr == &sensor_dev_attr_temp2_label.dev_attr.attr || + attr == &sensor_dev_attr_temp2_crit.dev_attr.attr || attr == &sensor_dev_attr_temp3_input.dev_attr.attr || - attr == &sensor_dev_attr_temp3_label.dev_attr.attr)) + attr == &sensor_dev_attr_temp3_label.dev_attr.attr || + attr == &sensor_dev_attr_temp3_crit.dev_attr.attr)) return 0; /* hotspot temperature for gc 9,4,3*/ @@ -3324,9 +3326,7 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, /* only SOC15 dGPUs support hotspot and mem temperatures */ if (((adev->flags & 
AMD_IS_APU) || gc_ver < IP_VERSION(9, 0, 0) || (gc_ver == IP_VERSION(9, 4, 3))) && - (attr == &sensor_dev_attr_temp2_crit.dev_attr.attr || - attr == &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr || - attr == &sensor_dev_attr_temp3_crit.dev_attr.attr || + (attr == &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr || attr == &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr || attr == &sensor_dev_attr_temp1_emergency.dev_attr.attr || attr == &sensor_dev_attr_temp2_emergency.dev_attr.attr || diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h index 297b70b9388f..c2904791fc6f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h @@ -84,6 +84,7 @@ __SMU_DUMMY_MAP(SetTjMax), \ __SMU_DUMMY_MAP(SetFanTemperatureTarget), \ __SMU_DUMMY_MAP(PrepareMp1ForUnload), \ + __SMU_DUMMY_MAP(GetCTFLimit), \ __SMU_DUMMY_MAP(DramLogSetDramAddrHigh), \ __SMU_DUMMY_MAP(DramLogSetDramAddrLow), \ __SMU_DUMMY_MAP(DramLogSetDramSize), \ diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 6ed9cd0a1e4e..7d8af9b309b7 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -132,6 +132,7 @@ static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COU MSG_MAP(SetSoftMinGfxclk, PPSMC_MSG_SetSoftMinGfxClk, 0), MSG_MAP(SetSoftMaxGfxClk, PPSMC_MSG_SetSoftMaxGfxClk, 0), MSG_MAP(PrepareMp1ForUnload, PPSMC_MSG_PrepareForDriverUnload, 0), + MSG_MAP(GetCTFLimit, PPSMC_MSG_GetCTFLimit, 0), }; static const struct cmn2asic_mapping smu_v13_0_6_clk_map[SMU_CLK_COUNT] = { @@ -2081,6 +2082,54 @@ static int smu_v13_0_6_mode2_reset(struct smu_context *smu) return ret; } +static int smu_v13_0_6_get_thermal_temperature_range(struct smu_context *smu, + struct smu_temperature_range *range) { + struct amdgpu_device *adev = smu->adev; + u32 aid_temp, 
xcd_temp; + uint32_t smu_version; + u32 ccd_temp = 0; + int ret; + + if (amdgpu_sriov_vf(smu->adev)) + return 0; + + if (!range) + return -EINVAL; + + /*Check smu version, GetCtfLimit message only supported for smu version 85.69 or higher */ + smu_cmn_get_smc_version(smu, NULL, &smu_version); + if (smu_version < 0x554500) + return 0; + + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetCTFLimit, + PPSMC_AID_THM_TYPE, &aid_temp); + if (ret) + goto failed; + + if (adev->flags & AMD_IS_APU) { + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetCTFLimit, + PPSMC_CCD_THM_TYPE, &ccd_temp); + if (ret) + goto failed; + } + + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetCTFLimit, + PPSMC_XCD_THM_TYPE, &xcd_temp); + if (ret) + goto failed; + + range->hotspot_crit_max = max3(aid_temp, xcd_temp, ccd_temp); + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetCTFLimit, + PPSMC_HBM_THM_TYPE, &range->mem_crit_max); + if (ret) + goto failed; + + return 0; +failed: + return ret; +} + static int smu_v13_0_6_mode1_reset(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; @@ -2177,6 +2226,7 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = { .get_pp_feature_mask = smu_cmn_get_pp_feature_mask, .set_pp_feature_mask = smu_cmn_set_pp_feature_mask, .get_gpu_metrics = smu_v13_0_6_get_gpu_metrics, + .get_thermal_temperature_range = +smu_v13_0_6_get_thermal_temperature_range, .mode1_reset_is_support = smu_v13_0_6_is_mode1_reset_supported, .mode2_reset_is_support = smu_v13_0_6_is_mode2_reset_supported, .mode1_reset = smu_v13_0_6_mode1_reset, -- 2.34.1