ret = smu_cmn_get_smc_version(smu, &if_version, &smu_version); + if (ret) + return -EOPNOTSUPP; A nit-pick comment, looks querying if_version in this case is redundant, passing NULL may be more efficient. Regards, Guchun -----Original Message----- From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of Candice Li Sent: Wednesday, March 1, 2023 2:10 PM To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx Cc: Li, Candice <Candice.Li@xxxxxxx> Subject: [PATCH] drm/amd/pm: Enable ecc_info table support for smu v13_0_10 Support EccInfoTable which includes umc ras error count and error address. Signed-off-by: Candice Li <candice.li@xxxxxxx> Reviewed-by: Evan Quan <evan.quan@xxxxxxx> --- .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 75 +++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 923a9fb3c8873c..27448ffe60a439 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -46,6 +46,7 @@ #include "asic_reg/mp/mp_13_0_0_sh_mask.h" #include "smu_cmn.h" #include "amdgpu_ras.h" +#include "umc_v8_10.h" /* * DO NOT use these for err/warn/info/debug messages. @@ -90,6 +91,12 @@ #define DEBUGSMC_MSG_Mode1Reset 2 +/* + * SMU_v13_0_10 supports ECCTABLE since version 80.34.0, + * use this to check ECCTABLE feature whether support */ #define +SUPPORT_ECCTABLE_SMU_13_0_10_VERSION 0x00502200 + static struct cmn2asic_msg_mapping smu_v13_0_0_message_map[SMU_MSG_MAX_COUNT] = { MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 1), MSG_MAP(GetSmuVersion, PPSMC_MSG_GetSmuVersion, 1), @@ -229,6 +236,7 @@ static struct cmn2asic_mapping smu_v13_0_0_table_map[SMU_TABLE_COUNT] = { TAB_MAP(ACTIVITY_MONITOR_COEFF), [SMU_TABLE_COMBO_PPTABLE] = {1, TABLE_COMBO_PPTABLE}, TAB_MAP(I2C_COMMANDS), + TAB_MAP(ECCINFO), }; static struct cmn2asic_mapping smu_v13_0_0_pwr_src_map[SMU_POWER_SOURCE_COUNT] = { @@ -462,6 +470,8 @@ static int smu_v13_0_0_tables_init(struct smu_context *smu) AMDGPU_GEM_DOMAIN_VRAM); SMU_TABLE_INIT(tables, SMU_TABLE_COMBO_PPTABLE, MP0_MP1_DATA_REGION_SIZE_COMBOPPTABLE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); + SMU_TABLE_INIT(tables, SMU_TABLE_ECCINFO, sizeof(EccInfoTable_t), + PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); smu_table->metrics_table = kzalloc(sizeof(SmuMetricsExternal_t), GFP_KERNEL); if (!smu_table->metrics_table) @@ -477,8 +487,14 @@ static int smu_v13_0_0_tables_init(struct smu_context *smu) if (!smu_table->watermarks_table) goto err2_out; + smu_table->ecc_table = kzalloc(tables[SMU_TABLE_ECCINFO].size, GFP_KERNEL); + if (!smu_table->ecc_table) + goto err3_out; + return 0; +err3_out: + kfree(smu_table->watermarks_table); err2_out: kfree(smu_table->gpu_metrics_table); err1_out: @@ -2036,6 +2052,64 @@ static int smu_v13_0_0_send_bad_mem_channel_flag(struct smu_context *smu, return ret; } +static int smu_v13_0_0_check_ecc_table_support(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + uint32_t if_version = 0xff, smu_version = 0xff; + int ret = 0; + + ret = smu_cmn_get_smc_version(smu, &if_version, &smu_version); + if (ret) + return -EOPNOTSUPP; + + if ((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 10)) && + (smu_version >= SUPPORT_ECCTABLE_SMU_13_0_10_VERSION)) + return ret; + else + return -EOPNOTSUPP; +} + +static ssize_t smu_v13_0_0_get_ecc_info(struct smu_context *smu, + void *table) +{ + struct smu_table_context *smu_table = &smu->smu_table; + struct amdgpu_device *adev = smu->adev; + EccInfoTable_t *ecc_table = NULL; + struct ecc_info_per_ch *ecc_info_per_channel = NULL; + int i, ret = 0; + struct umc_ecc_info *eccinfo = (struct umc_ecc_info *)table; + + ret = smu_v13_0_0_check_ecc_table_support(smu); + if (ret) + return ret; + + ret = smu_cmn_update_table(smu, + SMU_TABLE_ECCINFO, + 0, + smu_table->ecc_table, + false); + if (ret) { + dev_info(adev->dev, "Failed to export SMU ecc table!\n"); + return ret; + } + + ecc_table = (EccInfoTable_t *)smu_table->ecc_table; + + for (i = 0; i < UMC_V8_10_TOTAL_CHANNEL_NUM(adev); i++) { + ecc_info_per_channel = &(eccinfo->ecc[i]); + ecc_info_per_channel->ce_count_lo_chip = + ecc_table->EccInfo[i].ce_count_lo_chip; + ecc_info_per_channel->ce_count_hi_chip = + ecc_table->EccInfo[i].ce_count_hi_chip; + ecc_info_per_channel->mca_umc_status = + ecc_table->EccInfo[i].mca_umc_status; + ecc_info_per_channel->mca_umc_addr = + ecc_table->EccInfo[i].mca_umc_addr; + } + + return ret; +} + static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .get_allowed_feature_mask = smu_v13_0_0_get_allowed_feature_mask, .set_default_dpm_table = smu_v13_0_0_set_default_dpm_table, @@ -2111,6 +2185,7 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .send_hbm_bad_pages_num = smu_v13_0_0_smu_send_bad_mem_page_num, .send_hbm_bad_channel_flag = smu_v13_0_0_send_bad_mem_channel_flag, .gpo_control = smu_v13_0_gpo_control, + .get_ecc_info = smu_v13_0_0_get_ecc_info, }; void smu_v13_0_0_set_ppt_funcs(struct smu_context *smu) -- 2.17.1