[AMD Official Use Only - General] Reviewed-by: Stanley.Yang <Stanley.Yang@xxxxxxx> Regards, Stanley > -----Original Message----- > From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of > Candice Li > Sent: Wednesday, March 1, 2023 2:10 PM > To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx > Cc: Li, Candice <Candice.Li@xxxxxxx> > Subject: [PATCH] drm/amd/pm: Enable ecc_info table support for smu > v13_0_10 > > Support EccInfoTable which includes umc ras error count and error address. > > Signed-off-by: Candice Li <candice.li@xxxxxxx> > Reviewed-by: Evan Quan <evan.quan@xxxxxxx> > --- > .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 75 > +++++++++++++++++++ > 1 file changed, 75 insertions(+) > > diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c > b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c > index 923a9fb3c8873c..27448ffe60a439 100644 > --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c > +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c > @@ -46,6 +46,7 @@ > #include "asic_reg/mp/mp_13_0_0_sh_mask.h" > #include "smu_cmn.h" > #include "amdgpu_ras.h" > +#include "umc_v8_10.h" > > /* > * DO NOT use these for err/warn/info/debug messages. > @@ -90,6 +91,12 @@ > > #define DEBUGSMC_MSG_Mode1Reset 2 > > +/* > + * SMU_v13_0_10 supports ECCTABLE since version 80.34.0, > + * use this to check ECCTABLE feature whether support */ #define > +SUPPORT_ECCTABLE_SMU_13_0_10_VERSION 0x00502200 > + > static struct cmn2asic_msg_mapping > smu_v13_0_0_message_map[SMU_MSG_MAX_COUNT] = { > MSG_MAP(TestMessage, > PPSMC_MSG_TestMessage, 1), > MSG_MAP(GetSmuVersion, > PPSMC_MSG_GetSmuVersion, 1), > @@ -229,6 +236,7 @@ static struct cmn2asic_mapping > smu_v13_0_0_table_map[SMU_TABLE_COUNT] = { > TAB_MAP(ACTIVITY_MONITOR_COEFF), > [SMU_TABLE_COMBO_PPTABLE] = {1, TABLE_COMBO_PPTABLE}, > TAB_MAP(I2C_COMMANDS), > + TAB_MAP(ECCINFO), > }; > > static struct cmn2asic_mapping > smu_v13_0_0_pwr_src_map[SMU_POWER_SOURCE_COUNT] = { @@ -462,6 > +470,8 @@ static int smu_v13_0_0_tables_init(struct smu_context *smu) > AMDGPU_GEM_DOMAIN_VRAM); > SMU_TABLE_INIT(tables, SMU_TABLE_COMBO_PPTABLE, > MP0_MP1_DATA_REGION_SIZE_COMBOPPTABLE, > PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); > + SMU_TABLE_INIT(tables, SMU_TABLE_ECCINFO, > sizeof(EccInfoTable_t), > + PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); > > smu_table->metrics_table = kzalloc(sizeof(SmuMetricsExternal_t), > GFP_KERNEL); > if (!smu_table->metrics_table) > @@ -477,8 +487,14 @@ static int smu_v13_0_0_tables_init(struct > smu_context *smu) > if (!smu_table->watermarks_table) > goto err2_out; > > + smu_table->ecc_table = kzalloc(tables[SMU_TABLE_ECCINFO].size, > GFP_KERNEL); > + if (!smu_table->ecc_table) > + goto err3_out; > + > return 0; > > +err3_out: > + kfree(smu_table->watermarks_table); > err2_out: > kfree(smu_table->gpu_metrics_table); > err1_out: > @@ -2036,6 +2052,64 @@ static int > smu_v13_0_0_send_bad_mem_channel_flag(struct smu_context *smu, > return ret; > } > > +static int smu_v13_0_0_check_ecc_table_support(struct smu_context > *smu) > +{ > + struct amdgpu_device *adev = smu->adev; > + uint32_t if_version = 0xff, smu_version = 0xff; > + int ret = 0; > + > + ret = smu_cmn_get_smc_version(smu, &if_version, &smu_version); > + if (ret) > + return -EOPNOTSUPP; > + > + if ((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 10)) && > + (smu_version >= > SUPPORT_ECCTABLE_SMU_13_0_10_VERSION)) > + return ret; > + else > + return -EOPNOTSUPP; > +} > + > +static ssize_t smu_v13_0_0_get_ecc_info(struct smu_context *smu, > + void > *table) > +{ > + struct smu_table_context *smu_table = &smu->smu_table; > + struct amdgpu_device *adev = smu->adev; > + EccInfoTable_t *ecc_table = NULL; > + struct ecc_info_per_ch *ecc_info_per_channel = NULL; > + int i, ret = 0; > + struct umc_ecc_info *eccinfo = (struct umc_ecc_info *)table; > + > + ret = smu_v13_0_0_check_ecc_table_support(smu); > + if (ret) > + return ret; > + > + ret = smu_cmn_update_table(smu, > + SMU_TABLE_ECCINFO, > + 0, > + smu_table->ecc_table, > + false); > + if (ret) { > + dev_info(adev->dev, "Failed to export SMU ecc table!\n"); > + return ret; > + } > + > + ecc_table = (EccInfoTable_t *)smu_table->ecc_table; > + > + for (i = 0; i < UMC_V8_10_TOTAL_CHANNEL_NUM(adev); i++) { > + ecc_info_per_channel = &(eccinfo->ecc[i]); > + ecc_info_per_channel->ce_count_lo_chip = > + ecc_table->EccInfo[i].ce_count_lo_chip; > + ecc_info_per_channel->ce_count_hi_chip = > + ecc_table->EccInfo[i].ce_count_hi_chip; > + ecc_info_per_channel->mca_umc_status = > + ecc_table->EccInfo[i].mca_umc_status; > + ecc_info_per_channel->mca_umc_addr = > + ecc_table->EccInfo[i].mca_umc_addr; > + } > + > + return ret; > +} > + > static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { > .get_allowed_feature_mask = > smu_v13_0_0_get_allowed_feature_mask, > .set_default_dpm_table = smu_v13_0_0_set_default_dpm_table, > @@ -2111,6 +2185,7 @@ static const struct pptable_funcs > smu_v13_0_0_ppt_funcs = { > .send_hbm_bad_pages_num = > smu_v13_0_0_smu_send_bad_mem_page_num, > .send_hbm_bad_channel_flag = > smu_v13_0_0_send_bad_mem_channel_flag, > .gpo_control = smu_v13_0_gpo_control, > + .get_ecc_info = smu_v13_0_0_get_ecc_info, > }; > > void smu_v13_0_0_set_ppt_funcs(struct smu_context *smu) > -- > 2.17.1