support ECC TABLE message, this table include unc ras error count and error address Signed-off-by: Stanley.Yang <Stanley.Yang@xxxxxxx> --- drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h | 7 ++++ .../drm/amd/pm/swsmu/smu13/aldebaran_ppt.c | 38 +++++++++++++++++++ .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 2 + drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c | 24 ++++++++++++ drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h | 3 ++ 5 files changed, 74 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h index 3557f4e7fc30..ea65de0160c3 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h @@ -324,6 +324,7 @@ enum smu_table_id SMU_TABLE_OVERDRIVE, SMU_TABLE_I2C_COMMANDS, SMU_TABLE_PACE, + SMU_TABLE_ECCINFO, SMU_TABLE_COUNT, }; @@ -340,6 +341,7 @@ struct smu_table_context void *max_sustainable_clocks; struct smu_bios_boot_up_values boot_values; void *driver_pptable; + void *ecc_table; struct smu_table tables[SMU_TABLE_COUNT]; /* * The driver table is just a staging buffer for @@ -1261,6 +1263,11 @@ struct pptable_funcs { * of SMUBUS table. */ int (*send_hbm_bad_pages_num)(struct smu_context *smu, uint32_t size); + + /** + * @get_ecc_table: message SMU to get ECC INFO table. + */ + ssize_t (*get_ecc_info)(struct smu_context *smu, void *table); }; typedef enum { diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index f835d86cc2f5..5e4ba0e14a91 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -190,6 +190,7 @@ static const struct cmn2asic_mapping aldebaran_table_map[SMU_TABLE_COUNT] = { TAB_MAP(SMU_METRICS), TAB_MAP(DRIVER_SMU_CONFIG), TAB_MAP(I2C_COMMANDS), + TAB_MAP(ECCINFO), }; static const uint8_t aldebaran_throttler_map[] = { @@ -223,6 +224,9 @@ static int aldebaran_tables_init(struct smu_context *smu) SMU_TABLE_INIT(tables, SMU_TABLE_I2C_COMMANDS, sizeof(SwI2cRequest_t), PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); + SMU_TABLE_INIT(tables, SMU_TABLE_ECCINFO, sizeof(EccInfoTable_t), + PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); + smu_table->metrics_table = kzalloc(sizeof(SmuMetrics_t), GFP_KERNEL); if (!smu_table->metrics_table) return -ENOMEM; @@ -235,6 +239,10 @@ static int aldebaran_tables_init(struct smu_context *smu) return -ENOMEM; } + smu_table->ecc_table = kzalloc(tables[SMU_TABLE_ECCINFO].size, GFP_KERNEL); + if (!smu_table->ecc_table) + return -ENOMEM; + return 0; } @@ -1765,6 +1773,35 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu, return sizeof(struct gpu_metrics_v1_3); } +static ssize_t aldebaran_get_ecc_info(struct smu_context *smu, + void *table) +{ + struct smu_table_context *smu_table = &smu->smu_table; + EccInfoTable_t ecc_table; + struct ecc_info_per_ch *ecc_info_per_channel = NULL; + int i, ret = 0; + struct umc_ecc_info *eccinfo = (struct umc_ecc_info *)table; + + ret = smu_cmn_get_ecc_info_table(smu, + &ecc_table); + if (ret) + return ret; + + for (i = 0; i < ALDEBARAN_UMC_CHANNEL_NUM; i++) { + ecc_info_per_channel = &(eccinfo->ecc[i]); + ecc_info_per_channel->ce_count_lo_chip = + ecc_table.EccInfo[i].ce_count_lo_chip; + ecc_info_per_channel->ce_count_hi_chip = + ecc_table.EccInfo[i].ce_count_hi_chip; + ecc_info_per_channel->mca_umc_status = + ecc_table.EccInfo[i].mca_umc_status; + ecc_info_per_channel->mca_umc_addr = + ecc_table.EccInfo[i].mca_umc_addr; + } + + return ret; +} + static int aldebaran_mode1_reset(struct smu_context *smu) { u32 smu_version, fatal_err, param; @@ -1967,6 +2004,7 @@ static const struct pptable_funcs aldebaran_ppt_funcs = { .i2c_init = aldebaran_i2c_control_init, .i2c_fini = aldebaran_i2c_control_fini, .send_hbm_bad_pages_num = aldebaran_smu_send_hbm_bad_page_num, + .get_ecc_info = aldebaran_get_ecc_info, }; void aldebaran_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index 4d96099a9bb1..55421ea622fb 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -428,8 +428,10 @@ int smu_v13_0_fini_smc_tables(struct smu_context *smu) kfree(smu_table->hardcode_pptable); smu_table->hardcode_pptable = NULL; + kfree(smu_table->ecc_table); kfree(smu_table->metrics_table); kfree(smu_table->watermarks_table); + smu_table->ecc_table = NULL; smu_table->metrics_table = NULL; smu_table->watermarks_table = NULL; smu_table->metrics_time = 0; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c index 843d2cbfc71d..e229c9b09d80 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c @@ -983,6 +983,30 @@ int smu_cmn_get_metrics_table(struct smu_context *smu, return ret; } +int smu_cmn_get_ecc_info_table(struct smu_context *smu, + void *ecc_table) +{ + struct smu_table_context *smu_table= &smu->smu_table; + uint32_t table_size = + smu_table->tables[SMU_TABLE_ECCINFO].size; + int ret = 0; + + ret = smu_cmn_update_table(smu, + SMU_TABLE_ECCINFO, + 0, + smu_table->ecc_table, + false); + if (ret) { + dev_info(smu->adev->dev, "Failed to export SMU ecc table!\n"); + return ret; + } + + if (ecc_table) + memcpy(ecc_table, smu_table->ecc_table, table_size); + + return 0; +} + void smu_cmn_init_soft_gpu_metrics(void *table, uint8_t frev, uint8_t crev) { struct metrics_table_header *header = (struct metrics_table_header *)table; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h index beea03810bca..0adc5451373b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h @@ -105,6 +105,9 @@ int smu_cmn_get_metrics_table(struct smu_context *smu, void *metrics_table, bool bypass_cache); +int smu_cmn_get_ecc_info_table(struct smu_context *smu, + void *table); + void smu_cmn_init_soft_gpu_metrics(void *table, uint8_t frev, uint8_t crev); int smu_cmn_set_mp1_state(struct smu_context *smu, -- 2.17.1