-----邮件原件-----
发件人: Lazar, Lijo <Lijo.Lazar@xxxxxxx>
发送时间: Thursday, November 18, 2021 7:33 PM
收件人: Yang, Stanley <Stanley.Yang@xxxxxxx>; amd-
gfx@xxxxxxxxxxxxxxxxxxxxx; Zhang, Hawking <Hawking.Zhang@xxxxxxx>;
Clements, John <John.Clements@xxxxxxx>; Quan, Evan
<Evan.Quan@xxxxxxx>; Wang, Yang(Kevin) <KevinYang.Wang@xxxxxxx>
主题: Re: [PATCH Review 3/4] drm/amdgpu: add message smu to get
ecc_table v2
On 11/18/2021 3:03 PM, Stanley.Yang wrote:
support ECC TABLE message, this table include umc ras error count and
error address
v2:
add smu version check to query whether support ecctable
call smu_cmn_update_table to get ecctable directly
Signed-off-by: Stanley.Yang <Stanley.Yang@xxxxxxx>
---
drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h | 8 +++
drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 14 ++++
.../drm/amd/pm/swsmu/smu13/aldebaran_ppt.c | 70
+++++++++++++++++++
.../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 2 +
4 files changed, 94 insertions(+)
diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
index 3557f4e7fc30..7a06021a58f0 100644
--- a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
@@ -324,6 +324,7 @@ enum smu_table_id
SMU_TABLE_OVERDRIVE,
SMU_TABLE_I2C_COMMANDS,
SMU_TABLE_PACE,
+ SMU_TABLE_ECCINFO,
SMU_TABLE_COUNT,
};
@@ -340,6 +341,7 @@ struct smu_table_context
void *max_sustainable_clocks;
struct smu_bios_boot_up_values boot_values;
void *driver_pptable;
+ void *ecc_table;
struct smu_table tables[SMU_TABLE_COUNT];
/*
* The driver table is just a staging buffer for @@ -1261,6
+1263,11 @@ struct pptable_funcs {
*
of SMUBUS table.
*/
int (*send_hbm_bad_pages_num)(struct smu_context *smu,
uint32_t
size);
+
+ /**
+ * @get_ecc_table: message SMU to get ECC INFO table.
+ */
+ ssize_t (*get_ecc_info)(struct smu_context *smu, void *table);
};
typedef enum {
@@ -1397,6 +1404,7 @@ int smu_set_light_sbr(struct smu_context *smu,
bool enable);
int smu_wait_for_event(struct amdgpu_device *adev, enum
smu_event_type event,
uint64_t event_arg);
+int smu_get_ecc_info(struct smu_context *smu, void *umc_ecc);
#endif
#endif
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 01168b8955bf..fd3b6b460b12 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -3072,6 +3072,20 @@ int smu_set_light_sbr(struct smu_context *smu,
bool enable)
return ret;
}
+int smu_get_ecc_info(struct smu_context *smu, void *umc_ecc) {
+ int ret = -EOPNOTSUPP;
+
+ mutex_lock(&smu->mutex);
+ if (smu->ppt_funcs &&
+ smu->ppt_funcs->get_ecc_info)
+ ret = smu->ppt_funcs->get_ecc_info(smu, umc_ecc);
+ mutex_unlock(&smu->mutex);
+
+ return ret;
+
+}
+
static int smu_get_prv_buffer_details(void *handle, void **addr, size_t
*size)
{
struct smu_context *smu = handle;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
index f835d86cc2f5..4c21609ccea5 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
@@ -78,6 +78,12 @@
#define smnPCIE_ESM_CTRL 0x111003D0
+/*
+ * SMU support ECCTABLE since version 68.42.0,
+ * use this to check ECCTALE feature whether support */ #define
+SUPPORT_ECCTABLE_SMU_VERSION 0x00442a00
+
static const struct smu_temperature_range smu13_thermal_policy[] =
{
{-273150, 99000, 99000, -273150, 99000, 99000, -273150, 99000,
99000}, @@ -190,6 +196,7 @@ static const struct cmn2asic_mapping
aldebaran_table_map[SMU_TABLE_COUNT] = {
TAB_MAP(SMU_METRICS),
TAB_MAP(DRIVER_SMU_CONFIG),
TAB_MAP(I2C_COMMANDS),
+ TAB_MAP(ECCINFO),
};
static const uint8_t aldebaran_throttler_map[] = { @@ -223,6 +230,9
@@ static int aldebaran_tables_init(struct smu_context *smu)
SMU_TABLE_INIT(tables, SMU_TABLE_I2C_COMMANDS,
sizeof(SwI2cRequest_t),
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
+ SMU_TABLE_INIT(tables, SMU_TABLE_ECCINFO,
sizeof(EccInfoTable_t),
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
+
smu_table->metrics_table = kzalloc(sizeof(SmuMetrics_t),
GFP_KERNEL);
if (!smu_table->metrics_table)
return -ENOMEM;
@@ -235,6 +245,10 @@ static int aldebaran_tables_init(struct smu_context
*smu)
return -ENOMEM;
}
+ smu_table->ecc_table = kzalloc(tables[SMU_TABLE_ECCINFO].size,
GFP_KERNEL);
+ if (!smu_table->ecc_table)
+ return -ENOMEM;
+
return 0;
}
@@ -1765,6 +1779,61 @@ static ssize_t aldebaran_get_gpu_metrics(struct
smu_context *smu,
return sizeof(struct gpu_metrics_v1_3);
}
+static int aldebaran_check_ecc_table_support(struct smu_context *smu)
+{
+ uint32_t if_version = 0xff, smu_version = 0xff;
+ int ret = 0;
+
+ ret = smu_cmn_get_smc_version(smu, &if_version, &smu_version);
+ if (ret)
+ ret = -EOPNOTSUPP; // return not support if failed get