[AMD Official Use Only - General] Reviewed-by: Hawking Zhang <Hawking.Zhang@xxxxxxx> Regards, Hawking -----Original Message----- From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of Candice Li Sent: Friday, January 13, 2023 10:40 To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx Cc: Li, Candice <Candice.Li@xxxxxxx> Subject: [PATCH] drm/amd/pm: Support RAS fatal error mode1 reset on smu v13_0_0 and v13_0_10 Support RAS fatal error mode1 reset on smu v13_0_0 and v13_0_10. Signed-off-by: Candice Li <candice.li@xxxxxxx> Reviewed-by: Evan Quan <evan.quan@xxxxxxx> --- .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 42 +++++++++++++++++-- drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c | 6 +++ drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h | 3 ++ 3 files changed, 48 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 969e5f96554015..d0cdc578344d8d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -1904,15 +1904,51 @@ static int smu_v13_0_0_set_df_cstate(struct smu_context *smu, NULL); } +static void smu_v13_0_0_set_mode1_reset_param(struct smu_context *smu, + uint32_t supported_version, + uint32_t *param) +{ + uint32_t smu_version; + struct amdgpu_device *adev = smu->adev; + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + + smu_cmn_get_smc_version(smu, NULL, &smu_version); + + if ((smu_version >= supported_version) && + ras && atomic_read(&ras->in_recovery)) + /* Set RAS fatal error reset flag */ + *param = 1 << 16; + else + *param = 0; +} + static int smu_v13_0_0_mode1_reset(struct smu_context *smu) { int ret; + uint32_t param; struct amdgpu_device *adev = smu->adev; - if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 10)) - ret = smu_cmn_send_debug_smc_msg(smu, DEBUGSMC_MSG_Mode1Reset); - else + switch (adev->ip_versions[MP1_HWIP][0]) { + case IP_VERSION(13, 0, 0): + /* SMU 13_0_0 PMFW supports RAS fatal error reset from 78.77 */ + smu_v13_0_0_set_mode1_reset_param(smu, 0x004e4d00, ¶m); + + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_Mode1Reset, param, NULL); + break; + + case IP_VERSION(13, 0, 10): + /* SMU 13_0_10 PMFW supports RAS fatal error reset from 80.28 */ + smu_v13_0_0_set_mode1_reset_param(smu, 0x00501c00, ¶m); + + ret = smu_cmn_send_debug_smc_msg_with_param(smu, + DEBUGSMC_MSG_Mode1Reset, param); + break; + + default: ret = smu_cmn_send_smc_msg(smu, SMU_MSG_Mode1Reset, NULL); + break; + } if (!ret) msleep(SMU13_MODE1_RESET_WAIT_TIME_IN_MS); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c index 768b6e7dbd7719..d5abafc5a68201 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c @@ -404,6 +404,12 @@ int smu_cmn_send_debug_smc_msg(struct smu_context *smu, return __smu_cmn_send_debug_msg(smu, msg, 0); } +int smu_cmn_send_debug_smc_msg_with_param(struct smu_context *smu, + uint32_t msg, uint32_t param) +{ + return __smu_cmn_send_debug_msg(smu, msg, param); } + int smu_cmn_to_asic_specific_index(struct smu_context *smu, enum smu_cmn2asic_mapping_type type, uint32_t index) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h index f82cf76dd3a474..d7cd358a53bdcd 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h @@ -45,6 +45,9 @@ int smu_cmn_send_smc_msg(struct smu_context *smu, int smu_cmn_send_debug_smc_msg(struct smu_context *smu, uint32_t msg); +int smu_cmn_send_debug_smc_msg_with_param(struct smu_context *smu, + uint32_t msg, uint32_t param); + int smu_cmn_wait_for_response(struct smu_context *smu); int smu_cmn_to_asic_specific_index(struct smu_context *smu, -- 2.17.1