If gpu reset is triggered by ras fatal error, tell it to smu in mode-1 reset message. v2: move mode-1 reset function to aldebaran_ppt.c since it's aldebaran specific currently. Signed-off-by: Tao Zhou <tao.zhou1@xxxxxxx> --- drivers/gpu/drm/amd/pm/inc/smu_v13_0.h | 3 +- .../drm/amd/pm/swsmu/smu13/aldebaran_ppt.c | 36 ++++++++++++++++++- .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 21 ----------- 3 files changed, 37 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h index e5d3b0d1a032..bbc608c990b0 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h @@ -29,6 +29,8 @@ #define SMU13_DRIVER_IF_VERSION_YELLOW_CARP 0x04 #define SMU13_DRIVER_IF_VERSION_ALDE 0x07 +#define SMU13_MODE1_RESET_WAIT_TIME_IN_MS 500 //500ms + /* MP Apertures */ #define MP0_Public 0x03800000 #define MP0_SRAM 0x03900000 @@ -216,7 +218,6 @@ int smu_v13_0_baco_set_state(struct smu_context *smu, enum smu_baco_state state) int smu_v13_0_baco_enter(struct smu_context *smu); int smu_v13_0_baco_exit(struct smu_context *smu); -int smu_v13_0_mode1_reset(struct smu_context *smu); int smu_v13_0_mode2_reset(struct smu_context *smu); int smu_v13_0_get_dpm_ultimate_freq(struct smu_context *smu, enum smu_clk_type clk_type, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index 59a7d276541d..e50d4491aa96 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -1765,6 +1765,40 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu, return sizeof(struct gpu_metrics_v1_3); } +static int aldebaran_mode1_reset(struct smu_context *smu) +{ + u32 smu_version, fatal_err, param; + int ret = 0; + struct amdgpu_device *adev = smu->adev; + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + + fatal_err = 0; + param = SMU_RESET_MODE_1; + + /* + * PM FW support SMU_MSG_GfxDeviceDriverReset from 68.07 + */ + smu_cmn_get_smc_version(smu, NULL, &smu_version); + if (smu_version < 0x00440700) + ret = smu_cmn_send_smc_msg(smu, SMU_MSG_Mode1Reset, NULL); + else { + /* fatal error triggered by ras, PMFW supports the flag + from 68.44.0 */ + if ((smu_version >= 0x00442c00) && ras && + atomic_read(&ras->in_recovery)) + fatal_err = 1; + + param |= (fatal_err << 16); + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_GfxDeviceDriverReset, param, NULL); + } + + if (!ret) + msleep(SMU13_MODE1_RESET_WAIT_TIME_IN_MS); + + return ret; +} + static int aldebaran_mode2_reset(struct smu_context *smu) { u32 smu_version; @@ -1925,7 +1959,7 @@ static const struct pptable_funcs aldebaran_ppt_funcs = { .get_gpu_metrics = aldebaran_get_gpu_metrics, .mode1_reset_is_support = aldebaran_is_mode1_reset_supported, .mode2_reset_is_support = aldebaran_is_mode2_reset_supported, - .mode1_reset = smu_v13_0_mode1_reset, + .mode1_reset = aldebaran_mode1_reset, .set_mp1_state = aldebaran_set_mp1_state, .mode2_reset = aldebaran_mode2_reset, .wait_for_event = smu_v13_0_wait_for_event, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index 35145db6eedf..4d96099a9bb1 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -60,8 +60,6 @@ MODULE_FIRMWARE("amdgpu/aldebaran_smc.bin"); #define SMU13_VOLTAGE_SCALE 4 -#define SMU13_MODE1_RESET_WAIT_TIME_IN_MS 500 //500ms - #define LINK_WIDTH_MAX 6 #define LINK_SPEED_MAX 3 @@ -1424,25 +1422,6 @@ int smu_v13_0_set_azalia_d3_pme(struct smu_context *smu) return ret; } -int smu_v13_0_mode1_reset(struct smu_context *smu) -{ - u32 smu_version; - int ret = 0; - /* - * PM FW support SMU_MSG_GfxDeviceDriverReset from 68.07 - */ - smu_cmn_get_smc_version(smu, NULL, &smu_version); - if (smu_version < 0x00440700) - ret = smu_cmn_send_smc_msg(smu, SMU_MSG_Mode1Reset, NULL); - else - ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GfxDeviceDriverReset, SMU_RESET_MODE_1, NULL); - - if (!ret) - msleep(SMU13_MODE1_RESET_WAIT_TIME_IN_MS); - - return ret; -} - static int smu_v13_0_wait_for_reset_complete(struct smu_context *smu, uint64_t event_arg) { -- 2.17.1