On Tue, Oct 24, 2023 at 11:57 PM Kenneth Feng <kenneth.feng@xxxxxxx> wrote: > > fix the high voltage and temperature issue after the driver is unloaded on smu 13.0.0, > smu 13.0.7 and smu 13.0.10 > v2 - fix the code format and make sure it is used on the unload case only. > > Signed-off-by: Kenneth Feng <kenneth.feng@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 36 +++++++++++++++---- > drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 33 +++++++++++++++-- > drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 1 + > drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h | 2 ++ > .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 13 +++++++ > .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 8 ++++- > .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 8 ++++- > 7 files changed, 90 insertions(+), 11 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > index 31f8c3ead161..c5c892a8b3f9 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > @@ -3986,13 +3986,23 @@ int amdgpu_device_init(struct amdgpu_device *adev, > } > } > } else { > - tmp = amdgpu_reset_method; > - /* It should do a default reset when loading or reloading the driver, > - * regardless of the module parameter reset_method. > - */ > - amdgpu_reset_method = AMD_RESET_METHOD_NONE; > - r = amdgpu_asic_reset(adev); > - amdgpu_reset_method = tmp; > + switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { > + case IP_VERSION(13, 0, 0): > + case IP_VERSION(13, 0, 7): > + case IP_VERSION(13, 0, 10): > + r = psp_gpu_reset(adev); > + break; > + default: > + tmp = amdgpu_reset_method; > + /* It should do a default reset when loading or reloading the driver, > + * regardless of the module parameter reset_method. > + */ > + amdgpu_reset_method = AMD_RESET_METHOD_NONE; > + r = amdgpu_asic_reset(adev); > + amdgpu_reset_method = tmp; > + break; > + } > + > if (r) { > dev_err(adev->dev, "asic reset on init failed\n"); > goto failed; > @@ -5945,6 +5955,18 @@ int amdgpu_device_baco_exit(struct drm_device *dev) > return -ENOTSUPP; > > ret = amdgpu_dpm_baco_exit(adev); > + > + if (!ret) > + switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { > + case IP_VERSION(13, 0, 0): > + case IP_VERSION(13, 0, 7): > + case IP_VERSION(13, 0, 10): > + adev->gfx.is_poweron = false; > + break; > + default: > + break; > + } Was it not possible to put this in the smu13 baco exit code? > + > if (ret) > return ret; > > diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c > index 7c3356d6da5e..2e82172ba250 100644 > --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c > +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c > @@ -733,7 +733,7 @@ static int smu_early_init(void *handle) > smu->adev = adev; > smu->pm_enabled = !!amdgpu_dpm; > smu->is_apu = false; > - smu->smu_baco.state = SMU_BACO_STATE_EXIT; > + smu->smu_baco.state = SMU_BACO_STATE_NONE; > smu->smu_baco.platform_support = false; > smu->user_dpm_profile.fan_mode = -1; > > @@ -1740,10 +1740,31 @@ static int smu_smc_hw_cleanup(struct smu_context *smu) > return 0; > } > > +static int smu_reset_mp1_state(struct smu_context *smu) > +{ > + struct amdgpu_device *adev = smu->adev; > + int ret = 0; > + > + if ((!adev->in_runpm) && (!adev->in_suspend) && > + (!amdgpu_in_reset(adev))) > + switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { > + case IP_VERSION(13, 0, 0): > + case IP_VERSION(13, 0, 7): > + case IP_VERSION(13, 0, 10): > + ret = smu_set_mp1_state(smu, PP_MP1_STATE_UNLOAD); > + break; Is there any reason not to enable this on all dGPUs? Alex > + default: > + break; > + } > + > + return ret; > +} > + > static int smu_hw_fini(void *handle) > { > struct amdgpu_device *adev = (struct amdgpu_device *)handle; > struct smu_context *smu = adev->powerplay.pp_handle; > + int ret; > > if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) > return 0; > @@ -1761,7 +1782,15 @@ static int smu_hw_fini(void *handle) > > adev->pm.dpm_enabled = false; > > - return smu_smc_hw_cleanup(smu); > + ret = smu_smc_hw_cleanup(smu); > + if (ret) > + return ret; > + > + ret = smu_reset_mp1_state(smu); > + if (ret) > + return ret; > + > + return 0; > } > > static void smu_late_fini(void *handle) > diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h > index 1454eed76604..9f2dbc90b606 100644 > --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h > +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h > @@ -419,6 +419,7 @@ enum smu_reset_mode { > enum smu_baco_state { > SMU_BACO_STATE_ENTER = 0, > SMU_BACO_STATE_EXIT, > + SMU_BACO_STATE_NONE, > }; > > struct smu_baco_context { > diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h > index cc02f979e9e9..43c7ba68eb50 100644 > --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h > +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h > @@ -299,5 +299,7 @@ int smu_v13_0_update_pcie_parameters(struct smu_context *smu, > uint8_t pcie_gen_cap, > uint8_t pcie_width_cap); > > +int smu_v13_0_disable_pmfw_state(struct smu_context* smu); > + > #endif > #endif > diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c > index bcb7ab9d2221..0724441e53ef 100644 > --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c > +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c > @@ -2473,3 +2473,16 @@ int smu_v13_0_update_pcie_parameters(struct smu_context *smu, > > return 0; > } > + > +int smu_v13_0_disable_pmfw_state(struct smu_context* smu) > +{ > + int ret; > + struct amdgpu_device *adev = smu->adev; > + > + WREG32_PCIE(MP1_Public | (smnMP1_FIRMWARE_FLAGS & 0xffffffff), 0); > + > + ret = RREG32_PCIE(MP1_Public | > + (smnMP1_FIRMWARE_FLAGS & 0xffffffff)); > + > + return ret == 0 ? 0 : -EINVAL; > +} > diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c > index 47d008cbc186..e2a09fe29e2f 100644 > --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c > +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c > @@ -2758,7 +2758,13 @@ static int smu_v13_0_0_set_mp1_state(struct smu_context *smu, > > switch (mp1_state) { > case PP_MP1_STATE_UNLOAD: > - ret = smu_cmn_set_mp1_state(smu, mp1_state); > + ret = smu_cmn_send_smc_msg_with_param(smu, > + SMU_MSG_PrepareMp1ForUnload, > + 0x55, NULL); > + > + if (!ret && smu->smu_baco.state == SMU_BACO_STATE_EXIT) > + ret = smu_v13_0_disable_pmfw_state(smu); > + > break; > default: > /* Ignore others */ > diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c > index b8a7a1d853df..2a0d1da18a9b 100644 > --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c > +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c > @@ -2429,7 +2429,13 @@ static int smu_v13_0_7_set_mp1_state(struct smu_context *smu, > > switch (mp1_state) { > case PP_MP1_STATE_UNLOAD: > - ret = smu_cmn_set_mp1_state(smu, mp1_state); > + ret = smu_cmn_send_smc_msg_with_param(smu, > + SMU_MSG_PrepareMp1ForUnload, > + 0x55, NULL); > + > + if (!ret && smu->smu_baco.state == SMU_BACO_STATE_EXIT) > + ret = smu_v13_0_disable_pmfw_state(smu); > + > break; > default: > /* Ignore others */ > -- > 2.34.1 >