[Public] Hi Jesse, > -----Original Message----- > From: jesse.zhang@xxxxxxx <jesse.zhang@xxxxxxx> > Sent: Thursday, February 20, 2025 4:31 PM > To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx > Cc: Deucher, Alexander <Alexander.Deucher@xxxxxxx>; Zhu, Jiadong > <Jiadong.Zhu@xxxxxxx>; Huang, Tim <Tim.Huang@xxxxxxx>; Zhang, > Jesse(Jie) <Jesse.Zhang@xxxxxxx>; Prosyak, Vitaly > <Vitaly.Prosyak@xxxxxxx>; Zhang, Jesse(Jie) <Jesse.Zhang@xxxxxxx> > Subject: [PATCH v2 1/2] drm/amd/pm: add support for checking SDMA reset > capability > > From: "Jesse.zhang@xxxxxxx" <Jesse.zhang@xxxxxxx> > > This patch introduces a new function to check if the SMU supports resetting > the SDMA engine. > This capability check ensures that the driver does not attempt to reset the > SDMA engine on hardware that does not support it. > > The following changes are included: > - New function `amdgpu_dpm_reset_sdma_is_supported` to check SDMA > reset > support at the AMDGPU driver level. > - New function `smu_reset_sdma_is_supported` to check SDMA reset support > at the SMU level. > - Implementation of `smu_v13_0_6_reset_sdma_is_supported` for the > specific > SMU version v13.0.6. > - Updated `smu_v13_0_6_reset_sdma` to use the new capability check > before > attempting to reset the SDMA engine. > > Signed-off-by: Vitaly Prosyak <vitaly.prosyak@xxxxxxx> > Signed-off-by: Jesse Zhang <jesse.zhang@xxxxxxx> > --- > drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 23 > +++++++++++++++++++ > drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 1 + > drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 17 > ++++++++++++++ > drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 5 > ++++ .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 23 > ++++++++++++++++++- > 5 files changed, 68 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c > b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c > index faae9bf48aa4..26209d5ff787 100644 > --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c > +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c > @@ -722,6 +722,29 @@ int amdgpu_dpm_send_rma_reason(struct > amdgpu_device *adev) > return ret; > } > > +/** > + * amdgpu_dpm_reset_sdma_is_supported - Check if SDMA reset is > +supported > + * @adev: amdgpu_device pointer > + * > + * This function checks if the SMU supports resetting the SDMA engine. > + * It returns -EOPNOTSUPP if the hardware does not support software SMU > +or > + * if the feature is not supported. > + */ > +int amdgpu_dpm_reset_sdma_is_supported(struct amdgpu_device *adev) { > + struct smu_context *smu = adev->powerplay.pp_handle; > + int ret; > + > + if (!is_support_sw_smu(adev)) > + return -EOPNOTSUPP; We may need to carefully set the return type and return value for this function, as it is used in your patch 2/2 like this: if ((adev->gfx.mec_fw_version >= 0xb0) &&amdgpu_dpm_reset_sdma_is_supported(adev)) > + > + mutex_lock(&adev->pm.mutex); > + ret = smu_reset_sdma_is_supported(smu); > + mutex_unlock(&adev->pm.mutex); > + > + return ret; > +} > + > int amdgpu_dpm_reset_sdma(struct amdgpu_device *adev, uint32_t > inst_mask) { > struct smu_context *smu = adev->powerplay.pp_handle; diff --git > a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h > b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h > index 1f5ac7e0230d..353a10119dc5 100644 > --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h > +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h > @@ -603,5 +603,6 @@ int amdgpu_dpm_set_pm_policy(struct > amdgpu_device *adev, int policy_type, ssize_t > amdgpu_dpm_get_pm_policy_info(struct amdgpu_device *adev, > enum pp_pm_policy p_type, char *buf); int > amdgpu_dpm_reset_sdma(struct amdgpu_device *adev, uint32_t inst_mask); > +int amdgpu_dpm_reset_sdma_is_supported(struct amdgpu_device *adev); > > #endif > diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c > b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c > index 0b32c6cf6924..f860590ef893 100644 > --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c > +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c > @@ -3907,6 +3907,23 @@ int smu_send_rma_reason(struct smu_context > *smu) > return ret; > } > > +/** > + * smu_reset_sdma_is_supported - Check if SDMA reset is supported by > +SMU > + * @smu: smu_context pointer > + * > + * This function checks if the SMU supports resetting the SDMA engine. > + * It returns 0 if supported, -EOPNOTSUPP otherwise. > + */ > +int smu_reset_sdma_is_supported(struct smu_context *smu) { > + int ret = 0; > + > + if (smu->ppt_funcs && smu->ppt_funcs->reset_sdma_is_supported) > + ret = smu->ppt_funcs->reset_sdma_is_supported(smu); > + > + return ret; > +} > + > int smu_reset_sdma(struct smu_context *smu, uint32_t inst_mask) { > int ret = 0; > diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h > b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h > index 3630593bce61..090a2b3b81a0 100644 > --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h > +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h > @@ -1376,6 +1376,10 @@ struct pptable_funcs { > * @reset_sdma: message SMU to soft reset sdma instance. > */ > int (*reset_sdma)(struct smu_context *smu, uint32_t inst_mask); > + /** > + * @reset_sdma_is_supported: Check if support resets the SDMA > engine. > + */ > + int (*reset_sdma_is_supported)(struct smu_context *smu); > > /** > * @get_ecc_table: message SMU to get ECC INFO table. > @@ -1637,6 +1641,7 @@ int smu_send_hbm_bad_pages_num(struct > smu_context *smu, uint32_t size); int > smu_send_hbm_bad_channel_flag(struct smu_context *smu, uint32_t size); > int smu_send_rma_reason(struct smu_context *smu); int > smu_reset_sdma(struct smu_context *smu, uint32_t inst_mask); > +int smu_reset_sdma_is_supported(struct smu_context *smu); > int smu_set_pm_policy(struct smu_context *smu, enum pp_pm_policy > p_type, > int level); > ssize_t smu_get_pm_policy_info(struct smu_context *smu, diff --git > a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c > b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c > index 9f2de69f53b2..a200ac7b15a8 100644 > --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c > +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c > @@ -2902,11 +2902,31 @@ static int > smu_v13_0_6_send_rma_reason(struct smu_context *smu) > return ret; > } > > +/** > + * smu_v13_0_6_reset_sdma_is_supported - Check if SDMA reset is > +supported > + * @smu: smu_context pointer > + * > + * This function checks if the SMU supports resetting the SDMA engine. > + * It returns -EOPNOTSUPP if the capability is not supported. > + */ > +static int smu_v13_0_6_reset_sdma_is_supported(struct smu_context > *smu) > +{ > + int ret = 0; > + > + if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(SDMA_RESET))) { > + dev_info(smu->adev->dev, > + "SDMA reset capability is not supported\n"); > + ret = EOPNOTSUPP; Need to set the return value consistence with the return of amdgpu_dpm_reset_sdma_is_supported if not supported. Tim > + } > + > + return ret; > +} > + > static int smu_v13_0_6_reset_sdma(struct smu_context *smu, uint32_t > inst_mask) { > int ret = 0; > > - if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(SDMA_RESET))) > + if (smu_v13_0_6_reset_sdma_is_supported(smu)) > return -EOPNOTSUPP; > > ret = smu_cmn_send_smc_msg_with_param(smu, > @@ -3590,6 +3610,7 @@ static const struct pptable_funcs > smu_v13_0_6_ppt_funcs = { > .send_hbm_bad_pages_num = > smu_v13_0_6_smu_send_hbm_bad_page_num, > .send_rma_reason = smu_v13_0_6_send_rma_reason, > .reset_sdma = smu_v13_0_6_reset_sdma, > + .reset_sdma_is_supported = smu_v13_0_6_reset_sdma_is_supported, > }; > > void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu) > -- > 2.25.1