RE: [PATCH v2 1/2] drm/amd/pm: add support for checking SDMA reset capability

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



[Public]

Hi Jesse,

> -----Original Message-----
> From: jesse.zhang@xxxxxxx <jesse.zhang@xxxxxxx>
> Sent: Thursday, February 20, 2025 4:31 PM
> To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx
> Cc: Deucher, Alexander <Alexander.Deucher@xxxxxxx>; Zhu, Jiadong
> <Jiadong.Zhu@xxxxxxx>; Huang, Tim <Tim.Huang@xxxxxxx>; Zhang,
> Jesse(Jie) <Jesse.Zhang@xxxxxxx>; Prosyak, Vitaly
> <Vitaly.Prosyak@xxxxxxx>; Zhang, Jesse(Jie) <Jesse.Zhang@xxxxxxx>
> Subject: [PATCH v2 1/2] drm/amd/pm: add support for checking SDMA reset
> capability
>
> From: "Jesse.zhang@xxxxxxx" <Jesse.zhang@xxxxxxx>
>
> This patch introduces a new function to check if the SMU supports resetting
> the SDMA engine.
> This capability check ensures that the driver does not attempt to reset the
> SDMA engine on hardware that does not support it.
>
> The following changes are included:
> - New function `amdgpu_dpm_reset_sdma_is_supported` to check SDMA
> reset
>   support at the AMDGPU driver level.
> - New function `smu_reset_sdma_is_supported` to check SDMA reset support
>   at the SMU level.
> - Implementation of `smu_v13_0_6_reset_sdma_is_supported` for the
> specific
>   SMU version v13.0.6.
> - Updated `smu_v13_0_6_reset_sdma` to use the new capability check
> before
>   attempting to reset the SDMA engine.
>
> Signed-off-by: Vitaly Prosyak <vitaly.prosyak@xxxxxxx>
> Signed-off-by: Jesse Zhang <jesse.zhang@xxxxxxx>
> ---
>  drivers/gpu/drm/amd/pm/amdgpu_dpm.c           | 23
> +++++++++++++++++++
>  drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h       |  1 +
>  drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c     | 17
> ++++++++++++++
>  drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  5
> ++++  .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c  | 23
> ++++++++++++++++++-
>  5 files changed, 68 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> index faae9bf48aa4..26209d5ff787 100644
> --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> @@ -722,6 +722,29 @@ int amdgpu_dpm_send_rma_reason(struct
> amdgpu_device *adev)
>       return ret;
>  }
>
> +/**
> + * amdgpu_dpm_reset_sdma_is_supported - Check if SDMA reset is
> +supported
> + * @adev: amdgpu_device pointer
> + *
> + * This function checks if the SMU supports resetting the SDMA engine.
> + * It returns -EOPNOTSUPP if the hardware does not support software SMU
> +or
> + * if the feature is not supported.
> + */
> +int amdgpu_dpm_reset_sdma_is_supported(struct amdgpu_device *adev) {
> +     struct smu_context *smu = adev->powerplay.pp_handle;
> +     int ret;
> +
> +     if (!is_support_sw_smu(adev))
> +             return -EOPNOTSUPP;


We may need to carefully set the return type and return value for this function, as it is used in your patch 2/2 like this:
            if ((adev->gfx.mec_fw_version >= 0xb0) &&amdgpu_dpm_reset_sdma_is_supported(adev))
> +
> +     mutex_lock(&adev->pm.mutex);
> +     ret = smu_reset_sdma_is_supported(smu);
> +     mutex_unlock(&adev->pm.mutex);
> +
> +     return ret;
> +}
> +
>  int amdgpu_dpm_reset_sdma(struct amdgpu_device *adev, uint32_t
> inst_mask)  {
>       struct smu_context *smu = adev->powerplay.pp_handle; diff --git
> a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> index 1f5ac7e0230d..353a10119dc5 100644
> --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> @@ -603,5 +603,6 @@ int amdgpu_dpm_set_pm_policy(struct
> amdgpu_device *adev, int policy_type,  ssize_t
> amdgpu_dpm_get_pm_policy_info(struct amdgpu_device *adev,
>                                     enum pp_pm_policy p_type, char *buf);  int
> amdgpu_dpm_reset_sdma(struct amdgpu_device *adev, uint32_t inst_mask);
> +int amdgpu_dpm_reset_sdma_is_supported(struct amdgpu_device *adev);
>
>  #endif
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> index 0b32c6cf6924..f860590ef893 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> @@ -3907,6 +3907,23 @@ int smu_send_rma_reason(struct smu_context
> *smu)
>       return ret;
>  }
>
> +/**
> + * smu_reset_sdma_is_supported - Check if SDMA reset is supported by
> +SMU
> + * @smu: smu_context pointer
> + *
> + * This function checks if the SMU supports resetting the SDMA engine.
> + * It returns 0 if supported, -EOPNOTSUPP otherwise.
> + */
> +int smu_reset_sdma_is_supported(struct smu_context *smu) {
> +     int ret = 0;
> +
> +     if (smu->ppt_funcs && smu->ppt_funcs->reset_sdma_is_supported)
> +             ret = smu->ppt_funcs->reset_sdma_is_supported(smu);
> +
> +     return ret;
> +}
> +
>  int smu_reset_sdma(struct smu_context *smu, uint32_t inst_mask)  {
>       int ret = 0;
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> index 3630593bce61..090a2b3b81a0 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> @@ -1376,6 +1376,10 @@ struct pptable_funcs {
>        * @reset_sdma: message SMU to soft reset sdma instance.
>        */
>       int (*reset_sdma)(struct smu_context *smu, uint32_t inst_mask);
> +     /**
> +      * @reset_sdma_is_supported: Check if support resets the SDMA
> engine.
> +      */
> +     int (*reset_sdma_is_supported)(struct smu_context *smu);
>
>       /**
>        * @get_ecc_table:  message SMU to get ECC INFO table.
> @@ -1637,6 +1641,7 @@ int smu_send_hbm_bad_pages_num(struct
> smu_context *smu, uint32_t size);  int
> smu_send_hbm_bad_channel_flag(struct smu_context *smu, uint32_t size);
> int smu_send_rma_reason(struct smu_context *smu);  int
> smu_reset_sdma(struct smu_context *smu, uint32_t inst_mask);
> +int smu_reset_sdma_is_supported(struct smu_context *smu);
>  int smu_set_pm_policy(struct smu_context *smu, enum pp_pm_policy
> p_type,
>                     int level);
>  ssize_t smu_get_pm_policy_info(struct smu_context *smu, diff --git
> a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
> b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
> index 9f2de69f53b2..a200ac7b15a8 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
> @@ -2902,11 +2902,31 @@ static int
> smu_v13_0_6_send_rma_reason(struct smu_context *smu)
>       return ret;
>  }
>
> +/**
> + * smu_v13_0_6_reset_sdma_is_supported - Check if SDMA reset is
> +supported
> + * @smu: smu_context pointer
> + *
> + * This function checks if the SMU supports resetting the SDMA engine.
> + * It returns -EOPNOTSUPP if the capability is not supported.
> + */
> +static int smu_v13_0_6_reset_sdma_is_supported(struct smu_context
> *smu)
> +{
> +     int ret = 0;
> +
> +     if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(SDMA_RESET))) {
> +             dev_info(smu->adev->dev,
> +                     "SDMA reset capability is not supported\n");
> +             ret = EOPNOTSUPP;

Need to set the return value consistence with the return of amdgpu_dpm_reset_sdma_is_supported if not supported.

Tim
> +     }
> +
> +     return ret;
> +}
> +
>  static int smu_v13_0_6_reset_sdma(struct smu_context *smu, uint32_t
> inst_mask)  {
>       int ret = 0;
>
> -     if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(SDMA_RESET)))
> +     if (smu_v13_0_6_reset_sdma_is_supported(smu))
>               return -EOPNOTSUPP;
>
>       ret = smu_cmn_send_smc_msg_with_param(smu,
> @@ -3590,6 +3610,7 @@ static const struct pptable_funcs
> smu_v13_0_6_ppt_funcs = {
>       .send_hbm_bad_pages_num =
> smu_v13_0_6_smu_send_hbm_bad_page_num,
>       .send_rma_reason = smu_v13_0_6_send_rma_reason,
>       .reset_sdma = smu_v13_0_6_reset_sdma,
> +     .reset_sdma_is_supported = smu_v13_0_6_reset_sdma_is_supported,
>  };
>
>  void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu)
> --
> 2.25.1





[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux