Why: If the register mmMP1_SMN_C2PMSG_90 is programmed to 0x0 before guest initialization, modprobe amdgpu will fail at smu hw_init (the default value of mmMP1_SMN_C2PMSG_90 in a clean guest environment is 0x1). How to fix: this patch checks whether the SMU is idle by sending a test message to it. If the SMU is idle, it will respond. Signed-off-by: Danijel Slivka <danijel.slivka@xxxxxxx> Signed-off-by: Nikola Prica <nikola.prica@xxxxxxx> Signed-off-by: Jingwen Chen <Jingwen.Chen2@xxxxxxx> Signed-off-by: pengzhou <PengJu.Zhou@xxxxxxx> --- .../gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c | 9 +++++ drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c | 40 +++++++++++++++++++ drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h | 2 + 3 files changed, 51 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c index aa4a5498a12f..1568b9958150 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c @@ -184,6 +184,15 @@ int smu_v11_0_check_fw_status(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; uint32_t mp1_fw_flags; + int ret = 0; + + if (amdgpu_sriov_vf(adev)) { + ret = smu_cmn_wait_smu_idle(smu); + if (ret) { + dev_err(adev->dev, "SMU is not idle\n"); + return ret; + } + } mp1_fw_flags = RREG32_PCIE(MP1_Public | (smnMP1_FIRMWARE_FLAGS & 0xffffffff)); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c index 3ecb900e6ecd..e3c972984b2b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c @@ -313,6 +313,46 @@ int smu_cmn_wait_for_response(struct smu_context *smu) return res; } +/** + * smu_cmn_wait_smu_idle -- wait for smu to become idle + * @smu: pointer to an SMU context + * + * Send SMU_MSG_TestMessage to check whether SMU is idle. + * If SMU is idle, it will respond. + * The returned parameter will be the param you pass + 1. 
+ * + * Return 0 on success, -errno on error, indicating the execution + * status and result of the message being waited for. See + * __smu_cmn_reg2errno() for details of the -errno. + */ +int smu_cmn_wait_smu_idle(struct smu_context *smu) +{ + u32 reg; + u32 param = 0xff00011; + uint32_t read_arg; + int res, index; + + index = smu_cmn_to_asic_specific_index(smu, + CMN2ASIC_MAPPING_MSG, + SMU_MSG_TestMessage); + + __smu_cmn_send_msg(smu, index, param); + reg = __smu_cmn_poll_stat(smu); + res = __smu_cmn_reg2errno(smu, reg); + + if (unlikely(smu->adev->pm.smu_debug_mask & SMU_DEBUG_HALT_ON_ERROR) && + res && (res != -ETIME)) { + amdgpu_device_halt(smu->adev); + WARN_ON(1); + } + + smu_cmn_read_arg(smu, &read_arg); + if (read_arg == param + 1) + return 0; + return res; +} + + /** * smu_cmn_send_smc_msg_with_param -- send a message with parameter * @smu: pointer to an SMU context diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h index d7cd358a53bd..65da886d6a8c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h @@ -50,6 +50,8 @@ int smu_cmn_send_debug_smc_msg_with_param(struct smu_context *smu, int smu_cmn_wait_for_response(struct smu_context *smu); +int smu_cmn_wait_smu_idle(struct smu_context *smu); + int smu_cmn_to_asic_specific_index(struct smu_context *smu, enum smu_cmn2asic_mapping_type type, uint32_t index); -- 2.25.1