On 7/7/2023 3:47 PM, Danijel Slivka wrote:
Why:
If the register mmMP1_SMN_C2PMSG_90 is programmed to 0x0 before
guest initialization, then modprobe amdgpu will fail at smu hw_init.
(The default value of mmMP1_SMN_C2PMSG_90 in a clean guest environment is 0x1.)
A response to the FW message doesn't mean SMU is idle. Probably, this
is only a check that FW is ready.
Instead of introducing a new ppt function, move this implementation to
check_fw_status(). For the VF case, a test message may be sent to
ascertain that the FW is ready.
Thanks,
Lijo
How to fix:
This patch checks whether the SMU is idle by sending a test
message to it. If the SMU is idle, it will respond.
Signed-off-by: Danijel Slivka <danijel.slivka@xxxxxxx>
Signed-off-by: Nikola Prica <nikola.prica@xxxxxxx>
Signed-off-by: Jingwen Chen <Jingwen.Chen2@xxxxxxx>
Signed-off-by: pengzhou <PengJu.Zhou@xxxxxxx>
---
drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 8 ++++
drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 7 ++++
.../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c | 1 +
drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c | 40 +++++++++++++++++++
drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h | 2 +
5 files changed, 58 insertions(+)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index ce41a8309582..63ea4cd32ece 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -1443,6 +1443,14 @@ static int smu_start_smc_engine(struct smu_context *smu)
}
}
+ if (amdgpu_sriov_vf(adev) && smu->ppt_funcs->wait_smu_idle) {
+ ret = smu->ppt_funcs->wait_smu_idle(smu);
+ if (ret) {
+ dev_err(adev->dev, "SMU is not idle\n");
+ return ret;
+ }
+ }
+
/*
* Send msg GetDriverIfVersion to check if the return value is equal
* with DRIVER_IF_VERSION of smc header.
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index 6e2069dcb6b9..1bf87ad30d93 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -926,6 +926,13 @@ struct pptable_funcs {
*/
int (*check_fw_status)(struct smu_context *smu);
+ /**
+ * @wait_smu_idle: wait for SMU idle status.
+ *
+ * Return: Zero if check passes, negative errno on failure.
+ */
+ int (*wait_smu_idle)(struct smu_context *smu);
+
/**
* @set_mp1_state: put SMU into a correct state for comming
* resume from runpm or gpu reset.
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
index c94d825a871b..3745e4f96433 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
@@ -3503,6 +3503,7 @@ static const struct pptable_funcs navi10_ppt_funcs = {
.init_power = smu_v11_0_init_power,
.fini_power = smu_v11_0_fini_power,
.check_fw_status = smu_v11_0_check_fw_status,
+ .wait_smu_idle = smu_cmn_wait_smu_idle,
.setup_pptable = navi10_setup_pptable,
.get_vbios_bootup_values = smu_v11_0_get_vbios_bootup_values,
.check_fw_version = smu_v11_0_check_fw_version,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
index 3ecb900e6ecd..e3c972984b2b 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
@@ -313,6 +313,46 @@ int smu_cmn_wait_for_response(struct smu_context *smu)
return res;
}
+/**
+ * smu_cmn_wait_smu_idle -- probe the SMU with a test message
+ * @smu: pointer to an SMU context
+ *
+ * Maps SMU_MSG_TestMessage to its ASIC-specific index, sends it with a
+ * fixed parameter, obtains the status from __smu_cmn_poll_stat() and
+ * converts that status to an errno with __smu_cmn_reg2errno(). The
+ * argument is then read back and compared against the parameter that
+ * was sent, plus one.
+ *
+ * When SMU_DEBUG_HALT_ON_ERROR is set in the debug mask and the
+ * converted status is a non-zero value other than -ETIME, the device is
+ * halted and a warning is raised before the argument is read back.
+ *
+ * NOTE(review): the index returned by smu_cmn_to_asic_specific_index()
+ * is passed on without being checked -- confirm it cannot be an error
+ * value here.
+ * NOTE(review): a response presumably shows that the firmware is able
+ * to answer messages; whether that implies "idle" should be confirmed.
+ *
+ * Return: 0 if the read-back argument equals the sent parameter + 1;
+ * otherwise the result of __smu_cmn_reg2errno() for the polled status.
+ * See __smu_cmn_reg2errno() for details of the -errno.
+ */
+int smu_cmn_wait_smu_idle(struct smu_context *smu)
+{
+ u32 reg;
+ u32 param = 0xff00011;
+ uint32_t read_arg;
+ int res, index;
+
+ index = smu_cmn_to_asic_specific_index(smu,
+ CMN2ASIC_MAPPING_MSG,
+ SMU_MSG_TestMessage);
+
+ __smu_cmn_send_msg(smu, index, param);
+ reg = __smu_cmn_poll_stat(smu);
+ res = __smu_cmn_reg2errno(smu, reg);
+
+ if (unlikely(smu->adev->pm.smu_debug_mask & SMU_DEBUG_HALT_ON_ERROR) &&
+ res && (res != -ETIME)) {
+ amdgpu_device_halt(smu->adev);
+ WARN_ON(1);
+ }
+
+ smu_cmn_read_arg(smu, &read_arg);
+ if (read_arg == param + 1)
+ return 0;
+ return res;
+}
+
+
/**
* smu_cmn_send_smc_msg_with_param -- send a message with parameter
* @smu: pointer to an SMU context
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
index d7cd358a53bd..65da886d6a8c 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
@@ -50,6 +50,8 @@ int smu_cmn_send_debug_smc_msg_with_param(struct smu_context *smu,
int smu_cmn_wait_for_response(struct smu_context *smu);
+int smu_cmn_wait_smu_idle(struct smu_context *smu);
+
int smu_cmn_to_asic_specific_index(struct smu_context *smu,
enum smu_cmn2asic_mapping_type type,
uint32_t index);