RE: [PATCH v2] drm/amd/pm: Ignore initial value in smu response register

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



[AMD Official Use Only - AMD Internal Distribution Only]

Reviewed-by: Kenneth Feng <kenneth.feng@xxxxxxx>


-----Original Message-----
From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of Danijel Slivka
Sent: Monday, July 8, 2024 9:31 PM
To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx
Cc: Slivka, Danijel <Danijel.Slivka@xxxxxxx>
Subject: [PATCH v2] drm/amd/pm: Ignore initial value in smu response register

Caution: This message originated from an External Source. Use proper caution when opening attachments, clicking links, or responding.


Why:
If the reg mmMP1_SMN_C2PMSG_90 is being written to during amdgpu driver load or driver unload, subsequent amdgpu driver load will fail at smu_hw_init. The default of mmMP1_SMN_C2PMSG_90 register at a clean environment is 0x1 and if value differs from expected, amdgpu driver load will fail.

How to fix:
Ignore the initial value in smu response register before the first smu message is sent,if smc in SMU_FW_INIT state, just proceed further to send the message. If register holds an unexpected value after smu message was sent set, smc_state to SMU_FW_HANG state and no further smu messages will be sent.

v2:
Set SMU_FW_INIT state at the start of smu hw_init/resume.
Check smc_fw_state before sending smu message if in hang state skip sending message.
Set SMU_FW_HANG only in case unexpected value is detected

Signed-off-by: Danijel Slivka <danijel.slivka@xxxxxxx>
---
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c     |  2 ++
 drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  7 ++++
 drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c        | 34 ++++++++++++++++---
 3 files changed, 38 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index d79bdb1e8cdf..fb8643d25d1b 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -1755,6 +1755,8 @@ static int smu_start_smc_engine(struct smu_context *smu)
        struct amdgpu_device *adev = smu->adev;
        int ret = 0;

+       smu->smc_fw_state = SMU_FW_INIT;
+
        if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
                if (amdgpu_ip_version(adev, MP1_HWIP, 0) < IP_VERSION(11, 0, 0)) {
                        if (smu->ppt_funcs->load_microcode) { diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index a34c802f52be..b44a185d07e8 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -495,6 +495,12 @@ struct stb_context {
        spinlock_t lock;
 };

+enum smu_fw_status {
+       SMU_FW_INIT = 0,
+       SMU_FW_RUNTIME,
+       SMU_FW_HANG,
+};
+
 #define WORKLOAD_POLICY_MAX 7

 /*
@@ -562,6 +568,7 @@ struct smu_context {
        uint32_t smc_fw_if_version;
        uint32_t smc_fw_version;
        uint32_t smc_fw_caps;
+       uint8_t smc_fw_state;

        bool uploading_custom_pp_table;
        bool dc_controlled_by_gpio;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
index 5592fd825aa3..d7c983a1f3f5 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
@@ -315,11 +315,20 @@ int smu_cmn_send_msg_without_waiting(struct smu_context *smu,
        if (adev->no_hw_access)
                return 0;

-       reg = __smu_cmn_poll_stat(smu);
-       res = __smu_cmn_reg2errno(smu, reg);
-       if (reg == SMU_RESP_NONE ||
-           res == -EREMOTEIO)
+       if (smu->smc_fw_state == SMU_FW_HANG) {
+               dev_err(adev->dev, "SMU is in hanged state, failed to
+ send smu message!\n");
                goto Out;
+       }
+
+       if (smu->smc_fw_state == SMU_FW_INIT) {
+               smu->smc_fw_state = SMU_FW_RUNTIME;
+       } else {
+               reg = __smu_cmn_poll_stat(smu);
+               res = __smu_cmn_reg2errno(smu, reg);
+               if (reg == SMU_RESP_NONE || res == -EREMOTEIO)
+                       goto Out;
+       }
+
        __smu_cmn_send_msg(smu, msg_index, param);
        res = 0;
 Out:
@@ -350,6 +359,9 @@ int smu_cmn_wait_for_response(struct smu_context *smu)
        reg = __smu_cmn_poll_stat(smu);
        res = __smu_cmn_reg2errno(smu, reg);

+       if (res == -EREMOTEIO)
+               smu->smc_fw_state = SMU_FW_HANG;
+
        if (unlikely(smu->adev->pm.smu_debug_mask & SMU_DEBUG_HALT_ON_ERROR) &&
            res && (res != -ETIME)) {
                amdgpu_device_halt(smu->adev); @@ -418,6 +430,15 @@ int smu_cmn_send_smc_msg_with_param(struct smu_context *smu,
                        goto Out;
        }

+       if (smu->smc_fw_state == SMU_FW_HANG) {
+               dev_err(adev->dev, "SMU is in hanged state, failed to send smu message!\n");
+               goto Out;
+       } else if (smu->smc_fw_state == SMU_FW_INIT) {
+               /* Ignore initial smu response register value */
+               poll = false;
+               smu->smc_fw_state = SMU_FW_RUNTIME;
+       }
+
        if (poll) {
                reg = __smu_cmn_poll_stat(smu);
                res = __smu_cmn_reg2errno(smu, reg); @@ -429,8 +450,11 @@ int smu_cmn_send_smc_msg_with_param(struct smu_context *smu,
        __smu_cmn_send_msg(smu, (uint16_t) index, param);
        reg = __smu_cmn_poll_stat(smu);
        res = __smu_cmn_reg2errno(smu, reg);
-       if (res != 0)
+       if (res != 0) {
+               if (res == -EREMOTEIO)
+                       smu->smc_fw_state = SMU_FW_HANG;
                __smu_cmn_reg_print_error(smu, reg, index, param, msg);
+       }
        if (read_arg) {
                smu_cmn_read_arg(smu, read_arg);
                dev_dbg(adev->dev, "smu send message: %s(%d) param: 0x%08x, resp: 0x%08x,\
--
2.34.1





[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux