[PATCH] drm/amd/pm: bug fix for baco reset

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On vega20, rocm-smi reports the wrong gfx voltage after a BACO reset.
This can be reproduced as follows:
:~$ rocm-smi --showvoltage
GPU[0] : Voltage (mV): 737
:~$ rocm-smi -d0 --gpureset
GPU[0] : GPU reset was successful
:~$ rocm-smi --showvoltage
GPU[0] : Voltage (mV): 1550

Root cause: telemetry is disabled in asic_init after BACO exit.
This fix re-enables telemetry so that all the power and voltage
info can be fetched correctly. The MP1 firmware also depends on this
setting for DPM arbitration.

Signed-off-by: Kenneth Feng <kenneth.feng@xxxxxxx>
---
 .../drm/amd/pm/powerplay/hwmgr/vega20_baco.c    | 17 +++++++++++++++++
 .../drm/amd/pm/powerplay/hwmgr/vega20_baco.h    |  2 +-
 .../drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c   |  1 +
 3 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c
index 2a28c9df15a0..bb58097a925c 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c
@@ -28,9 +28,16 @@
 #include "vega20_ppsmc.h"
 #include "vega20_baco.h"
 #include "vega20_smumgr.h"
+#include "smuio/smuio_9_0_offset.h"
+#include "smuio/smuio_9_0_sh_mask.h"
 
 #include "amdgpu_ras.h"
 
+#define mmSMUSVI0_TFN 0x2
+#define SMUSVI0_TFN___PLANE0_MASK 0x1
+#define SMUSVI0_TFN___PLANE1_MASK 0x2
+#define mmSMUSVI0_TFN_BASE_IDX 0
+
 static const struct soc15_baco_cmd_entry clean_baco_tbl[] =
 {
 	{CMD_WRITE, SOC15_REG_ENTRY(NBIF, 0, mmBIOS_SCRATCH_6), 0, 0, 0, 0},
@@ -120,3 +127,13 @@ int vega20_baco_apply_vdci_flush_workaround(struct pp_hwmgr *hwmgr)
 
 	return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_BacoWorkAroundFlushVDCI, NULL);
 }
+
+void vega20_baco_override_telemetry_parameters(struct pp_hwmgr *hwmgr)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev);
+	uint32_t data = RREG32_SOC15(SMUIO, 0, mmSMUSVI0_TFN);
+
+	data &= (~SMUSVI0_TFN___PLANE0_MASK);
+	data |= SMUSVI0_TFN___PLANE1_MASK;
+	WREG32_SOC15(SMUIO, 0, mmSMUSVI0_TFN, data);
+}
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h
index f06471e712dc..9ca39569ba0e 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h
@@ -29,5 +29,5 @@ extern int vega20_baco_get_capability(struct pp_hwmgr *hwmgr, bool *cap);
 extern int vega20_baco_get_state(struct pp_hwmgr *hwmgr, enum BACO_STATE *state);
 extern int vega20_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state);
 extern int vega20_baco_apply_vdci_flush_workaround(struct pp_hwmgr *hwmgr);
-
+extern void vega20_baco_override_telemetry_parameters(struct pp_hwmgr *hwmgr);
 #endif
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c
index 213c9c6b4462..12830a8dd923 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c
@@ -503,6 +503,7 @@ static int vega20_setup_asic_task(struct pp_hwmgr *hwmgr)
 		ret = vega20_baco_apply_vdci_flush_workaround(hwmgr);
 		if (ret)
 			pr_err("Failed to apply vega20 baco workaround!\n");
+		vega20_baco_override_telemetry_parameters(hwmgr);
 	}
 
 	return ret;
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx



[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux