[PATCH 159/159] drm/amd/pm: add new data in metrics table

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Kenneth Feng <kenneth.feng@xxxxxxx>

Export new data in the metrics table: the gfx and memory
utilization counters, and the temperature of each HBM instance.

v2:
change the metrics table version to v1.1

v3:
fix the coding style

Signed-off-by: Kenneth Feng <kenneth.feng@xxxxxxx>
Reviewed-by: Kevin Wang <kevin1.wang@xxxxxxx>
Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx>
---
 .../gpu/drm/amd/include/kgd_pp_interface.h    | 11 ++++
 drivers/gpu/drm/amd/pm/inc/smu_v13_0.h        |  2 +
 .../drm/amd/pm/swsmu/smu13/aldebaran_ppt.c    | 55 +++++++++++--------
 .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c    | 12 ++++
 4 files changed, 56 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index c6b5c789abf0..ce01f012963b 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -227,6 +227,8 @@ enum pp_df_cstate {
 #define XGMI_MODE_PSTATE_D3 0
 #define XGMI_MODE_PSTATE_D0 1
 
+#define NUM_HBM_INSTANCES 4
+
 struct seq_file;
 enum amd_pp_clock_type;
 struct amd_pp_simple_clock_info;
@@ -389,6 +391,15 @@ struct gpu_metrics_v1_0 {
 	uint8_t				pcie_link_speed; // in 0.1 GT/s
 };
 
+struct gpu_metrics_v1_1 {
+	struct gpu_metrics_v1_0 v1_0; // v1.0 table embedded as first member; common_header is reached via v1_0
+
+	uint32_t			gfx_activity_acc; // gfx utilization counter (aldebaran fills it from SMU GfxBusyAcc)
+	uint32_t			mem_activity_acc; // memory utilization counter (aldebaran fills it from SMU DramBusyAcc)
+
+	uint16_t			temperature_hbm[NUM_HBM_INSTANCES]; // one temperature per HBM instance; units not shown here - TODO confirm
+};
+
 struct gpu_metrics_v2_0 {
 	struct metrics_table_header	common_header;
 
diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h
index ef9dad9a51ff..b9f87285875f 100644
--- a/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h
+++ b/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h
@@ -267,6 +267,8 @@ int smu_v13_0_get_current_pcie_link_speed(struct smu_context *smu);
 
 void smu_v13_0_init_gpu_metrics_v1_0(struct gpu_metrics_v1_0 *gpu_metrics);
 
+void smu_v13_0_init_gpu_metrics_v1_1(struct gpu_metrics_v1_1 *gpu_metrics);
+
 int smu_v13_0_gfx_ulv_control(struct smu_context *smu,
 			      bool enablement);
 
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
index c463af1cafa0..1727de2b31bd 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
@@ -1296,10 +1296,11 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu,
 					 void **table)
 {
 	struct smu_table_context *smu_table = &smu->smu_table;
-	struct gpu_metrics_v1_0 *gpu_metrics =
-		(struct gpu_metrics_v1_0 *)smu_table->gpu_metrics_table;
+	struct gpu_metrics_v1_1 *gpu_metrics =
+		(struct gpu_metrics_v1_1 *)smu_table->gpu_metrics_table;
 	SmuMetrics_t metrics;
 	int ret = 0;
+	int i;
 
 	ret = smu_cmn_get_metrics_table(smu,
 					&metrics,
@@ -1307,40 +1308,46 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu,
 	if (ret)
 		return ret;
 
-	smu_v13_0_init_gpu_metrics_v1_0(gpu_metrics);
+	smu_v13_0_init_gpu_metrics_v1_1(gpu_metrics);
 
-	gpu_metrics->temperature_edge = metrics.TemperatureEdge;
-	gpu_metrics->temperature_hotspot = metrics.TemperatureHotspot;
-	gpu_metrics->temperature_mem = metrics.TemperatureHBM;
-	gpu_metrics->temperature_vrgfx = metrics.TemperatureVrGfx;
-	gpu_metrics->temperature_vrsoc = metrics.TemperatureVrSoc;
-	gpu_metrics->temperature_vrmem = metrics.TemperatureVrMem;
+	gpu_metrics->v1_0.temperature_edge = metrics.TemperatureEdge;
+	gpu_metrics->v1_0.temperature_hotspot = metrics.TemperatureHotspot;
+	gpu_metrics->v1_0.temperature_mem = metrics.TemperatureHBM;
+	gpu_metrics->v1_0.temperature_vrgfx = metrics.TemperatureVrGfx;
+	gpu_metrics->v1_0.temperature_vrsoc = metrics.TemperatureVrSoc;
+	gpu_metrics->v1_0.temperature_vrmem = metrics.TemperatureVrMem;
 
-	gpu_metrics->average_gfx_activity = metrics.AverageGfxActivity;
-	gpu_metrics->average_umc_activity = metrics.AverageUclkActivity;
+	gpu_metrics->v1_0.average_gfx_activity = metrics.AverageGfxActivity;
+	gpu_metrics->v1_0.average_umc_activity = metrics.AverageUclkActivity;
 
-	gpu_metrics->average_socket_power = metrics.AverageSocketPower;
+	gpu_metrics->v1_0.average_socket_power = metrics.AverageSocketPower;
 
-	gpu_metrics->average_gfxclk_frequency = metrics.AverageGfxclkFrequency;
-	gpu_metrics->average_socclk_frequency = metrics.AverageSocclkFrequency;
-	gpu_metrics->average_uclk_frequency = metrics.AverageUclkFrequency;
+	gpu_metrics->v1_0.average_gfxclk_frequency = metrics.AverageGfxclkFrequency;
+	gpu_metrics->v1_0.average_socclk_frequency = metrics.AverageSocclkFrequency;
+	gpu_metrics->v1_0.average_uclk_frequency = metrics.AverageUclkFrequency;
 
-	gpu_metrics->current_gfxclk = metrics.CurrClock[PPCLK_GFXCLK];
-	gpu_metrics->current_socclk = metrics.CurrClock[PPCLK_SOCCLK];
-	gpu_metrics->current_uclk = metrics.CurrClock[PPCLK_UCLK];
-	gpu_metrics->current_vclk0 = metrics.CurrClock[PPCLK_VCLK];
-	gpu_metrics->current_dclk0 = metrics.CurrClock[PPCLK_DCLK];
+	gpu_metrics->v1_0.current_gfxclk = metrics.CurrClock[PPCLK_GFXCLK];
+	gpu_metrics->v1_0.current_socclk = metrics.CurrClock[PPCLK_SOCCLK];
+	gpu_metrics->v1_0.current_uclk = metrics.CurrClock[PPCLK_UCLK];
+	gpu_metrics->v1_0.current_vclk0 = metrics.CurrClock[PPCLK_VCLK];
+	gpu_metrics->v1_0.current_dclk0 = metrics.CurrClock[PPCLK_DCLK];
 
-	gpu_metrics->throttle_status = metrics.ThrottlerStatus;
+	gpu_metrics->v1_0.throttle_status = metrics.ThrottlerStatus;
 
-	gpu_metrics->pcie_link_width =
+	gpu_metrics->v1_0.pcie_link_width =
 		smu_v13_0_get_current_pcie_link_width(smu);
-	gpu_metrics->pcie_link_speed =
+	gpu_metrics->v1_0.pcie_link_speed =
 		aldebaran_get_current_pcie_link_speed(smu);
 
+	gpu_metrics->gfx_activity_acc = metrics.GfxBusyAcc;
+	gpu_metrics->mem_activity_acc = metrics.DramBusyAcc;
+
+	for (i = 0; i < NUM_HBM_INSTANCES; i++)
+		gpu_metrics->temperature_hbm[i] = metrics.TemperatureAllHBM[i];
+
 	*table = (void *)gpu_metrics;
 
-	return sizeof(struct gpu_metrics_v1_0);
+	return sizeof(*gpu_metrics);
 }
 
 static bool aldebaran_is_mode1_reset_supported(struct smu_context *smu)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
index ce160f233323..cde299c110b4 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
@@ -1820,3 +1820,15 @@ void smu_v13_0_init_gpu_metrics_v1_0(struct gpu_metrics_v1_0 *gpu_metrics)
 
 	gpu_metrics->system_clock_counter = ktime_get_boottime_ns();
 }
+
+void smu_v13_0_init_gpu_metrics_v1_1(struct gpu_metrics_v1_1 *gpu_metrics)
+{
+	memset(gpu_metrics, 0xFF, sizeof(struct gpu_metrics_v1_1)); // NOTE(review): writes the full v1.1 size; confirm the gpu_metrics_table allocation (not in this patch) is at least this large
+
+	gpu_metrics->v1_0.common_header.structure_size =
+		sizeof(struct gpu_metrics_v1_1); // report the v1.1 size, not the embedded v1.0 size
+	gpu_metrics->v1_0.common_header.format_revision = 1;
+	gpu_metrics->v1_0.common_header.content_revision = 1; // format 1 / content 1 identifies the v1.1 table
+
+	gpu_metrics->v1_0.system_clock_counter = ktime_get_boottime_ns(); // timestamp taken at init; other fields stay 0xFF until the caller sets them
+}
-- 
2.29.2

_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx



[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux