From: Kenneth Feng <kenneth.feng@xxxxxxx> Export new data in the metrics table: the gfx and memory utilization counters, and the temperature of each HBM instance as well. v2: change the metrics table version to v1.1 v3: fix the coding style Signed-off-by: Kenneth Feng <kenneth.feng@xxxxxxx> Reviewed-by: Kevin Wang <kevin1.wang@xxxxxxx> Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx> --- .../gpu/drm/amd/include/kgd_pp_interface.h | 11 ++++ drivers/gpu/drm/amd/pm/inc/smu_v13_0.h | 2 + .../drm/amd/pm/swsmu/smu13/aldebaran_ppt.c | 55 +++++++++++-------- .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 12 ++++ 4 files changed, 56 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index c6b5c789abf0..ce01f012963b 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -227,6 +227,8 @@ enum pp_df_cstate { #define XGMI_MODE_PSTATE_D3 0 #define XGMI_MODE_PSTATE_D0 1 +#define NUM_HBM_INSTANCES 4 + struct seq_file; enum amd_pp_clock_type; struct amd_pp_simple_clock_info; @@ -389,6 +391,15 @@ struct gpu_metrics_v1_0 { uint8_t pcie_link_speed; // in 0.1 GT/s }; +struct gpu_metrics_v1_1 { + struct gpu_metrics_v1_0 v1_0; + + uint32_t gfx_activity_acc; + uint32_t mem_activity_acc; + + uint16_t temperature_hbm[NUM_HBM_INSTANCES]; +}; + struct gpu_metrics_v2_0 { struct metrics_table_header common_header; diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h index ef9dad9a51ff..b9f87285875f 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/inc/smu_v13_0.h @@ -267,6 +267,8 @@ int smu_v13_0_get_current_pcie_link_speed(struct smu_context *smu); void smu_v13_0_init_gpu_metrics_v1_0(struct gpu_metrics_v1_0 *gpu_metrics); +void smu_v13_0_init_gpu_metrics_v1_1(struct gpu_metrics_v1_1 *gpu_metrics); + int smu_v13_0_gfx_ulv_control(struct smu_context *smu, bool enablement); diff --git 
a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index c463af1cafa0..1727de2b31bd 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -1296,10 +1296,11 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu, void **table) { struct smu_table_context *smu_table = &smu->smu_table; - struct gpu_metrics_v1_0 *gpu_metrics = - (struct gpu_metrics_v1_0 *)smu_table->gpu_metrics_table; + struct gpu_metrics_v1_1 *gpu_metrics = + (struct gpu_metrics_v1_1 *)smu_table->gpu_metrics_table; SmuMetrics_t metrics; int ret = 0; + int i; ret = smu_cmn_get_metrics_table(smu, &metrics, @@ -1307,40 +1308,46 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu, if (ret) return ret; - smu_v13_0_init_gpu_metrics_v1_0(gpu_metrics); + smu_v13_0_init_gpu_metrics_v1_1(gpu_metrics); - gpu_metrics->temperature_edge = metrics.TemperatureEdge; - gpu_metrics->temperature_hotspot = metrics.TemperatureHotspot; - gpu_metrics->temperature_mem = metrics.TemperatureHBM; - gpu_metrics->temperature_vrgfx = metrics.TemperatureVrGfx; - gpu_metrics->temperature_vrsoc = metrics.TemperatureVrSoc; - gpu_metrics->temperature_vrmem = metrics.TemperatureVrMem; + gpu_metrics->v1_0.temperature_edge = metrics.TemperatureEdge; + gpu_metrics->v1_0.temperature_hotspot = metrics.TemperatureHotspot; + gpu_metrics->v1_0.temperature_mem = metrics.TemperatureHBM; + gpu_metrics->v1_0.temperature_vrgfx = metrics.TemperatureVrGfx; + gpu_metrics->v1_0.temperature_vrsoc = metrics.TemperatureVrSoc; + gpu_metrics->v1_0.temperature_vrmem = metrics.TemperatureVrMem; - gpu_metrics->average_gfx_activity = metrics.AverageGfxActivity; - gpu_metrics->average_umc_activity = metrics.AverageUclkActivity; + gpu_metrics->v1_0.average_gfx_activity = metrics.AverageGfxActivity; + gpu_metrics->v1_0.average_umc_activity = metrics.AverageUclkActivity; - gpu_metrics->average_socket_power = 
metrics.AverageSocketPower; + gpu_metrics->v1_0.average_socket_power = metrics.AverageSocketPower; - gpu_metrics->average_gfxclk_frequency = metrics.AverageGfxclkFrequency; - gpu_metrics->average_socclk_frequency = metrics.AverageSocclkFrequency; - gpu_metrics->average_uclk_frequency = metrics.AverageUclkFrequency; + gpu_metrics->v1_0.average_gfxclk_frequency = metrics.AverageGfxclkFrequency; + gpu_metrics->v1_0.average_socclk_frequency = metrics.AverageSocclkFrequency; + gpu_metrics->v1_0.average_uclk_frequency = metrics.AverageUclkFrequency; - gpu_metrics->current_gfxclk = metrics.CurrClock[PPCLK_GFXCLK]; - gpu_metrics->current_socclk = metrics.CurrClock[PPCLK_SOCCLK]; - gpu_metrics->current_uclk = metrics.CurrClock[PPCLK_UCLK]; - gpu_metrics->current_vclk0 = metrics.CurrClock[PPCLK_VCLK]; - gpu_metrics->current_dclk0 = metrics.CurrClock[PPCLK_DCLK]; + gpu_metrics->v1_0.current_gfxclk = metrics.CurrClock[PPCLK_GFXCLK]; + gpu_metrics->v1_0.current_socclk = metrics.CurrClock[PPCLK_SOCCLK]; + gpu_metrics->v1_0.current_uclk = metrics.CurrClock[PPCLK_UCLK]; + gpu_metrics->v1_0.current_vclk0 = metrics.CurrClock[PPCLK_VCLK]; + gpu_metrics->v1_0.current_dclk0 = metrics.CurrClock[PPCLK_DCLK]; - gpu_metrics->throttle_status = metrics.ThrottlerStatus; + gpu_metrics->v1_0.throttle_status = metrics.ThrottlerStatus; - gpu_metrics->pcie_link_width = + gpu_metrics->v1_0.pcie_link_width = smu_v13_0_get_current_pcie_link_width(smu); - gpu_metrics->pcie_link_speed = + gpu_metrics->v1_0.pcie_link_speed = aldebaran_get_current_pcie_link_speed(smu); + gpu_metrics->gfx_activity_acc = metrics.GfxBusyAcc; + gpu_metrics->mem_activity_acc = metrics.DramBusyAcc; + + for (i = 0; i < NUM_HBM_INSTANCES; i++) + gpu_metrics->temperature_hbm[i] = metrics.TemperatureAllHBM[i]; + *table = (void *)gpu_metrics; - return sizeof(struct gpu_metrics_v1_0); + return sizeof(*gpu_metrics); } static bool aldebaran_is_mode1_reset_supported(struct smu_context *smu) diff --git 
a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index ce160f233323..cde299c110b4 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -1820,3 +1820,15 @@ void smu_v13_0_init_gpu_metrics_v1_0(struct gpu_metrics_v1_0 *gpu_metrics) gpu_metrics->system_clock_counter = ktime_get_boottime_ns(); } + +void smu_v13_0_init_gpu_metrics_v1_1(struct gpu_metrics_v1_1 *gpu_metrics) +{ + memset(gpu_metrics, 0xFF, sizeof(struct gpu_metrics_v1_1)); + + gpu_metrics->v1_0.common_header.structure_size = + sizeof(struct gpu_metrics_v1_1); + gpu_metrics->v1_0.common_header.format_revision = 1; + gpu_metrics->v1_0.common_header.content_revision = 1; + + gpu_metrics->v1_0.system_clock_counter = ktime_get_boottime_ns(); +} -- 2.29.2 _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx