Adjust the dpm optimization settings on smu7 based on the activity
level read back from the SMU. In manual dpm mode, auto wattman is
disabled.

Change-Id: I68396f04be76793ece3b07f0ac1b646981ca1e29
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
---
 drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 218 ++++++++++++++++++++++-
 drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.h |  26 +++
 2 files changed, 239 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
index cb75c4f..0e7f986 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
@@ -82,7 +82,6 @@
 #define TCLK (PCIE_BUS_CLK / 10)
 
 static int smu7_set_power_profile_mode(struct pp_hwmgr *hwmgr, long *input, uint32_t size);
-
 static const struct profile_mode_setting smu7_profiling[5] =
 					{{1, 0, 100, 30, 1, 0, 100, 10},
 					 {1, 10, 0, 30, 0, 0, 0, 0},
@@ -2527,9 +2526,10 @@ static int smu7_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 		data->pcie_gen_cap = AMDGPU_DEFAULT_PCIE_GEN_MASK;
 	else
 		data->pcie_gen_cap = (uint32_t)sys_info.value;
+
 	if (data->pcie_gen_cap & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
 		data->pcie_spc_cap = 20;
-	sys_info.size = sizeof(struct cgs_system_info);
+
 	sys_info.info_id = CGS_SYSTEM_INFO_PCIE_MLW;
 	result = cgs_query_system_info(hwmgr->device, &sys_info);
 	if (result)
@@ -2537,6 +2537,11 @@
 	else
 		data->pcie_lane_cap = (uint32_t)sys_info.value;
+	sys_info.info_id = CGS_SYSTEM_INFO_VRAM_WIDTH;
+	result = cgs_query_system_info(hwmgr->device, &sys_info);
+	if (!result)
+		data->memory_bit_width = (uint32_t)sys_info.value;
+
 	hwmgr->platform_descriptor.vbiosInterruptId = 0x20000400; /* IRQ_SOURCE1_SW_INT */
 	/* The true clock step depends on the frequency, typically 4.5 or 9 MHz. Here we use 5.
 	 */
 	hwmgr->platform_descriptor.clockStep.engineClock = 500;
@@ -2772,10 +2777,17 @@ static int smu7_force_dpm_level(struct pp_hwmgr *hwmgr,
 		ret = smu7_force_dpm_lowest(hwmgr);
 		break;
 	case AMD_DPM_FORCED_LEVEL_AUTO:
-		if (hwmgr->default_power_profile_mode != hwmgr->power_profile_mode) {
-			long mode = hwmgr->default_power_profile_mode;
+		if (hwmgr->autowattman_enabled) {
+			if (hwmgr->hwmgr_func->start_auto_wattman != NULL) {
+				if (!cancel_delayed_work_sync(&hwmgr->wattman_update_work))
+					hwmgr->hwmgr_func->start_auto_wattman(hwmgr, true);
+			}
+		} else {
+			if (hwmgr->default_power_profile_mode != hwmgr->power_profile_mode) {
+				long mode = hwmgr->default_power_profile_mode;
 
-			smu7_set_power_profile_mode(hwmgr, &mode, 0);
+				smu7_set_power_profile_mode(hwmgr, &mode, 0);
+			}
 		}
 		if (hwmgr->chip_id == CHIP_FIJI)
 			smu7_enable_power_containment(hwmgr);
@@ -2794,6 +2806,11 @@ static int smu7_force_dpm_level(struct pp_hwmgr *hwmgr,
 		smu7_force_clock_level(hwmgr, PP_PCIE, 1<<pcie_mask);
 		break;
 	case AMD_DPM_FORCED_LEVEL_MANUAL:
+		if (hwmgr->autowattman_enabled) {
+			if (hwmgr->hwmgr_func->start_auto_wattman != NULL)
+				hwmgr->hwmgr_func->start_auto_wattman(hwmgr, false);
+		}
+		break;
 	case AMD_DPM_FORCED_LEVEL_PROFILE_EXIT:
 	default:
 		break;
 	}
@@ -5025,6 +5042,195 @@ static int smu7_set_power_profile_mode(struct pp_hwmgr *hwmgr, long *input, uint
 	return 0;
 }
 
+static void smu7_start_auto_wattman(struct pp_hwmgr *hwmgr, bool en)
+{
+	if (en) {
+		smum_send_msg_to_smc(hwmgr, PPSMC_MSG_PmStatusLogStart);
+		schedule_delayed_work(&hwmgr->wattman_update_work, WATTMAM_SAMPLE_PERIOD);
+	} else {
+		cancel_delayed_work_sync(&hwmgr->wattman_update_work);
+	}
+}
+
+static void smu7_auto_wattman_get_data(struct pp_hwmgr *hwmgr)
+{
+	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+
+	data->wattman_data.average_sclk_busy = cgs_read_ind_register(
+			hwmgr->device,
+			CGS_IND_REG__SMC,
+			ixSMU_PM_STATUS_91);
+	data->wattman_data.average_sclk_busy >>= 8;
+
+	data->wattman_data.average_mclk_busy = cgs_read_ind_register(
+			hwmgr->device,
+			CGS_IND_REG__SMC,
+			ixSMU_PM_STATUS_92);
+
+	data->wattman_data.average_mclk_busy >>= 8;
+
+	data->wattman_data.effective_sclk = cgs_read_ind_register(
+			hwmgr->device,
+			CGS_IND_REG__SMC,
+			ixSMU_PM_STATUS_77);
+
+	data->wattman_data.effective_mclk = cgs_read_ind_register(
+			hwmgr->device,
+			CGS_IND_REG__SMC,
+			ixSMU_PM_STATUS_111);
+
+	pr_debug("effective sclk: %x average sclk busy: %x\n",
+			data->wattman_data.effective_sclk,
+			data->wattman_data.average_sclk_busy);
+	pr_debug("effective mclk: %x average mclk busy: %x\n",
+			data->wattman_data.effective_mclk,
+			data->wattman_data.average_mclk_busy);
+
+	return;
+}
+
+static int smu7_auto_wattman_update_clk_setting(struct pp_hwmgr *hwmgr,
+		struct smu7_auto_wattman_adjust_settings *setting,
+		uint32_t clk_busy, uint32_t threshold)
+{
+	uint32_t adjust_factor = 0;
+	uint32_t divide_factor = 0;
+
+	if (setting == NULL)
+		return -EINVAL;
+
+	if (clk_busy < threshold) {
+		divide_factor = threshold / AutoWattmanAlgorithmMaxAdjustFactor;
+		adjust_factor = AutoWattmanAlgorithmMaxAdjustFactor - (clk_busy / divide_factor);
+		setting->uphyst_adjust = adjust_factor;
+		setting->uphyst_polarity = 1;
+		setting->downhyst_adjust = 6 * adjust_factor;
+		setting->downhyst_polarity = 0;
+		setting->activity_adjust = 2 * adjust_factor;
+		setting->activity_polarity = 1;
+	} else {
+		divide_factor = (100 - threshold) / AutoWattmanAlgorithmMaxAdjustFactor;
+		adjust_factor = (clk_busy - threshold + divide_factor - 1) / divide_factor;
+		setting->uphyst_adjust = adjust_factor;
+		setting->uphyst_polarity = 0;
+		setting->downhyst_adjust = 6 * adjust_factor;
+		setting->downhyst_polarity = 1;
+		setting->activity_adjust = 2 * adjust_factor;
+		setting->activity_polarity = 0;
+	}
+
+	return 0;
+}
+
+static void smu7_auto_wattman_algorithm(struct pp_hwmgr *hwmgr)
+{
+	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+	uint32_t threshold = 0x32;
+
+	smu7_auto_wattman_update_clk_setting(hwmgr, &data->sclk_settings,
+			data->wattman_data.average_sclk_busy, threshold);
+
+	if (data->memory_bit_width == 64)
+		threshold = 0xA;
+	else if (hwmgr->chip_id == CHIP_POLARIS12)
+		threshold = 0xF;
+	else
+		threshold = 0x14;
+
+	smu7_auto_wattman_update_clk_setting(hwmgr, &data->mclk_settings,
+			data->wattman_data.average_mclk_busy, threshold);
+}
+
+static int smu7_auto_wattman_get_adjust_setting(uint32_t low_limit, uint32_t high_limit,
+		uint32_t polarity, uint32_t adjust_value,
+		uint32_t original_setting)
+{
+	if (!polarity) {
+		if (original_setting < low_limit + adjust_value)
+			original_setting = low_limit;
+		else
+			original_setting -= adjust_value;
+	} else {
+		if ((high_limit < adjust_value) || (high_limit - adjust_value < original_setting))
+			original_setting = high_limit;
+		else
+			original_setting += adjust_value;
+	}
+
+	return original_setting;
+}
+
+static int smu7_update_auto_wattman(struct pp_hwmgr *hwmgr)
+{
+	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+	struct profile_mode_setting tmp;
+	uint32_t high_limit, low_limit;
+
+	high_limit = AutoWattmanSCLKHighLimits;
+	low_limit = AutoWattmanSCLKLowLimits;
+
+	smu7_auto_wattman_get_data(hwmgr);
+	smu7_auto_wattman_algorithm(hwmgr);
+
+	tmp.sclk_up_hyst = smu7_auto_wattman_get_adjust_setting(low_limit & 0xff,
+			high_limit & 0xff,
+			data->sclk_settings.uphyst_polarity,
+			data->sclk_settings.uphyst_adjust,
+			data->current_profile_setting.sclk_up_hyst);
+
+	tmp.sclk_down_hyst = smu7_auto_wattman_get_adjust_setting((low_limit >> 8) & 0xff,
+			(high_limit >> 8) & 0xff,
+			data->sclk_settings.downhyst_polarity,
+			data->sclk_settings.downhyst_adjust,
+			data->current_profile_setting.sclk_down_hyst);
+
+	tmp.sclk_activity = smu7_auto_wattman_get_adjust_setting((low_limit >> 16) & 0xffff,
+			(high_limit >> 16) & 0xffff,
+			data->sclk_settings.activity_polarity,
+			data->sclk_settings.activity_adjust,
+			data->current_profile_setting.sclk_activity);
+
+	if ((tmp.sclk_up_hyst == data->current_profile_setting.sclk_up_hyst) &&
+			(tmp.sclk_down_hyst == data->current_profile_setting.sclk_down_hyst) &&
+			(tmp.sclk_activity == data->current_profile_setting.sclk_activity))
+		tmp.bupdate_sclk = false;
+	else
+		tmp.bupdate_sclk = true;
+
+	high_limit = AutoWattmanMCLKHighLimits;
+	low_limit = AutoWattmanMCLKLowLimits;
+
+	tmp.mclk_up_hyst = smu7_auto_wattman_get_adjust_setting(low_limit & 0xff,
+			high_limit & 0xff,
+			data->mclk_settings.uphyst_polarity,
+			data->mclk_settings.uphyst_adjust,
+			data->current_profile_setting.mclk_up_hyst);
+
+	tmp.mclk_down_hyst = smu7_auto_wattman_get_adjust_setting((low_limit >> 8) & 0xff,
+			(high_limit >> 8) & 0xff,
+			data->mclk_settings.downhyst_polarity,
+			data->mclk_settings.downhyst_adjust,
+			data->current_profile_setting.mclk_down_hyst);
+
+	tmp.mclk_activity = smu7_auto_wattman_get_adjust_setting((low_limit >> 16) & 0xffff,
+			(high_limit >> 16) & 0xffff,
+			data->mclk_settings.activity_polarity,
+			data->mclk_settings.activity_adjust,
+			data->current_profile_setting.mclk_activity);
+
+	if ((tmp.mclk_up_hyst == data->current_profile_setting.mclk_up_hyst) &&
+			(tmp.mclk_down_hyst == data->current_profile_setting.mclk_down_hyst) &&
+			(tmp.mclk_activity == data->current_profile_setting.mclk_activity))
+		tmp.bupdate_mclk = false;
+	else
+		tmp.bupdate_mclk = true;
+
+	if (!smum_update_dpm_settings(hwmgr, &tmp) && (tmp.bupdate_sclk || tmp.bupdate_mclk))
+		memcpy(&data->current_profile_setting, &tmp, sizeof(struct profile_mode_setting));
+
+	return 0;
+}
+
 static const struct pp_hwmgr_func smu7_hwmgr_funcs = {
 	.backend_init = &smu7_hwmgr_backend_init,
 	.backend_fini = &smu7_hwmgr_backend_fini,
@@ -5081,6 +5287,8 @@ static int smu7_set_power_profile_mode(struct pp_hwmgr *hwmgr, long *input, uint
 	.set_power_limit = smu7_set_power_limit,
 	.get_power_profile_mode = smu7_get_power_profile_mode,
 	.set_power_profile_mode = smu7_set_power_profile_mode,
+	.update_auto_wattman = smu7_update_auto_wattman,
+	.start_auto_wattman = smu7_start_auto_wattman,
 };
 
 uint8_t smu7_get_sleep_divider_id_from_clock(uint32_t clock,
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.h
index 3bcfc61..9f61507 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.h
@@ -186,6 +186,21 @@ struct smu7_odn_dpm_table {
 	uint32_t odn_mclk_min_limit;
 };
 
+#define AutoWattmanSCLKHighLimits 0x002D3C0A
+#define AutoWattmanSCLKLowLimits 0x00190000
+#define AutoWattmanMCLKHighLimits 0x002D3C0A
+#define AutoWattmanMCLKLowLimits 0x000A1000
+#define AutoWattmanAlgorithmMaxAdjustFactor 5
+
+struct smu7_auto_wattman_adjust_settings {
+	uint32_t uphyst_adjust;
+	uint32_t uphyst_polarity;
+	uint32_t downhyst_adjust;
+	uint32_t downhyst_polarity;
+	uint32_t activity_adjust;
+	uint32_t activity_polarity;
+};
+
 struct profile_mode_setting {
 	uint8_t bupdate_sclk;
 	uint8_t sclk_up_hyst;
@@ -197,6 +212,13 @@ struct profile_mode_setting {
 	uint16_t mclk_activity;
 };
 
+struct smu7_auto_wattman_data {
+	uint32_t effective_sclk;
+	uint32_t effective_mclk;
+	uint32_t average_sclk_busy;
+	uint32_t average_mclk_busy;
+};
+
 struct smu7_hwmgr {
 	struct smu7_dpm_table dpm_table;
 	struct smu7_dpm_table golden_dpm_table;
@@ -327,6 +349,10 @@ struct smu7_hwmgr {
 	uint32_t vr_config;
 	struct profile_mode_setting custom_profile_setting;
 	struct profile_mode_setting current_profile_setting;
+	struct smu7_auto_wattman_data wattman_data;
+	uint32_t memory_bit_width;
+	struct smu7_auto_wattman_adjust_settings sclk_settings;
+	struct smu7_auto_wattman_adjust_settings mclk_settings;
 };
 
 /* To convert to Q8.8 format for firmware */
-- 
1.9.1
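
Reviewer note, not part of the patch: a small user-space sketch of the adjust-factor math in
smu7_auto_wattman_update_clk_setting(), assuming busy levels are percentages in the 0..100 range
as the patch does. MAX_ADJUST_FACTOR and adjust_factor() are local names for this sketch only,
standing in for AutoWattmanAlgorithmMaxAdjustFactor and the in-kernel helper. The resulting
factor is what smu7_update_auto_wattman() feeds into smu7_auto_wattman_get_adjust_setting(),
clamped to the per-field limits packed into the AutoWattman*Limits words.

/*
 * Stand-alone sketch: mirrors the adjust-factor scaling so it can be
 * sanity-checked without the SMU.
 */
#include <stdio.h>
#include <stdint.h>

#define MAX_ADJUST_FACTOR 5	/* plays the role of AutoWattmanAlgorithmMaxAdjustFactor */

static uint32_t adjust_factor(uint32_t clk_busy, uint32_t threshold)
{
	uint32_t divide_factor;

	if (clk_busy < threshold) {
		/* below the threshold: larger factor the more idle the clock is */
		divide_factor = threshold / MAX_ADJUST_FACTOR;
		return MAX_ADJUST_FACTOR - (clk_busy / divide_factor);
	}
	/* above the threshold: round up so any excursion produces a step */
	divide_factor = (100 - threshold) / MAX_ADJUST_FACTOR;
	return (clk_busy - threshold + divide_factor - 1) / divide_factor;
}

int main(void)
{
	static const uint32_t busy[] = { 0, 10, 49, 50, 75, 100 };
	const uint32_t threshold = 0x32;	/* 50%, the sclk threshold in the patch */
	unsigned int i;

	for (i = 0; i < sizeof(busy) / sizeof(busy[0]); i++) {
		uint32_t f = adjust_factor(busy[i], threshold);

		/* up_hyst moves by f, down_hyst by 6*f, activity by 2*f, as in the patch */
		printf("busy %3u%%: factor %u -> up_hyst step %u, down_hyst step %u, activity step %u\n",
		       busy[i], f, f, 6 * f, 2 * f);
	}
	return 0;
}

With the 50% threshold this prints a factor of 5 at idle, tapering to 0 at the threshold and
growing back to 5 at full load, which is the behaviour the polarity flags then turn into a
raise or lower of the current profile's hysteresis and activity settings.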