Re: [PATCH 2/2] drm/amd/display: Enable FAMS for DCN3x

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



[AMD Official Use Only - General]


Reviewed-by: Bhawanpreet Lakha <Bhawanpreet.Lakha@xxxxxxx>

From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> on behalf of Aurabindo Pillai <aurabindo.pillai@xxxxxxx>
Sent: March 10, 2023 12:56 PM
To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx <amd-gfx@xxxxxxxxxxxxxxxxxxxxx>
Cc: Wentland, Harry <Harry.Wentland@xxxxxxx>; Siqueira, Rodrigo <Rodrigo.Siqueira@xxxxxxx>; Mahfooz, Hamza <Hamza.Mahfooz@xxxxxxx>
Subject: Re: [PATCH 2/2] drm/amd/display: Enable FAMS for DCN3x
 


On 3/10/23 12:48, Aurabindo Pillai wrote:
> [Why&How]
> Firmware Assisted Memclk Switching enables lowering mclk using DMCUB
> when it cannot be normally done due to not having enough time within
> vblank. FAMS extends vblank on monitors that support variable refresh
> rate thereby allowing enough time to do an mclk switch sequence
> during vblank.
>
> When tested with 4k@144Hz monitor on DCN32, power consumption of about
> 40W was saved since multiple clocks like MCLK, SOCCLK, and FCLK
> were brought down.
>
> Signed-off-by: Aurabindo Pillai <aurabindo.pillai@xxxxxxx>
> Signed-off-by: Rodrigo Siqueira <Rodrigo.Siqueira@xxxxxxx>
> ---
>   .../gpu/drm/amd/display/dc/dcn30/dcn30_optc.c |  7 +-
>   .../drm/amd/display/dc/dcn30/dcn30_resource.h |  3 +
>   .../drm/amd/display/dc/dcn31/dcn31_hwseq.c    |  4 ++
>   .../drm/amd/display/dc/dcn32/dcn32_hwseq.c    |  2 +
>   .../drm/amd/display/dc/dcn32/dcn32_resource.c |  2 +-
>   .../drm/amd/display/dc/dml/dcn30/dcn30_fpu.c  | 71 ++++++++++++++++---
>   .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c  |  5 ++
>   7 files changed, 84 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c
> index 08b92715e2e6..9963bffb1e07 100644
> --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c
> +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c
> @@ -301,7 +301,12 @@ void optc3_wait_drr_doublebuffer_pending_clear(struct timing_generator *optc)
>  
>   void optc3_set_vtotal_min_max(struct timing_generator *optc, int vtotal_min, int vtotal_max)
>   {
> -     optc1_set_vtotal_min_max(optc, vtotal_min, vtotal_max);
> +     struct dc *dc = optc->ctx->dc;
> +
> +     if (dc->caps.dmub_caps.mclk_sw && !dc->debug.disable_fams)
> +             dc_dmub_srv_drr_update_cmd(dc, optc->inst, vtotal_min, vtotal_max);
> +     else
> +             optc1_set_vtotal_min_max(optc, vtotal_min, vtotal_max);
>   }
>  
>   void optc3_tg_init(struct timing_generator *optc)
> diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h
> index 8e6b8b7368fd..d8805618a9a1 100644
> --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h
> +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h
> @@ -102,6 +102,9 @@ void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params
>  
>   bool dcn30_can_support_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, struct dc_state *context);
>   void dcn30_setup_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, struct dc_state *context);
> +
> +void dcn30_setup_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, struct dc_state *context);
> +

This declaration is a duplicate of the one just above it; I will remove it before applying.
>   int dcn30_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc, struct dc_state *context,
>                display_e2e_pipe_params_st *pipes, int pipe_cnt, int vlevel);
>  
> diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c
> index 80a0c5a575a9..40080113ed5e 100644
> --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c
> +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c
> @@ -295,6 +295,10 @@ void dcn31_init_hw(struct dc *dc)
>        if (dc->res_pool->hubbub->funcs->init_crb)
>                dc->res_pool->hubbub->funcs->init_crb(dc->res_pool->hubbub);
>   #endif
> +     /* Get DMCUB capabilities */
> +     dc_dmub_srv_query_caps_cmd(dc->ctx->dmub_srv->dmub);
> +     dc->caps.dmub_caps.psr = dc->ctx->dmub_srv->dmub->feature_caps.psr;
> +     dc->caps.dmub_caps.mclk_sw = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch;
>   }
>  
>   void dcn31_dsc_pg_control(
> diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
> index f87db2271924..3220f9ad8a47 100644
> --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
> +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
> @@ -919,6 +919,8 @@ void dcn32_init_hw(struct dc *dc)
>        if (dc->ctx->dmub_srv) {
>                dc_dmub_srv_query_caps_cmd(dc->ctx->dmub_srv->dmub);
>                dc->caps.dmub_caps.psr = dc->ctx->dmub_srv->dmub->feature_caps.psr;
> +             dc->caps.dmub_caps.mclk_sw = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch;
> +

I will remove the extra blank line before applying.
>        }
>   }
>  
> diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
> index 100b6df33b33..b1944e49a65d 100644
> --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
> +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
> @@ -2013,7 +2013,7 @@ int dcn32_populate_dml_pipes_from_context(
>        // In general cases we want to keep the dram clock change requirement
>        // (prefer configs that support MCLK switch). Only override to false
>        // for SubVP
> -     if (subvp_in_use)
> +     if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching || subvp_in_use)
>                context->bw_ctx.dml.soc.dram_clock_change_requirement_final = false;
>        else
>                context->bw_ctx.dml.soc.dram_clock_change_requirement_final = true;
> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c
> index 4fa636364793..53f21b0b3630 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c
> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c
> @@ -368,7 +368,9 @@ void dcn30_fpu_update_soc_for_wm_a(struct dc *dc, struct dc_state *context)
>        dc_assert_fp_enabled();
>  
>        if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].valid) {
> -             context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;
> +             if (!context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching ||
> +                             context->bw_ctx.dml.soc.dram_clock_change_latency_us == 0)
> +                     context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;
>                context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us;
>                context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us;
>        }
> @@ -384,9 +386,34 @@ void dcn30_fpu_calculate_wm_and_dlg(
>        int i, pipe_idx;
>        double dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][maxMpcComb];
>        bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] != dm_dram_clock_change_unsupported;
> +     unsigned int dummy_latency_index = 0;
>  
>        dc_assert_fp_enabled();
>  
> +     context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = false;
> +
> +     if (!pstate_en) {
> +             /* only when the mclk switch can not be natural, is the fw based vblank stretch attempted */
> +             context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching =
> +                     dcn30_can_support_mclk_switch_using_fw_based_vblank_stretch(dc, context);
> +
> +             if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) {
> +                     dummy_latency_index = dcn30_find_dummy_latency_index_for_fw_based_mclk_switch(dc,
> +                             context, pipes, pipe_cnt, vlevel);
> +
> +                     /* After calling dcn30_find_dummy_latency_index_for_fw_based_mclk_switch
> +                      * we reinstate the original dram_clock_change_latency_us on the context
> +                      * and all variables that may have changed up to this point, except the
> +                      * newly found dummy_latency_index
> +                      */
> +                     context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;
> +                     dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false, true);
> +                     maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb;
> +                     dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb];
> +                     pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] != dm_dram_clock_change_unsupported;
> +             }
> +     }
> +
>        if (context->bw_ctx.dml.soc.min_dcfclk > dcfclk)
>                dcfclk = context->bw_ctx.dml.soc.min_dcfclk;
>  
> @@ -449,15 +476,29 @@ void dcn30_fpu_calculate_wm_and_dlg(
>                unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed;
>                unsigned int min_dram_speed_mts_margin = 160;
>  
> -             if (context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] == dm_dram_clock_change_unsupported)
> -                     min_dram_speed_mts = dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz * 16;
> +             context->bw_ctx.dml.soc.dram_clock_change_latency_us =
> +                     dc->clk_mgr->bw_params->dummy_pstate_table[0].dummy_pstate_latency_us;
>  
> -             /* find largest table entry that is lower than dram speed, but lower than DPM0 still uses DPM0 */
> -             for (i = 3; i > 0; i--)
> -                     if (min_dram_speed_mts + min_dram_speed_mts_margin > dc->clk_mgr->bw_params->dummy_pstate_table[i].dram_speed_mts)
> -                             break;
> +             if (context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] ==
> +                     dm_dram_clock_change_unsupported) {
> +                     int min_dram_speed_mts_offset = dc->clk_mgr->bw_params->clk_table.num_entries - 1;
> +
> +                     min_dram_speed_mts =
> +                             dc->clk_mgr->bw_params->clk_table.entries[min_dram_speed_mts_offset].memclk_mhz * 16;
> +             }
>  
> -             context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[i].dummy_pstate_latency_us;
> +             if (!context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) {
> +                     /* find largest table entry that is lower than dram speed,
> +                      * but lower than DPM0 still uses DPM0
> +                      */
> +                     for (dummy_latency_index = 3; dummy_latency_index > 0; dummy_latency_index--)
> +                             if (min_dram_speed_mts + min_dram_speed_mts_margin >
> +                                     dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dram_speed_mts)
> +                                     break;
> +             }
> +
> +             context->bw_ctx.dml.soc.dram_clock_change_latency_us =
> +                     dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us;
>  
>                context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us;
>                context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us;
> @@ -520,6 +561,20 @@ void dcn30_fpu_calculate_wm_and_dlg(
>                pipe_idx++;
>        }
>  
> +     /* WA: restrict FW MCLK switch to use first non-strobe mode (Beige Goby BW issue) */
> +     if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching &&
> +                     dc->dml.soc.num_chans <= 4 &&
> +                     context->bw_ctx.dml.vba.DRAMSpeed <= 1700 &&
> +                     context->bw_ctx.dml.vba.DRAMSpeed >= 1500) {
> +
> +             for (i = 0; i < dc->dml.soc.num_states; i++) {
> +                     if (dc->dml.soc.clock_limits[i].dram_speed_mts > 1700) {
> +                             context->bw_ctx.dml.vba.DRAMSpeed = dc->dml.soc.clock_limits[i].dram_speed_mts;
> +                             break;
> +                     }
> +             }
> +     }
> +
>        dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel);
>  
>        if (!pstate_en)
> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
> index 077674be452b..ee2683200799 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
> @@ -1331,6 +1331,11 @@ static void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context,
>                        context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb]
>                                        != dm_dram_clock_change_unsupported;
>  
> +     /* Pstate change might not be supported by hardware, but it might be
> +      * possible with firmware driven vertical blank stretching.
> +      */
> +     context->bw_ctx.bw.dcn.clk.p_state_change_support |= context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching;
> +
>        context->bw_ctx.bw.dcn.clk.dppclk_khz = 0;
>        context->bw_ctx.bw.dcn.clk.dtbclk_en = is_dtbclk_required(dc, context);
>        context->bw_ctx.bw.dcn.clk.ref_dtbclk_khz = context->bw_ctx.dml.vba.DTBCLKPerState[vlevel] * 1000;

[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux