On 2022-06-08 12:48, sunpeng.li@xxxxxxx wrote: > From: Leo Li <sunpeng.li@xxxxxxx> > > [Why] > > There is a theoretical problem in prior patches for reducing the stack > size of *update_bw_bounding_box() functions. > > By modifying the soc.clock_limits[n] struct directly, this can cause > unintended behavior as the for loop attempts to swap rows in > clock_limits[n]. A temporary struct is still required to make sure we > stay functionally equivalent. > > [How] > > Add a temporary clock_limits table to the SOC struct, and use it when > swapping rows. > > Signed-off-by: Leo Li <sunpeng.li@xxxxxxx> > --- > .../drm/amd/display/dc/dml/dcn20/dcn20_fpu.c | 33 +++++----- > .../amd/display/dc/dml/dcn301/dcn301_fpu.c | 36 ++++++----- > .../drm/amd/display/dc/dml/dcn31/dcn31_fpu.c | 64 +++++++++++-------- > .../amd/display/dc/dml/display_mode_structs.h | 5 ++ > 4 files changed, 82 insertions(+), 56 deletions(-) > > diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c > index c2fec0d85da4..e247b2270b1d 100644 > --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c > +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c > @@ -2015,9 +2015,8 @@ void dcn21_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params > > ASSERT(clk_table->num_entries); > /* Copy dcn2_1_soc.clock_limits to clock_limits to avoid copying over null states later */ > - for (i = 0; i < dcn2_1_soc.num_states + 1; i++) { > - dcn2_1_soc.clock_limits[i] = dcn2_1_soc.clock_limits[i]; > - } Hmm, this for loop didn't make sense. I gave my RB for the previous patch too quickly. 
> + memcpy(&dcn2_1_soc._clock_tmp, &dcn2_1_soc.clock_limits, > + sizeof(dcn2_1_soc.clock_limits)); > > for (i = 0; i < clk_table->num_entries; i++) { > /* loop backwards*/ > @@ -2032,22 +2031,26 @@ void dcn21_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params > if (i == 1) > k++; > > - dcn2_1_soc.clock_limits[k].state = k; > - dcn2_1_soc.clock_limits[k].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; > - dcn2_1_soc.clock_limits[k].fabricclk_mhz = clk_table->entries[i].fclk_mhz; > - dcn2_1_soc.clock_limits[k].socclk_mhz = clk_table->entries[i].socclk_mhz; > - dcn2_1_soc.clock_limits[k].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2; > + dcn2_1_soc._clock_tmp[k].state = k; > + dcn2_1_soc._clock_tmp[k].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; > + dcn2_1_soc._clock_tmp[k].fabricclk_mhz = clk_table->entries[i].fclk_mhz; > + dcn2_1_soc._clock_tmp[k].socclk_mhz = clk_table->entries[i].socclk_mhz; > + dcn2_1_soc._clock_tmp[k].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2; > > - dcn2_1_soc.clock_limits[k].dispclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz; > - dcn2_1_soc.clock_limits[k].dppclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz; > - dcn2_1_soc.clock_limits[k].dram_bw_per_chan_gbps = dcn2_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; > - dcn2_1_soc.clock_limits[k].dscclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz; > - dcn2_1_soc.clock_limits[k].dtbclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; > - dcn2_1_soc.clock_limits[k].phyclk_d18_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; > - dcn2_1_soc.clock_limits[k].phyclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz; > + dcn2_1_soc._clock_tmp[k].dispclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz; > + dcn2_1_soc._clock_tmp[k].dppclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz; > + dcn2_1_soc._clock_tmp[k].dram_bw_per_chan_gbps = 
dcn2_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; > + dcn2_1_soc._clock_tmp[k].dscclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz; > + dcn2_1_soc._clock_tmp[k].dtbclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; > + dcn2_1_soc._clock_tmp[k].phyclk_d18_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; > + dcn2_1_soc._clock_tmp[k].phyclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz; > I see why we need a tmp array and agree that we shouldn't allocate it inside DML functions. Reviewed-by: Harry Wentland <harry.wentland@xxxxxxx> Harry > k++; > } > + > + memcpy(&dcn2_1_soc.clock_limits, &dcn2_1_soc._clock_tmp, > + sizeof(dcn2_1_soc.clock_limits)); > + > if (clk_table->num_entries) { > dcn2_1_soc.num_states = clk_table->num_entries + 1; > /* fill in min DF PState */ > diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c > index 62cf283d9f41..e4863f0bf0f6 100644 > --- a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c > +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c > @@ -254,6 +254,9 @@ void dcn301_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param > > dc_assert_fp_enabled(); > > + memcpy(&dcn3_01_soc._clock_tmp, &dcn3_01_soc.clock_limits, > + sizeof(dcn3_01_soc.clock_limits)); > + > /* Default clock levels are used for diags, which may lead to overclocking. 
*/ > if (!IS_DIAG_DC(dc->ctx->dce_environment)) { > dcn3_01_ip.max_num_otg = pool->base.res_cap->num_timing_generator; > @@ -270,29 +273,32 @@ void dcn301_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param > } > } > > - dcn3_01_soc.clock_limits[i].state = i; > - dcn3_01_soc.clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; > - dcn3_01_soc.clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; > - dcn3_01_soc.clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz; > - dcn3_01_soc.clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2; > - > - dcn3_01_soc.clock_limits[i].dispclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dispclk_mhz; > - dcn3_01_soc.clock_limits[i].dppclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dppclk_mhz; > - dcn3_01_soc.clock_limits[i].dram_bw_per_chan_gbps = dcn3_01_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; > - dcn3_01_soc.clock_limits[i].dscclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dscclk_mhz; > - dcn3_01_soc.clock_limits[i].dtbclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; > - dcn3_01_soc.clock_limits[i].phyclk_d18_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; > - dcn3_01_soc.clock_limits[i].phyclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].phyclk_mhz; > + dcn3_01_soc._clock_tmp[i].state = i; > + dcn3_01_soc._clock_tmp[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; > + dcn3_01_soc._clock_tmp[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; > + dcn3_01_soc._clock_tmp[i].socclk_mhz = clk_table->entries[i].socclk_mhz; > + dcn3_01_soc._clock_tmp[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2; > + > + dcn3_01_soc._clock_tmp[i].dispclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dispclk_mhz; > + dcn3_01_soc._clock_tmp[i].dppclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dppclk_mhz; > + dcn3_01_soc._clock_tmp[i].dram_bw_per_chan_gbps = 
dcn3_01_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; > + dcn3_01_soc._clock_tmp[i].dscclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dscclk_mhz; > + dcn3_01_soc._clock_tmp[i].dtbclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; > + dcn3_01_soc._clock_tmp[i].phyclk_d18_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; > + dcn3_01_soc._clock_tmp[i].phyclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].phyclk_mhz; > } > > if (clk_table->num_entries) { > dcn3_01_soc.num_states = clk_table->num_entries; > /* duplicate last level */ > - dcn3_01_soc.clock_limits[dcn3_01_soc.num_states] = dcn3_01_soc.clock_limits[dcn3_01_soc.num_states - 1]; > - dcn3_01_soc.clock_limits[dcn3_01_soc.num_states].state = dcn3_01_soc.num_states; > + dcn3_01_soc._clock_tmp[dcn3_01_soc.num_states] = dcn3_01_soc.clock_limits[dcn3_01_soc.num_states - 1]; > + dcn3_01_soc._clock_tmp[dcn3_01_soc.num_states].state = dcn3_01_soc.num_states; > } > } > > + memcpy(&dcn3_01_soc.clock_limits, &dcn3_01_soc._clock_tmp, > + sizeof(dcn3_01_soc.clock_limits)); > + > dcn3_01_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; > dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; > > diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c > index 6da702923226..7be3476989ce 100644 > --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c > +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c > @@ -580,6 +580,9 @@ void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params > > dc_assert_fp_enabled(); > > + memcpy(&dcn3_1_soc._clock_tmp, &dcn3_1_soc.clock_limits, > + sizeof(dcn3_1_soc.clock_limits)); > + > // Default clock levels are used for diags, which may lead to overclocking. 
> if (!IS_DIAG_DC(dc->ctx->dce_environment)) { > int max_dispclk_mhz = 0, max_dppclk_mhz = 0; > @@ -607,32 +610,35 @@ void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params > } > } > > - dcn3_1_soc.clock_limits[i].state = i; > + dcn3_1_soc._clock_tmp[i].state = i; > > /* Clocks dependent on voltage level. */ > - dcn3_1_soc.clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; > - dcn3_1_soc.clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; > - dcn3_1_soc.clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz; > - dcn3_1_soc.clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio; > + dcn3_1_soc._clock_tmp[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; > + dcn3_1_soc._clock_tmp[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; > + dcn3_1_soc._clock_tmp[i].socclk_mhz = clk_table->entries[i].socclk_mhz; > + dcn3_1_soc._clock_tmp[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio; > > /* Clocks independent of voltage level. */ > - dcn3_1_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz : > + dcn3_1_soc._clock_tmp[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz : > dcn3_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz; > > - dcn3_1_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz : > + dcn3_1_soc._clock_tmp[i].dppclk_mhz = max_dppclk_mhz ? 
max_dppclk_mhz : > dcn3_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz; > > - dcn3_1_soc.clock_limits[i].dram_bw_per_chan_gbps = dcn3_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; > - dcn3_1_soc.clock_limits[i].dscclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz; > - dcn3_1_soc.clock_limits[i].dtbclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; > - dcn3_1_soc.clock_limits[i].phyclk_d18_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; > - dcn3_1_soc.clock_limits[i].phyclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz; > + dcn3_1_soc._clock_tmp[i].dram_bw_per_chan_gbps = dcn3_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; > + dcn3_1_soc._clock_tmp[i].dscclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz; > + dcn3_1_soc._clock_tmp[i].dtbclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; > + dcn3_1_soc._clock_tmp[i].phyclk_d18_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; > + dcn3_1_soc._clock_tmp[i].phyclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz; > } > if (clk_table->num_entries) { > dcn3_1_soc.num_states = clk_table->num_entries; > } > } > > + memcpy(&dcn3_1_soc.clock_limits, &dcn3_1_soc._clock_tmp, > + sizeof(dcn3_1_soc.clock_limits)); > + > dcn3_1_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; > dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; > > @@ -705,6 +711,9 @@ void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param > > dc_assert_fp_enabled(); > > + memcpy(&dcn3_16_soc._clock_tmp, &dcn3_16_soc.clock_limits, > + sizeof(dcn3_16_soc.clock_limits)); > + > // Default clock levels are used for diags, which may lead to overclocking. 
> if (!IS_DIAG_DC(dc->ctx->dce_environment)) { > > @@ -736,37 +745,40 @@ void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param > closest_clk_lvl = dcn3_16_soc.num_states - 1; > } > > - dcn3_16_soc.clock_limits[i].state = i; > + dcn3_16_soc._clock_tmp[i].state = i; > > /* Clocks dependent on voltage level. */ > - dcn3_16_soc.clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; > + dcn3_16_soc._clock_tmp[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; > if (clk_table->num_entries == 1 && > - dcn3_16_soc.clock_limits[i].dcfclk_mhz < dcn3_16_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) { > + dcn3_16_soc._clock_tmp[i].dcfclk_mhz < dcn3_16_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) { > /*SMU fix not released yet*/ > - dcn3_16_soc.clock_limits[i].dcfclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dcfclk_mhz; > + dcn3_16_soc._clock_tmp[i].dcfclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dcfclk_mhz; > } > - dcn3_16_soc.clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; > - dcn3_16_soc.clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz; > - dcn3_16_soc.clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio; > + dcn3_16_soc._clock_tmp[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; > + dcn3_16_soc._clock_tmp[i].socclk_mhz = clk_table->entries[i].socclk_mhz; > + dcn3_16_soc._clock_tmp[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio; > > /* Clocks independent of voltage level. */ > - dcn3_16_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz : > + dcn3_16_soc._clock_tmp[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz : > dcn3_16_soc.clock_limits[closest_clk_lvl].dispclk_mhz; > > - dcn3_16_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz : > + dcn3_16_soc._clock_tmp[i].dppclk_mhz = max_dppclk_mhz ? 
max_dppclk_mhz : > dcn3_16_soc.clock_limits[closest_clk_lvl].dppclk_mhz; > > - dcn3_16_soc.clock_limits[i].dram_bw_per_chan_gbps = dcn3_16_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; > - dcn3_16_soc.clock_limits[i].dscclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dscclk_mhz; > - dcn3_16_soc.clock_limits[i].dtbclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; > - dcn3_16_soc.clock_limits[i].phyclk_d18_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; > - dcn3_16_soc.clock_limits[i].phyclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].phyclk_mhz; > + dcn3_16_soc._clock_tmp[i].dram_bw_per_chan_gbps = dcn3_16_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; > + dcn3_16_soc._clock_tmp[i].dscclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dscclk_mhz; > + dcn3_16_soc._clock_tmp[i].dtbclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; > + dcn3_16_soc._clock_tmp[i].phyclk_d18_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; > + dcn3_16_soc._clock_tmp[i].phyclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].phyclk_mhz; > } > if (clk_table->num_entries) { > dcn3_16_soc.num_states = clk_table->num_entries; > } > } > > + memcpy(&dcn3_16_soc.clock_limits, &dcn3_16_soc._clock_tmp, > + sizeof(dcn3_16_soc.clock_limits)); > + > if (max_dispclk_mhz) { > dcn3_16_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2; > dc->dml.soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2; > diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h > index 74afa10e70f8..2bdf60846762 100644 > --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h > +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h > @@ -161,6 +161,11 @@ struct _vcs_dpi_voltage_scaling_st { > > struct _vcs_dpi_soc_bounding_box_st { > struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES]; > + /* > + * This is a temporary stash for 
updating @clock_limits with the PMFW > + * clock table. Do not use outside of *update_bw_bounding_box functions. > + */ > + struct _vcs_dpi_voltage_scaling_st _clock_tmp[DC__VOLTAGE_STATES]; > unsigned int num_states; > double sr_exit_time_us; > double sr_enter_plus_exit_time_us;