From: Austin Zheng <austin.zheng@xxxxxxx> Why: Certain display configs resulted in underflow How: Add an entry containing all max DC clock timings Reviewed-by: Alvin Lee <alvin.lee2@xxxxxxx> Acked-by: Hamza Mahfooz <hamza.mahfooz@xxxxxxx> Signed-off-by: Austin Zheng <austin.zheng@xxxxxxx> --- .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 90 ++++++++++++++++-- .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.h | 4 - .../amd/display/dc/dml/dcn321/dcn321_fpu.c | 92 +++++++++++++++++-- .../amd/display/dc/dml/dcn321/dcn321_fpu.h | 4 - .../amd/display/dc/dml/display_mode_structs.h | 1 + 5 files changed, 171 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index e2bb2b9971f3..a95034801712 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -485,24 +485,20 @@ static void get_optimal_ntuple(struct _vcs_dpi_voltage_scaling_st *entry) } } -void insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table, +static void insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries, struct _vcs_dpi_voltage_scaling_st *entry) { int i = 0; int index = 0; - float net_bw_of_new_state = 0; dc_assert_fp_enabled(); - get_optimal_ntuple(entry); - if (*num_entries == 0) { table[0] = *entry; (*num_entries)++; } else { - net_bw_of_new_state = calculate_net_bw_in_kbytes_sec(entry); - while (net_bw_of_new_state > calculate_net_bw_in_kbytes_sec(&table[index])) { + while (entry->net_bw_in_kbytes_sec > table[index].net_bw_in_kbytes_sec) { index++; if (index >= *num_entries) break; @@ -2349,6 +2345,63 @@ void dcn32_patch_dpm_table(struct clk_bw_params *bw_params) bw_params->clk_table.entries[0].memclk_mhz = dcn3_2_soc.clock_limits[0].dram_speed_mts / 16; } +static void swap_table_entries(struct _vcs_dpi_voltage_scaling_st *first_entry, + struct _vcs_dpi_voltage_scaling_st 
*second_entry) +{ + struct _vcs_dpi_voltage_scaling_st temp_entry = *first_entry; + *first_entry = *second_entry; + *second_entry = temp_entry; +} + +/* + * sort_entries_with_same_bw - Sort entries sharing the same bandwidth by DCFCLK + */ +static void sort_entries_with_same_bw(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries) +{ + unsigned int start_index = 0; + unsigned int end_index = 0; + unsigned int current_bw = 0; + + for (int i = 0; i < (*num_entries - 1); i++) { + if (table[i].net_bw_in_kbytes_sec == table[i+1].net_bw_in_kbytes_sec) { + current_bw = table[i].net_bw_in_kbytes_sec; + start_index = i; + end_index = ++i; + + while ((i < (*num_entries - 1)) && (table[i+1].net_bw_in_kbytes_sec == current_bw)) + end_index = ++i; + } + + if (start_index != end_index) { + for (int j = start_index; j < end_index; j++) { + for (int k = start_index; k < end_index; k++) { + if (table[k].dcfclk_mhz > table[k+1].dcfclk_mhz) + swap_table_entries(&table[k], &table[k+1]); + } + } + } + + start_index = 0; + end_index = 0; + + } +} + +/* + * remove_inconsistent_entries - Ensure entries with the same bandwidth have MEMCLK and FCLK monotonically increasing + * and remove entries that do not follow this order + */ +static void remove_inconsistent_entries(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries) +{ + for (int i = 0; i < (*num_entries - 1); i++) { + if (table[i].net_bw_in_kbytes_sec == table[i+1].net_bw_in_kbytes_sec) { + if ((table[i].dram_speed_mts > table[i+1].dram_speed_mts) || + (table[i].fabricclk_mhz > table[i+1].fabricclk_mhz)) + remove_entry_from_table_at_index(table, num_entries, i); + } + } +} + /* * override_max_clk_values - Overwrite the max clock frequencies with the max DC mode timings * Input: @@ -2480,6 +2533,8 @@ static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk entry.fabricclk_mhz = 0; entry.dram_speed_mts = 0; + get_optimal_ntuple(&entry); + entry.net_bw_in_kbytes_sec = 
calculate_net_bw_in_kbytes_sec(&entry); insert_entry_into_table_sorted(table, num_entries, &entry); } @@ -2488,6 +2543,8 @@ static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk entry.fabricclk_mhz = 0; entry.dram_speed_mts = 0; + get_optimal_ntuple(&entry); + entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&entry); insert_entry_into_table_sorted(table, num_entries, &entry); // Insert the UCLK DPMS @@ -2496,6 +2553,8 @@ static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk entry.fabricclk_mhz = 0; entry.dram_speed_mts = bw_params->clk_table.entries[i].memclk_mhz * 16; + get_optimal_ntuple(&entry); + entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&entry); insert_entry_into_table_sorted(table, num_entries, &entry); } @@ -2506,6 +2565,8 @@ static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk entry.fabricclk_mhz = bw_params->clk_table.entries[i].fclk_mhz; entry.dram_speed_mts = 0; + get_optimal_ntuple(&entry); + entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&entry); insert_entry_into_table_sorted(table, num_entries, &entry); } } @@ -2515,6 +2576,8 @@ static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk entry.fabricclk_mhz = max_clk_data.fclk_mhz; entry.dram_speed_mts = 0; + get_optimal_ntuple(&entry); + entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&entry); insert_entry_into_table_sorted(table, num_entries, &entry); } @@ -2530,6 +2593,21 @@ static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk remove_entry_from_table_at_index(table, num_entries, i); } + // Insert entry with all max dc limits without bandwidth matching + if (!disable_dc_mode_overwrite) { + struct _vcs_dpi_voltage_scaling_st max_dc_limits_entry = entry; + + max_dc_limits_entry.dcfclk_mhz = max_clk_data.dcfclk_mhz; + max_dc_limits_entry.fabricclk_mhz = max_clk_data.fclk_mhz; + max_dc_limits_entry.dram_speed_mts = 
max_clk_data.memclk_mhz * 16; + + max_dc_limits_entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&max_dc_limits_entry); + insert_entry_into_table_sorted(table, num_entries, &max_dc_limits_entry); + + sort_entries_with_same_bw(table, num_entries); + remove_inconsistent_entries(table, num_entries); + } + // At this point, the table only contains supported points of interest // it could be used as is, but some states may be redundant due to // coarse grained nature of some clocks, so we want to round up to diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h index a4206b71d650..defbee866be6 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h @@ -39,10 +39,6 @@ void dcn32_helper_populate_phantom_dlg_params(struct dc *dc, uint8_t dcn32_predict_pipe_split(struct dc_state *context, display_e2e_pipe_params_st *pipe_e2e); -void insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table, - unsigned int *num_entries, - struct _vcs_dpi_voltage_scaling_st *entry); - void dcn32_set_phantom_stream_timing(struct dc *dc, struct dc_state *context, struct pipe_ctx *ref_pipe, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c index f0683fd9d3f0..190776063f46 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c @@ -207,24 +207,20 @@ static float calculate_net_bw_in_kbytes_sec(struct _vcs_dpi_voltage_scaling_st * return limiting_bw_kbytes_sec; } -void dcn321_insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table, +static void dcn321_insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries, struct _vcs_dpi_voltage_scaling_st *entry) { int i = 0; int index = 0; - float net_bw_of_new_state = 0; dc_assert_fp_enabled(); - 
get_optimal_ntuple(entry); - if (*num_entries == 0) { table[0] = *entry; (*num_entries)++; } else { - net_bw_of_new_state = calculate_net_bw_in_kbytes_sec(entry); - while (net_bw_of_new_state > calculate_net_bw_in_kbytes_sec(&table[index])) { + while (entry->net_bw_in_kbytes_sec > table[index].net_bw_in_kbytes_sec) { index++; if (index >= *num_entries) break; @@ -252,6 +248,63 @@ static void remove_entry_from_table_at_index(struct _vcs_dpi_voltage_scaling_st memset(&table[--(*num_entries)], 0, sizeof(struct _vcs_dpi_voltage_scaling_st)); } +static void swap_table_entries(struct _vcs_dpi_voltage_scaling_st *first_entry, + struct _vcs_dpi_voltage_scaling_st *second_entry) +{ + struct _vcs_dpi_voltage_scaling_st temp_entry = *first_entry; + *first_entry = *second_entry; + *second_entry = temp_entry; +} + +/* + * sort_entries_with_same_bw - Sort entries sharing the same bandwidth by DCFCLK + */ +static void sort_entries_with_same_bw(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries) +{ + unsigned int start_index = 0; + unsigned int end_index = 0; + unsigned int current_bw = 0; + + for (int i = 0; i < (*num_entries - 1); i++) { + if (table[i].net_bw_in_kbytes_sec == table[i+1].net_bw_in_kbytes_sec) { + current_bw = table[i].net_bw_in_kbytes_sec; + start_index = i; + end_index = ++i; + + while ((i < (*num_entries - 1)) && (table[i+1].net_bw_in_kbytes_sec == current_bw)) + end_index = ++i; + } + + if (start_index != end_index) { + for (int j = start_index; j < end_index; j++) { + for (int k = start_index; k < end_index; k++) { + if (table[k].dcfclk_mhz > table[k+1].dcfclk_mhz) + swap_table_entries(&table[k], &table[k+1]); + } + } + } + + start_index = 0; + end_index = 0; + + } +} + +/* + * remove_inconsistent_entries - Ensure entries with the same bandwidth have MEMCLK and FCLK monotonically increasing + * and remove entries that do not follow this order + */ +static void remove_inconsistent_entries(struct _vcs_dpi_voltage_scaling_st *table, unsigned 
int *num_entries) +{ + for (int i = 0; i < (*num_entries - 1); i++) { + if (table[i].net_bw_in_kbytes_sec == table[i+1].net_bw_in_kbytes_sec) { + if ((table[i].dram_speed_mts > table[i+1].dram_speed_mts) || + (table[i].fabricclk_mhz > table[i+1].fabricclk_mhz)) + remove_entry_from_table_at_index(table, num_entries, i); + } + } +} + /* * override_max_clk_values - Overwrite the max clock frequencies with the max DC mode timings * Input: @@ -383,6 +436,8 @@ static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk entry.fabricclk_mhz = 0; entry.dram_speed_mts = 0; + get_optimal_ntuple(&entry); + entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&entry); dcn321_insert_entry_into_table_sorted(table, num_entries, &entry); } @@ -391,6 +446,8 @@ static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk entry.fabricclk_mhz = 0; entry.dram_speed_mts = 0; + get_optimal_ntuple(&entry); + entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&entry); dcn321_insert_entry_into_table_sorted(table, num_entries, &entry); // Insert the UCLK DPMS @@ -399,6 +456,8 @@ static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk entry.fabricclk_mhz = 0; entry.dram_speed_mts = bw_params->clk_table.entries[i].memclk_mhz * 16; + get_optimal_ntuple(&entry); + entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&entry); dcn321_insert_entry_into_table_sorted(table, num_entries, &entry); } @@ -409,6 +468,8 @@ static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk entry.fabricclk_mhz = bw_params->clk_table.entries[i].fclk_mhz; entry.dram_speed_mts = 0; + get_optimal_ntuple(&entry); + entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&entry); dcn321_insert_entry_into_table_sorted(table, num_entries, &entry); } } @@ -418,6 +479,8 @@ static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk entry.fabricclk_mhz = max_clk_data.fclk_mhz; 
entry.dram_speed_mts = 0; + get_optimal_ntuple(&entry); + entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&entry); dcn321_insert_entry_into_table_sorted(table, num_entries, &entry); } @@ -433,6 +496,23 @@ static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk remove_entry_from_table_at_index(table, num_entries, i); } + // Insert entry with all max dc limits without bandwidth matching + if (!disable_dc_mode_overwrite) { + struct _vcs_dpi_voltage_scaling_st max_dc_limits_entry = entry; + + max_dc_limits_entry.dcfclk_mhz = max_clk_data.dcfclk_mhz; + max_dc_limits_entry.fabricclk_mhz = max_clk_data.fclk_mhz; + max_dc_limits_entry.dram_speed_mts = max_clk_data.memclk_mhz * 16; + + max_dc_limits_entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&max_dc_limits_entry); + dcn321_insert_entry_into_table_sorted(table, num_entries, &max_dc_limits_entry); + + sort_entries_with_same_bw(table, num_entries); + remove_inconsistent_entries(table, num_entries); + } + + + // At this point, the table only contains supported points of interest // it could be used as is, but some states may be redundant due to // coarse grained nature of some clocks, so we want to round up to diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h index e8fad9b4be69..c6623b3705ca 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h @@ -29,10 +29,6 @@ #include "dml/display_mode_vba.h" -void dcn321_insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table, - unsigned int *num_entries, - struct _vcs_dpi_voltage_scaling_st *entry); - void dcn321_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params); #endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h index ff0246a9458f..fb17f8868cb4 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h @@ -167,6 +167,7 @@ struct _vcs_dpi_voltage_scaling_st { double phyclk_mhz; double dppclk_mhz; double dtbclk_mhz; + float net_bw_in_kbytes_sec; }; /** -- 2.40.1