From: Aurabindo Pillai <aurabindo.pillai@xxxxxxx> [WHAT] Add registers and entry points to enable DCC on DCN4x Reviewed-by: Rodrigo Siqueira <rodrigo.siqueira@xxxxxxx> Acked-by: Alex Hung <alex.hung@xxxxxxx> Signed-off-by: Aurabindo Pillai <aurabindo.pillai@xxxxxxx> --- drivers/gpu/drm/amd/display/dc/core/dc.c | 12 + .../drm/amd/display/dc/core/dc_hw_sequencer.c | 11 + drivers/gpu/drm/amd/display/dc/dc.h | 4 + .../drm/amd/display/dc/dml2/dml2_wrapper.c | 6 + .../drm/amd/display/dc/dml2/dml2_wrapper.h | 2 +- .../display/dc/hubbub/dcn30/dcn30_hubbub.c | 3 + .../display/dc/hubbub/dcn31/dcn31_hubbub.c | 3 + .../display/dc/hubbub/dcn401/dcn401_hubbub.c | 280 ++++++++++++++++++ .../display/dc/hubbub/dcn401/dcn401_hubbub.h | 5 + .../amd/display/dc/hubp/dcn20/dcn20_hubp.h | 14 + .../amd/display/dc/hubp/dcn401/dcn401_hubp.c | 21 ++ .../amd/display/dc/hubp/dcn401/dcn401_hubp.h | 14 +- .../amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 4 + .../amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 25 ++ .../amd/display/dc/hwss/dcn401/dcn401_hwseq.h | 2 + .../amd/display/dc/hwss/dcn401/dcn401_init.c | 1 + .../drm/amd/display/dc/hwss/hw_sequencer.h | 9 + .../gpu/drm/amd/display/dc/inc/core_types.h | 3 + .../gpu/drm/amd/display/dc/inc/hw/dchubbub.h | 4 + drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h | 1 + .../dc/resource/dcn401/dcn401_resource.c | 9 + .../dc/resource/dcn401/dcn401_resource.h | 2 + 22 files changed, 433 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index a4ba6f99cd34..85a2ef82afa5 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1264,6 +1264,9 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context) apply_ctx_interdependent_lock(dc, dc->current_state, old_stream, false); dc->hwss.post_unlock_program_front_end(dc, dangling_context); } + + if (dc->res_pool->funcs->prepare_mcache_programming) + dc->res_pool->funcs->prepare_mcache_programming(dc, dangling_context); if (dc->hwss.program_front_end_for_ctx) { dc->hwss.interdependent_update_lock(dc, dc->current_state, true); dc->hwss.program_front_end_for_ctx(dc, dangling_context); @@ -2037,6 +2040,8 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c } /* Program all planes within new context*/ + if (dc->res_pool->funcs->prepare_mcache_programming) + dc->res_pool->funcs->prepare_mcache_programming(dc, context); if (dc->hwss.program_front_end_for_ctx) { dc->hwss.interdependent_update_lock(dc, context, true); dc->hwss.program_front_end_for_ctx(dc, context); @@ -3884,6 +3889,9 @@ static void commit_planes_for_stream(struct dc *dc, odm_pipe->ttu_regs.min_ttu_vblank = MAX_TTU; } + if (update_type != UPDATE_TYPE_FAST && dc->res_pool->funcs->prepare_mcache_programming) + dc->res_pool->funcs->prepare_mcache_programming(dc, context); + if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed) if (top_pipe_to_program && top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) { @@ -3903,6 +3911,10 @@ static void commit_planes_for_stream(struct dc *dc, top_pipe_to_program->stream_res.tg); } + if (dc->hwss.wait_for_dcc_meta_propagation) { + dc->hwss.wait_for_dcc_meta_propagation(dc, top_pipe_to_program); + } + if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) { if (dc->hwss.subvp_pipe_control_lock) dc->hwss.subvp_pipe_control_lock(dc, context, true, should_lock_all_pipes, NULL, subvp_prev_use); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c index 5037474bf95c..87e36d51c56d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c @@ -595,6 +595,12 @@ void hwss_build_fast_sequence(struct dc *dc, if (!plane || !stream) return; + if (dc->hwss.wait_for_dcc_meta_propagation) { + block_sequence[*num_steps].params.wait_for_dcc_meta_propagation_params.dc = dc; + block_sequence[*num_steps].params.wait_for_dcc_meta_propagation_params.top_pipe_to_program = pipe_ctx; + block_sequence[*num_steps].func = HUBP_WAIT_FOR_DCC_META_PROP; + (*num_steps)++; + } if (dc->hwss.subvp_pipe_control_lock_fast) { block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.dc = dc; block_sequence[*num_steps].params.subvp_pipe_control_lock_fast_params.lock = true; @@ -835,6 +841,11 @@ void hwss_execute_sequence(struct dc *dc, case DMUB_SUBVP_SAVE_SURF_ADDR: hwss_subvp_save_surf_addr(params); break; + case HUBP_WAIT_FOR_DCC_META_PROP: + dc->hwss.wait_for_dcc_meta_propagation( + params->wait_for_dcc_meta_propagation_params.dc, + params->wait_for_dcc_meta_propagation_params.top_pipe_to_program); + break; case DMUB_FAMS2_GLOBAL_CONTROL_LOCK_FAST: dc->hwss.fams2_global_control_lock_fast(params); break; diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 64241de70f15..40f183816e31 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -333,6 +333,9 @@ struct dc_dcc_setting { uint32_t dcc_128_128_uncontrained : 1; //available in ASICs before DCN 3.0 uint32_t dcc_256_128_128 : 1; //available starting with DCN 3.0 uint32_t dcc_256_256_unconstrained : 1; //available in ASICs before DCN 3.0 (the best compression case) + uint32_t dcc_256_256 : 1; //available in ASICs starting with DCN 4.0x (the best compression case) + uint32_t dcc_256_128 : 1; //available in ASICs starting with DCN 4.0x + uint32_t dcc_256_64 : 1; //available in ASICs starting with DCN 4.0x (the worst compression case) } dcc_controls; }; @@ -1037,6 +1040,7 @@ struct dc_debug_options { unsigned int static_screen_wait_frames; uint32_t pwm_freq; bool force_chroma_subsampling_1tap; + unsigned int dcc_meta_propagation_delay_us; bool disable_422_left_edge_pixel; bool dml21_force_pstate_method; uint32_t dml21_force_pstate_method_values[MAX_PIPES]; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c index 60e2bf4ae6de..c58235121474 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c @@ -821,6 +821,12 @@ void dml2_extract_dram_and_fclk_change_support(struct dml2_context *dml2, *dram_clk_change_support = (unsigned int) dml2->v20.dml_core_ctx.ms.support.DRAMClockChangeSupport[0]; } +void dml2_prepare_mcache_programming(struct dc *in_dc, struct dc_state *context, struct dml2_context *dml2) +{ + if (dml2->architecture == dml2_architecture_21) + dml21_prepare_mcache_programming(in_dc, context, dml2); +} + void dml2_copy(struct dml2_context *dst_dml2, struct dml2_context *src_dml2) { diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h index 1e891a3297c2..6e3d52eb45c7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h @@ -303,5 +303,5 @@ bool dml2_validate(const struct dc *in_dc, */ void dml2_extract_dram_and_fclk_change_support(struct dml2_context *dml2, unsigned int *fclk_change_support, unsigned int *dram_clk_change_support); - +void dml2_prepare_mcache_programming(struct dc *in_dc, struct dc_state *context, struct dml2_context *dml2); #endif //_DML2_WRAPPER_H_ diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c index 6a5af3da4b45..fe741100c0f8 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c @@ -339,6 +339,7 @@ bool hubbub3_get_dcc_compression_cap(struct hubbub *hubbub, return false; switch (dcc_control) { + case dcc_control__256_256: case dcc_control__256_256_xxx: output->grph.rgb.max_uncompressed_blk_size = 256; output->grph.rgb.max_compressed_blk_size = 256; @@ -346,6 +347,7 @@ bool hubbub3_get_dcc_compression_cap(struct hubbub *hubbub, output->grph.rgb.dcc_controls.dcc_256_256_unconstrained = 1; output->grph.rgb.dcc_controls.dcc_256_128_128 = 1; break; + case dcc_control__256_128: case dcc_control__128_128_xxx: output->grph.rgb.max_uncompressed_blk_size = 128; output->grph.rgb.max_compressed_blk_size = 128; @@ -353,6 +355,7 @@ bool hubbub3_get_dcc_compression_cap(struct hubbub *hubbub, output->grph.rgb.dcc_controls.dcc_128_128_uncontrained = 1; output->grph.rgb.dcc_controls.dcc_256_128_128 = 1; break; + case dcc_control__256_64: case dcc_control__256_64_64: output->grph.rgb.max_uncompressed_blk_size = 256; output->grph.rgb.max_compressed_blk_size = 64; diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c index b906db6e7355..7fb5523f9722 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c @@ -866,6 +866,7 @@ static bool hubbub31_get_dcc_compression_cap(struct hubbub *hubbub, return false; switch (dcc_control) { + case dcc_control__256_256: case dcc_control__256_256_xxx: output->grph.rgb.max_uncompressed_blk_size = 256; output->grph.rgb.max_compressed_blk_size = 256; @@ -881,12 +882,14 @@ static bool hubbub31_get_dcc_compression_cap(struct hubbub *hubbub, output->grph.rgb.dcc_controls.dcc_256_128_128 = 1; break; case dcc_control__256_64_64: + case dcc_control__256_64: output->grph.rgb.max_uncompressed_blk_size = 256; output->grph.rgb.max_compressed_blk_size = 64; output->grph.rgb.independent_64b_blks = true; output->grph.rgb.dcc_controls.dcc_256_64_64 = 1; break; case dcc_control__256_128_128: + case dcc_control__256_128: output->grph.rgb.max_uncompressed_blk_size = 256; output->grph.rgb.max_compressed_blk_size = 128; output->grph.rgb.independent_64b_blks = false; diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c index 054607c944a3..5126d603f0b1 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c @@ -824,6 +824,285 @@ void hubbub401_det_request_size( } } } +bool hubbub401_get_dcc_compression_cap(struct hubbub *hubbub, + const struct dc_dcc_surface_param *input, + struct dc_surface_dcc_cap *output) +{ + struct dc *dc = hubbub->ctx->dc; + /* DCN4_Programming_Guide_DCHUB.docx, Section 5.11.2.2 */ + enum dcc_control dcc_control; + unsigned int plane0_bpe, plane1_bpe; + enum segment_order segment_order_horz, segment_order_vert; + enum segment_order p1_segment_order_horz, p1_segment_order_vert; + bool req128_horz_wc, req128_vert_wc; + unsigned int plane0_width = 0, plane0_height = 0, plane1_width = 0, plane1_height = 0; + bool p1_req128_horz_wc, p1_req128_vert_wc, is_dual_plane; + + memset(output, 0, sizeof(*output)); + + if (dc->debug.disable_dcc == DCC_DISABLE) + return false; + + switch (input->format) { + default: + is_dual_plane = false; + + plane1_width = 0; + plane1_height = 0; + + if (input->surface_size.width > 6144 + 16) + plane0_width = 6160; + else + plane0_width = input->surface_size.width; + + if (input->surface_size.height > 6144 + 16) + plane0_height = 6160; + else + plane0_height = input->surface_size.height; + + break; + case SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr: + case SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb: + case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr: + case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb: + is_dual_plane = true; + + if (input->surface_size.width > 7680 + 16) + plane0_width = 7696; + else + plane0_width = input->surface_size.width; + + if (input->surface_size.height > 4320 + 16) + plane0_height = 4336; + else + plane0_height = input->surface_size.height; + + if (input->plane1_size.width > 7680 + 16) + plane1_width = 7696 / 2; + else + plane1_width = input->plane1_size.width; + + if (input->plane1_size.height > 4320 + 16) + plane1_height = 4336 / 2; + else + plane1_height = input->plane1_size.height; + + break; + + case SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA: + is_dual_plane = true; + + if (input->surface_size.width > 5120 + 16) + plane0_width = 5136; + else + plane0_width = input->surface_size.width; + + if (input->surface_size.height > 5120 + 16) + plane0_height = 5136; + else + plane0_height = input->surface_size.height; + + if (input->plane1_size.width > 5120 + 16) + plane1_width = 5136; + else + plane1_width = input->plane1_size.width; + + if (input->plane1_size.height > 5120 + 16) + plane1_height = 5136; + else + plane1_height = input->plane1_size.height; + + break; + } + + if (!hubbub->funcs->dcc_support_pixel_format_plane0_plane1(input->format, + &plane0_bpe, &plane1_bpe)) + return false; + + /* Find plane0 DCC Controls */ + if (!is_dual_plane) { + + if (!hubbub->funcs->dcc_support_swizzle_addr3(input->swizzle_mode_addr3, + input->plane0_pitch, plane0_bpe, + &segment_order_horz, &segment_order_vert)) + return false; + + hubbub401_det_request_size(TO_DCN20_HUBBUB(hubbub)->detile_buf_size, input->format, + plane0_height, plane0_width, plane0_bpe, + plane1_height, plane1_width, plane1_bpe, + &req128_horz_wc, &req128_vert_wc, &p1_req128_horz_wc, &p1_req128_vert_wc); + + if (!req128_horz_wc && !req128_vert_wc) { + dcc_control = dcc_control__256_256; + } else if (input->scan == SCAN_DIRECTION_HORIZONTAL) { + if (!req128_horz_wc) + dcc_control = dcc_control__256_256; + else if (segment_order_horz == segment_order__contiguous) + dcc_control = dcc_control__256_128; + else + dcc_control = dcc_control__256_64; + } else if (input->scan == SCAN_DIRECTION_VERTICAL) { + if (!req128_vert_wc) + dcc_control = dcc_control__256_256; + else if (segment_order_vert == segment_order__contiguous) + dcc_control = dcc_control__256_128; + else + dcc_control = dcc_control__256_64; + } else { + if ((req128_horz_wc && + segment_order_horz == segment_order__non_contiguous) || + (req128_vert_wc && + segment_order_vert == segment_order__non_contiguous)) + /* access_dir not known, must use most constraining */ + dcc_control = dcc_control__256_64; + else + /* req128 is true for either horz and vert + * but segment_order is contiguous + */ + dcc_control = dcc_control__256_128; + } + + if (dc->debug.disable_dcc == DCC_HALF_REQ_DISALBE && + dcc_control != dcc_control__256_256) + return false; + + switch (dcc_control) { + case dcc_control__256_256: + output->grph.rgb.dcc_controls.dcc_256_256 = 1; + output->grph.rgb.dcc_controls.dcc_256_128 = 1; + output->grph.rgb.dcc_controls.dcc_256_64 = 1; + break; + case dcc_control__256_128: + output->grph.rgb.dcc_controls.dcc_256_128 = 1; + output->grph.rgb.dcc_controls.dcc_256_64 = 1; + break; + case dcc_control__256_64: + output->grph.rgb.dcc_controls.dcc_256_64 = 1; + break; + default: + /* Shouldn't get here */ + ASSERT(0); + break; + } + } else { + /* For dual plane cases, need to examine both planes together */ + if (!hubbub->funcs->dcc_support_swizzle_addr3(input->swizzle_mode_addr3, + input->plane0_pitch, plane0_bpe, + &segment_order_horz, &segment_order_vert)) + return false; + + if (!hubbub->funcs->dcc_support_swizzle_addr3(input->swizzle_mode_addr3, + input->plane1_pitch, plane1_bpe, + &p1_segment_order_horz, &p1_segment_order_vert)) + return false; + + hubbub401_det_request_size(TO_DCN20_HUBBUB(hubbub)->detile_buf_size, input->format, + plane0_height, plane0_width, plane0_bpe, + plane1_height, plane1_width, plane1_bpe, + &req128_horz_wc, &req128_vert_wc, &p1_req128_horz_wc, &p1_req128_vert_wc); + + /* Determine Plane 0 DCC Controls */ + if (!req128_horz_wc && !req128_vert_wc) { + dcc_control = dcc_control__256_256; + } else if (input->scan == SCAN_DIRECTION_HORIZONTAL) { + if (!req128_horz_wc) + dcc_control = dcc_control__256_256; + else if (segment_order_horz == segment_order__contiguous) + dcc_control = dcc_control__256_128; + else + dcc_control = dcc_control__256_64; + } else if (input->scan == SCAN_DIRECTION_VERTICAL) { + if (!req128_vert_wc) + dcc_control = dcc_control__256_256; + else if (segment_order_vert == segment_order__contiguous) + dcc_control = dcc_control__256_128; + else + dcc_control = dcc_control__256_64; + } else { + if ((req128_horz_wc && + segment_order_horz == segment_order__non_contiguous) || + (req128_vert_wc && + segment_order_vert == segment_order__non_contiguous)) + /* access_dir not known, must use most constraining */ + dcc_control = dcc_control__256_64; + else + /* req128 is true for either horz and vert + * but segment_order is contiguous + */ + dcc_control = dcc_control__256_128; + } + + switch (dcc_control) { + case dcc_control__256_256: + output->video.luma.dcc_controls.dcc_256_256 = 1; + output->video.luma.dcc_controls.dcc_256_128 = 1; + output->video.luma.dcc_controls.dcc_256_64 = 1; + break; + case dcc_control__256_128: + output->video.luma.dcc_controls.dcc_256_128 = 1; + output->video.luma.dcc_controls.dcc_256_64 = 1; + break; + case dcc_control__256_64: + output->video.luma.dcc_controls.dcc_256_64 = 1; + break; + default: + ASSERT(0); + break; + } + + /* Determine Plane 1 DCC Controls */ + if (!p1_req128_horz_wc && !p1_req128_vert_wc) { + dcc_control = dcc_control__256_256; + } else if (input->scan == SCAN_DIRECTION_HORIZONTAL) { + if (!p1_req128_horz_wc) + dcc_control = dcc_control__256_256; + else if (p1_segment_order_horz == segment_order__contiguous) + dcc_control = dcc_control__256_128; + else + dcc_control = dcc_control__256_64; + } else if (input->scan == SCAN_DIRECTION_VERTICAL) { + if (!p1_req128_vert_wc) + dcc_control = dcc_control__256_256; + else if (p1_segment_order_vert == segment_order__contiguous) + dcc_control = dcc_control__256_128; + else + dcc_control = dcc_control__256_64; + } else { + if ((p1_req128_horz_wc && + p1_segment_order_horz == segment_order__non_contiguous) || + (p1_req128_vert_wc && + p1_segment_order_vert == segment_order__non_contiguous)) + /* access_dir not known, must use most constraining */ + dcc_control = dcc_control__256_64; + else + /* req128 is true for either horz and vert + * but segment_order is contiguous + */ + dcc_control = dcc_control__256_128; + } + + switch (dcc_control) { + case dcc_control__256_256: + output->video.chroma.dcc_controls.dcc_256_256 = 1; + output->video.chroma.dcc_controls.dcc_256_128 = 1; + output->video.chroma.dcc_controls.dcc_256_64 = 1; + break; + case dcc_control__256_128: + output->video.chroma.dcc_controls.dcc_256_128 = 1; + output->video.chroma.dcc_controls.dcc_256_64 = 1; + break; + case dcc_control__256_64: + output->video.chroma.dcc_controls.dcc_256_64 = 1; + break; + default: + ASSERT(0); + break; + } + } + + output->capable = true; + return true; +} static void dcn401_program_det_segments(struct hubbub *hubbub, int hubp_inst, unsigned det_buffer_size_seg) { @@ -891,6 +1170,7 @@ static const struct hubbub_funcs hubbub4_01_funcs = { .init_vm_ctx = hubbub2_init_vm_ctx, .dcc_support_swizzle_addr3 = hubbub401_dcc_support_swizzle, .dcc_support_pixel_format_plane0_plane1 = hubbub401_dcc_support_pixel_format, + .get_dcc_compression_cap = hubbub401_get_dcc_compression_cap, .wm_read_state = hubbub401_wm_read_state, .get_dchub_ref_freq = hubbub2_get_dchub_ref_freq, .program_watermarks = hubbub401_program_watermarks, diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.h b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.h index d8a57f64a70c..f35f19ba3e18 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.h +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.h @@ -180,6 +180,11 @@ void hubbub401_det_request_size( bool *p0_req128_vert_wc, bool *p1_req128_horz_wc, bool *p1_req128_vert_wc); +bool hubbub401_get_dcc_compression_cap( + struct hubbub *hubbub, + const struct dc_dcc_surface_param *input, + struct dc_surface_dcc_cap *output); + void hubbub401_construct(struct dcn20_hubbub *hubbub2, struct dc_context *ctx, const struct dcn_hubbub_registers *hubbub_regs, diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.h index ecc0a2f37938..18e194507e36 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.h @@ -175,6 +175,8 @@ uint32_t HUBP_3DLUT_ADDRESS_LOW;\ uint32_t HUBP_3DLUT_CONTROL;\ uint32_t HUBP_3DLUT_DLG_PARAM;\ + uint32_t DCSURF_VIEWPORT_MCACHE_SPLIT_COORDINATE;\ + uint32_t DCHUBP_MCACHEID_CONFIG #define DCN2_HUBP_REG_FIELD_VARIABLE_LIST(type) \ DCN_HUBP_REG_FIELD_BASE_LIST(type); \ @@ -269,6 +271,18 @@ type HUBP_3DLUT_ADDRESS_HIGH;\ type HUBP_3DLUT_ADDRESS_LOW;\ type REFCYC_PER_3DLUT_GROUP;\ + type VIEWPORT_MCACHE_SPLIT_COORDINATE;\ + type VIEWPORT_MCACHE_SPLIT_COORDINATE_C;\ + type MCACHEID_REG_READ_1H_P0;\ + type MCACHEID_REG_READ_2H_P0;\ + type MCACHEID_REG_READ_1H_P1;\ + type MCACHEID_REG_READ_2H_P1;\ + type MCACHEID_MALL_PREF_1H_P0;\ + type MCACHEID_MALL_PREF_2H_P0;\ + type MCACHEID_MALL_PREF_1H_P1;\ + type MCACHEID_MALL_PREF_2H_P1 + + struct dcn_hubp2_registers { DCN401_HUBP_REG_COMMON_VARIABLE_LIST; diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c index 3f9ca9b40949..f0c45a74c2e5 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c @@ -626,6 +626,26 @@ void hubp401_set_viewport( SEC_VIEWPORT_Y_START_C, viewport_c->y); } +void hubp401_program_mcache_id_and_split_coordinate( + struct hubp *hubp, + struct dml2_hubp_pipe_mcache_regs *mcache_regs) +{ + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + + REG_SET_8(DCHUBP_MCACHEID_CONFIG, 0, + MCACHEID_REG_READ_1H_P0, mcache_regs->main.p0.mcache_id_first, + MCACHEID_REG_READ_2H_P0, mcache_regs->main.p0.mcache_id_second, + MCACHEID_REG_READ_1H_P1, mcache_regs->main.p1.mcache_id_first, + MCACHEID_REG_READ_2H_P1, mcache_regs->main.p1.mcache_id_second, + MCACHEID_MALL_PREF_1H_P0, mcache_regs->mall.p0.mcache_id_first, + MCACHEID_MALL_PREF_2H_P0, mcache_regs->mall.p0.mcache_id_second, + MCACHEID_MALL_PREF_1H_P1, mcache_regs->mall.p1.mcache_id_first, + MCACHEID_MALL_PREF_2H_P1, mcache_regs->mall.p1.mcache_id_second); + + REG_SET_2(DCSURF_VIEWPORT_MCACHE_SPLIT_COORDINATE, 0, + VIEWPORT_MCACHE_SPLIT_COORDINATE, mcache_regs->main.p0.split_location, + VIEWPORT_MCACHE_SPLIT_COORDINATE_C, mcache_regs->main.p1.split_location); +} void hubp401_set_flip_int(struct hubp *hubp) { struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); @@ -963,6 +983,7 @@ static struct hubp_funcs dcn401_hubp_funcs = { .phantom_hubp_post_enable = hubp32_phantom_hubp_post_enable, .hubp_update_mall_sel = hubp401_update_mall_sel, .hubp_prepare_subvp_buffering = hubp32_prepare_subvp_buffering, + .hubp_program_mcache_id_and_split_coordinate = hubp401_program_mcache_id_and_split_coordinate, .hubp_update_3dlut_fl_bias_scale = hubp401_update_3dlut_fl_bias_scale, .hubp_program_3dlut_fl_mode = hubp401_program_3dlut_fl_mode, .hubp_program_3dlut_fl_format = hubp401_program_3dlut_fl_format, diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h index e0cec898a2c0..e52fdb5b0cd0 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h @@ -243,6 +243,16 @@ HUBP_SF(CURSOR0_0_HUBP_3DLUT_ADDRESS_HIGH, HUBP_3DLUT_ADDRESS_HIGH, mask_sh),\ HUBP_SF(CURSOR0_0_HUBP_3DLUT_ADDRESS_LOW, HUBP_3DLUT_ADDRESS_LOW, mask_sh),\ HUBP_SF(CURSOR0_0_HUBP_3DLUT_DLG_PARAM, REFCYC_PER_3DLUT_GROUP, mask_sh),\ + HUBP_SF(HUBP0_DCSURF_VIEWPORT_MCACHE_SPLIT_COORDINATE, VIEWPORT_MCACHE_SPLIT_COORDINATE, mask_sh),\ + HUBP_SF(HUBP0_DCSURF_VIEWPORT_MCACHE_SPLIT_COORDINATE, VIEWPORT_MCACHE_SPLIT_COORDINATE_C, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_MCACHEID_CONFIG, MCACHEID_REG_READ_1H_P0, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_MCACHEID_CONFIG, MCACHEID_REG_READ_2H_P0, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_MCACHEID_CONFIG, MCACHEID_REG_READ_1H_P1, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_MCACHEID_CONFIG, MCACHEID_REG_READ_2H_P1, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_MCACHEID_CONFIG, MCACHEID_MALL_PREF_1H_P0, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_MCACHEID_CONFIG, MCACHEID_MALL_PREF_2H_P0, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_MCACHEID_CONFIG, MCACHEID_MALL_PREF_1H_P1, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_MCACHEID_CONFIG, MCACHEID_MALL_PREF_2H_P1, mask_sh) void hubp401_update_mall_sel(struct hubp *hubp, uint32_t mall_sel, bool c_cursor); @@ -302,7 +312,9 @@ void hubp401_program_surface_config( void hubp401_set_viewport(struct hubp *hubp, const struct rect *viewport, const struct rect *viewport_c); - +void hubp401_program_mcache_id_and_split_coordinate( + struct hubp *hubp, + struct dml2_hubp_pipe_mcache_regs *mcache_regs); void hubp401_set_flip_int(struct hubp *hubp); bool hubp401_in_blank(struct hubp *hubp); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index 36797ed7ad8c..2532ad410cb5 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -1753,6 +1753,10 @@ static void dcn20_update_dchubp_dpp( &pipe_ctx->plane_res.scl_data.viewport_c); viewport_changed = true; } + if (hubp->funcs->hubp_program_mcache_id_and_split_coordinate) + hubp->funcs->hubp_program_mcache_id_and_split_coordinate( + hubp, + &pipe_ctx->mcache_regs); /* Any updates are handled in dc interface, just need to apply existing for plane enable */ if ((pipe_ctx->update_flags.bits.enable || pipe_ctx->update_flags.bits.opp_changed || diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index b5a02a8fc9d8..11570ef06086 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -1336,6 +1336,31 @@ bool dcn401_apply_idle_power_optimizations(struct dc *dc, bool enable) return true; } +void dcn401_wait_for_dcc_meta_propagation(const struct dc *dc, + const struct pipe_ctx *top_pipe) +{ + bool is_wait_needed = false; + const struct pipe_ctx *pipe_ctx = top_pipe; + + /* check if any surfaces are updating address while using flip immediate and dcc */ + while (pipe_ctx != NULL) { + if (pipe_ctx->plane_state && + pipe_ctx->plane_state->dcc.enable && + pipe_ctx->plane_state->flip_immediate && + pipe_ctx->plane_state->update_flags.bits.addr_update) { + is_wait_needed = true; + break; + } + + /* check next pipe */ + pipe_ctx = pipe_ctx->bottom_pipe; + } + + if (is_wait_needed && dc->debug.dcc_meta_propagation_delay_us > 0) { + udelay(dc->debug.dcc_meta_propagation_delay_us); + } +} + void dcn401_prepare_bandwidth(struct dc *dc, struct dc_state *context) { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h index bada43d4b2eb..c1d4287d5a0d 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h @@ -61,6 +61,8 @@ bool dcn401_apply_idle_power_optimizations(struct dc *dc, bool enable); struct ips_ono_region_state dcn401_read_ono_state(struct dc *dc, uint8_t region); +void dcn401_wait_for_dcc_meta_propagation(const struct dc *dc, + const struct pipe_ctx *top_pipe_to_program); void dcn401_prepare_bandwidth(struct dc *dc, struct dc_state *context); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c index 8159fd838dc3..6a768702c7bd 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c @@ -94,6 +94,7 @@ static const struct hw_sequencer_funcs dcn401_funcs = { .update_dsc_pg = dcn32_update_dsc_pg, .apply_update_flags_for_phantom = dcn32_apply_update_flags_for_phantom, .blank_phantom = dcn32_blank_phantom, + .wait_for_dcc_meta_propagation = dcn401_wait_for_dcc_meta_propagation, .is_pipe_topology_transition_seamless = dcn32_is_pipe_topology_transition_seamless, .fams2_global_control_lock = dcn401_fams2_global_control_lock, .fams2_update_config = dcn401_fams2_update_config, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h index e9b85884edce..d05be65a2256 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h @@ -141,6 +141,11 @@ struct subvp_save_surf_addr { uint8_t subvp_index; }; +struct wait_for_dcc_meta_propagation_params { + const struct dc *dc; + const struct pipe_ctx *top_pipe_to_program; +}; + struct fams2_global_control_lock_fast_params { struct dc *dc; bool is_required; @@ -165,6 +170,7 @@ union block_sequence_params { struct set_output_csc_params set_output_csc_params; struct set_ocsc_default_params set_ocsc_default_params; struct subvp_save_surf_addr subvp_save_surf_addr; + struct wait_for_dcc_meta_propagation_params wait_for_dcc_meta_propagation_params; struct fams2_global_control_lock_fast_params fams2_global_control_lock_fast_params; }; @@ -186,6 +192,7 @@ enum block_sequence_func { MPC_SET_OUTPUT_CSC, MPC_SET_OCSC_DEFAULT, DMUB_SUBVP_SAVE_SURF_ADDR, + HUBP_WAIT_FOR_DCC_META_PROP, DMUB_FAMS2_GLOBAL_CONTROL_LOCK_FAST, }; @@ -443,6 +450,8 @@ struct hw_sequencer_funcs { bool (*is_pipe_topology_transition_seamless)(struct dc *dc, const struct dc_state *cur_ctx, const struct dc_state *new_ctx); + void (*wait_for_dcc_meta_propagation)(const struct dc *dc, + const struct pipe_ctx *top_pipe_to_program); void (*fams2_global_control_lock)(struct dc *dc, struct dc_state *context, bool lock); diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index f58c27ad8b3e..4c8e6436c7e1 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -97,6 +97,9 @@ struct resource_funcs { unsigned int (*calculate_mall_ways_from_bytes)( const struct dc *dc, unsigned int total_size_in_mall_bytes); + void (*prepare_mcache_programming)( + struct dc *dc, + struct dc_state *context); /** * @populate_dml_pipes - Populate pipe data struct * diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h index a73cb8f731b3..dd2b2864876c 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h @@ -40,6 +40,10 @@ enum dcc_control { dcc_control__128_128_xxx, dcc_control__256_64_64, dcc_control__256_128_128, + dcc_control__256_256, + dcc_control__256_128, + dcc_control__256_64, + }; enum segment_order { diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h index bcd7b22a1627..16580d624278 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h @@ -257,6 +257,7 @@ struct hubp_funcs { unsigned int min_dst_y_next_start_optimized); void (*hubp_wait_pipe_read_start)(struct hubp *hubp); + void (*hubp_program_mcache_id_and_split_coordinate)(struct hubp *hubp, struct dml2_hubp_pipe_mcache_regs *mcache_regs); void (*hubp_update_3dlut_fl_bias_scale)(struct hubp *hubp, uint16_t bias, uint16_t scale); void (*hubp_program_3dlut_fl_mode)(struct hubp *hubp, enum hubp_3dlut_fl_mode mode); diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c index 74fb21b88f12..4e27d2cee9fb 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c @@ -1617,6 +1617,14 @@ bool dcn401_validate_bandwidth(struct dc *dc, return out; } +void dcn401_prepare_mcache_programming(struct dc *dc, + struct dc_state *context) +{ + if (dc->debug.using_dml21) + dml2_prepare_mcache_programming(dc, context, + context->power_source == DC_POWER_SOURCE_DC ? context->bw_ctx.dml2_dc_power_source : context->bw_ctx.dml2); +} + static void dcn401_build_pipe_pix_clk_params(struct pipe_ctx *pipe_ctx) { const struct dc_stream_state *stream = pipe_ctx->stream; @@ -1699,6 +1707,7 @@ static struct resource_funcs dcn401_res_pool_funcs = { .patch_unknown_plane_state = dcn401_patch_unknown_plane_state, .update_soc_for_wm_a = dcn30_update_soc_for_wm_a, .add_phantom_pipes = dcn32_add_phantom_pipes, + .prepare_mcache_programming = dcn401_prepare_mcache_programming, .build_pipe_pix_clk_params = dcn401_build_pipe_pix_clk_params, .calculate_mall_ways_from_bytes = dcn32_calculate_mall_ways_from_bytes, }; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h index c04c8b8f2114..26efeada4f41 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h @@ -26,6 +26,8 @@ bool dcn401_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate); +void dcn401_prepare_mcache_programming(struct dc *dc, struct dc_state *context); + /* Following are definitions for run time init of reg offsets */ /* HUBP */ -- 2.34.1