Re: [PATCH] drm/amdgpu/display: FP fixes for DCN3.x (v4)

Nathan Chancellor <natechancellor@xxxxxxxxx> · Tue, 3 Nov 2020 18:11:43 -0700

On Tue, Nov 03, 2020 at 06:10:39PM -0500, Alex Deucher wrote:
> Add proper FP_START/END handling and adjust Makefiles per
> previous asics.
> 
> v2: fix up harder.
> v3: fix clkmgr Makefile for dcn30
> v4: fix old gcc handling is only required for x86
> 
> Reviewed-by: Harry Wentland <harry.wentland@xxxxxxx> (v1)
> Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas@xxxxxxx> (v1)
> Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx>

This resolves all the build issues I have seen with both GCC and LLVM on
aarch64:

Build-tested-by: Nathan Chancellor <natechancellor@xxxxxxxxx>

> ---
>  .../gpu/drm/amd/display/dc/clk_mgr/Makefile   | 26 +++++++
>  .../display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c  |  4 +-
>  drivers/gpu/drm/amd/display/dc/dcn30/Makefile |  2 +
>  .../drm/amd/display/dc/dcn30/dcn30_resource.c | 71 +++++++++++++++++--
>  drivers/gpu/drm/amd/display/dc/dml/Makefile   |  8 ++-
>  5 files changed, 104 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
> index facc8b970300..d59b380e7b7f 100644
> --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
> +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
> @@ -119,6 +119,19 @@ AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN21)
>  ###############################################################################
>  CLK_MGR_DCN30 = dcn30_clk_mgr.o dcn30_clk_mgr_smu_msg.o
>  
> +# prevent build errors regarding soft-float vs hard-float FP ABI tags
> +# this code is currently unused on ppc64, as it applies to VanGogh APUs only
> +ifdef CONFIG_PPC64
> +CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn30/dcn30_clk_mgr.o := $(call cc-option,-mno-gnu-attribute)
> +endif
> +
> +# prevent build errors:
> +# ...: '-mgeneral-regs-only' is incompatible with the use of floating-point types
> +# this file is unused on arm64, just like on ppc64
> +ifdef CONFIG_ARM64
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/clk_mgr/dcn30/dcn30_clk_mgr.o := -mgeneral-regs-only
> +endif
> +
>  AMD_DAL_CLK_MGR_DCN30 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn30/,$(CLK_MGR_DCN30))
>  
>  AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN30)
> @@ -127,6 +140,19 @@ AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN30)
>  ###############################################################################
>  CLK_MGR_DCN301 = vg_clk_mgr.o dcn301_smu.o
>  
> +# prevent build errors regarding soft-float vs hard-float FP ABI tags
> +# this code is currently unused on ppc64, as it applies to VanGogh APUs only
> +ifdef CONFIG_PPC64
> +CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn301/vg_clk_mgr.o := $(call cc-option,-mno-gnu-attribute)
> +endif
> +
> +# prevent build errors:
> +# ...: '-mgeneral-regs-only' is incompatible with the use of floating-point types
> +# this file is unused on arm64, just like on ppc64
> +ifdef CONFIG_ARM64
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/clk_mgr/dcn301/vg_clk_mgr.o := -mgeneral-regs-only
> +endif
> +
>  AMD_DAL_CLK_MGR_DCN301 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn301/,$(CLK_MGR_DCN301))
>  
>  AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN301)
> diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c
> index 7bad73b2d146..82cb688ba5e0 100644
> --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c
> +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c
> @@ -104,7 +104,7 @@ static void dcn3_init_single_clock(struct clk_mgr_internal *clk_mgr, PPCLK_e clk
>  	}
>  }
>  
> -static void dcn3_build_wm_range_table(struct clk_mgr_internal *clk_mgr)
> +static noinline void dcn3_build_wm_range_table(struct clk_mgr_internal *clk_mgr)
>  {
>  	/* defaults */
>  	double pstate_latency_us = clk_mgr->base.ctx->dc->dml.soc.dram_clock_change_latency_us;
> @@ -211,7 +211,9 @@ void dcn3_init_clocks(struct clk_mgr *clk_mgr_base)
>  	clk_mgr_base->funcs->get_memclk_states_from_smu(clk_mgr_base);
>  
>  	/* WM range table */
> +	DC_FP_START();
>  	dcn3_build_wm_range_table(clk_mgr);
> +	DC_FP_END();
>  }
>  
>  static int dcn30_get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr)
> diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile
> index bd2a068f9863..248c2711aace 100644
> --- a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile
> +++ b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile
> @@ -52,6 +52,7 @@ IS_OLD_GCC = 1
>  endif
>  endif
>  
> +ifdef CONFIG_X86
>  ifdef IS_OLD_GCC
>  # Stack alignment mismatch, proceed with caution.
>  # GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
> @@ -62,6 +63,7 @@ else
>  CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o += -msse2
>  CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o += -msse2
>  endif
> +endif
>  
>  AMD_DAL_DCN30 = $(addprefix $(AMDDALPATH)/dc/dcn30/,$(DCN30))
>  
> diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
> index d65496917e93..01ac8b2921c6 100644
> --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
> +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
> @@ -1469,7 +1469,19 @@ int dcn30_populate_dml_pipes_from_context(
>  	return pipe_cnt;
>  }
>  
> -void dcn30_populate_dml_writeback_from_context(
> +/*
> + * This must be noinline to ensure anything that deals with FP registers
> + * is contained within this call; previously our compiling with hard-float
> + * would result in fp instructions being emitted outside of the boundaries
> + * of the DC_FP_START/END macros, which makes sense as the compiler has no
> + * idea about what is wrapped and what is not
> + *
> + * This is largely just a workaround to avoid breakage introduced with 5.6,
> + * ideally all fp-using code should be moved into its own file, only that
> + * should be compiled with hard-float, and all code exported from there
> + * should be strictly wrapped with DC_FP_START/END
> + */
> +static noinline void dcn30_populate_dml_writeback_from_context_fp(
>  		struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes)
>  {
>  	int pipe_cnt, i, j;
> @@ -1558,6 +1570,14 @@ void dcn30_populate_dml_writeback_from_context(
>  
>  }
>  
> +void dcn30_populate_dml_writeback_from_context(
> +		struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes)
> +{
> +	DC_FP_START();
> +	dcn30_populate_dml_writeback_from_context_fp(dc, res_ctx, pipes);
> +	DC_FP_END();
> +}
> +
>  unsigned int dcn30_calc_max_scaled_time(
>  		unsigned int time_per_pixel,
>  		enum mmhubbub_wbif_mode mode,
> @@ -2204,7 +2224,19 @@ static bool dcn30_internal_validate_bw(
>  	return out;
>  }
>  
> -void dcn30_calculate_wm_and_dlg(
> +/*
> + * This must be noinline to ensure anything that deals with FP registers
> + * is contained within this call; previously our compiling with hard-float
> + * would result in fp instructions being emitted outside of the boundaries
> + * of the DC_FP_START/END macros, which makes sense as the compiler has no
> + * idea about what is wrapped and what is not
> + *
> + * This is largely just a workaround to avoid breakage introduced with 5.6,
> + * ideally all fp-using code should be moved into its own file, only that
> + * should be compiled with hard-float, and all code exported from there
> + * should be strictly wrapped with DC_FP_START/END
> + */
> +static noinline void dcn30_calculate_wm_and_dlg_fp(
>  		struct dc *dc, struct dc_state *context,
>  		display_e2e_pipe_params_st *pipes,
>  		int pipe_cnt,
> @@ -2360,7 +2392,18 @@ void dcn30_calculate_wm_and_dlg(
>  				dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;
>  }
>  
> -bool dcn30_validate_bandwidth(struct dc *dc,
> +void dcn30_calculate_wm_and_dlg(
> +		struct dc *dc, struct dc_state *context,
> +		display_e2e_pipe_params_st *pipes,
> +		int pipe_cnt,
> +		int vlevel)
> +{
> +	DC_FP_START();
> +	dcn30_calculate_wm_and_dlg_fp(dc, context, pipes, pipe_cnt, vlevel);
> +	DC_FP_END();
> +}
> +
> +static noinline bool dcn30_validate_bandwidth_fp(struct dc *dc,
>  		struct dc_state *context,
>  		bool fast_validate)
>  {
> @@ -2411,7 +2454,20 @@ bool dcn30_validate_bandwidth(struct dc *dc,
>  	return out;
>  }
>  
> -static void get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts,
> +bool dcn30_validate_bandwidth(struct dc *dc,
> +		struct dc_state *context,
> +		bool fast_validate)
> +{
> +	bool out;
> +
> +	DC_FP_START();
> +	out = dcn30_validate_bandwidth_fp(dc, context, fast_validate);
> +	DC_FP_END();
> +
> +	return out;
> +}
> +
> +static noinline void get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts,
>                                                         unsigned int *optimal_dcfclk,
>                                                         unsigned int *optimal_fclk)
>  {
> @@ -2478,8 +2534,10 @@ void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params
>  
>  		// Calculate optimal dcfclk for each uclk
>  		for (i = 0; i < num_uclk_states; i++) {
> +			DC_FP_START();
>  			get_optimal_dcfclk_fclk_for_uclk(bw_params->clk_table.entries[i].memclk_mhz * 16,
>  					&optimal_dcfclk_for_uclk[i], NULL);
> +			DC_FP_END();
>  			if (optimal_dcfclk_for_uclk[i] < bw_params->clk_table.entries[0].dcfclk_mhz) {
>  				optimal_dcfclk_for_uclk[i] = bw_params->clk_table.entries[0].dcfclk_mhz;
>  			}
> @@ -2583,6 +2641,8 @@ static bool dcn30_resource_construct(
>  	struct irq_service_init_data init_data;
>  	struct ddc_service_init_data ddc_init_data;
>  
> +	DC_FP_START();
> +
>  	ctx->dc_bios->regs = &bios_regs;
>  
>  	pool->base.res_cap = &res_cap_dcn3;
> @@ -2860,10 +2920,13 @@ static bool dcn30_resource_construct(
>  		pool->base.oem_device = NULL;
>  	}
>  
> +	DC_FP_END();
> +
>  	return true;
>  
>  create_fail:
>  
> +	DC_FP_END();
>  	dcn30_resource_destruct(pool);
>  
>  	return false;
> diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
> index 879a930358a5..a02a33dcd70b 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
> +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
> @@ -64,6 +64,9 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_ccflags)
>  CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := $(dml_ccflags)
>  CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_ccflags)
>  CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_rq_dlg_calc_21.o := $(dml_ccflags)
> +CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_ccflags) -Wframe-larger-than=2048
> +CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags)
> +CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags)
>  CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_rcflags)
>  CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_rcflags)
>  CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20.o := $(dml_rcflags)
> @@ -71,8 +74,9 @@ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_rcflag
>  CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := $(dml_rcflags)
>  CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_rcflags)
>  CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn21/display_rq_dlg_calc_21.o := $(dml_rcflags)
> -CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_ccflags) -Wframe-larger-than=2048
> -CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_rcflags)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_rcflags)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_rcflags)
>  endif
>  CFLAGS_$(AMDDALPATH)/dc/dml/dml1_display_rq_dlg_calc.o := $(dml_ccflags)
>  CFLAGS_$(AMDDALPATH)/dc/dml/display_rq_dlg_helpers.o := $(dml_ccflags)
> -- 
> 2.25.4
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@xxxxxxxxxxxxxxxxxxxxx
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx