From: Jun Lei <Jun.Lei@xxxxxxx> [why] Existing support in DC for pstate only accounts for a single latency. This is sufficient when the variance of latency is small, or that pstate support isn't necessary for correct ASIC functionality. Newer ASICs violate both existing assumptions. PState support is mandatory of correct ASIC functionality, but not all latencies have to be supported. Existing code supports a "full p state" which allows memory clock to change, but is hard for DCN to support (as it requires very large buffers). New code will now fall back to a "dummy p state" support when "full p state" cannot be support. This easy p state support should always be allowed. [how] Define a new latency in socBB. Add fallback logic to support it. Note DML is also updated to ensure that fallback will always work. Change-Id: I068e34cae8fa2f7cd31b530a68822c479525048c Signed-off-by: Jun Lei <Jun.Lei@xxxxxxx> Reviewed-by: Dmytro Laktyushkin <Dmytro.Laktyushkin@xxxxxxx> Acked-by: Leo Li <sunpeng.li@xxxxxxx> --- .../display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c | 2 + drivers/gpu/drm/amd/display/dc/dc.h | 7 + .../drm/amd/display/dc/dcn20/dcn20_hubbub.c | 11 + .../drm/amd/display/dc/dcn20/dcn20_hwseq.c | 10 +- .../drm/amd/display/dc/dcn20/dcn20_resource.c | 59 +- drivers/gpu/drm/amd/display/dc/dml/Makefile | 3 + .../dc/dml/dcn20/display_mode_vba_20v2.c | 5109 +++++++++++++++++ .../dc/dml/dcn20/display_mode_vba_20v2.h | 32 + .../dc/dml/dcn20/display_rq_dlg_calc_20v2.c | 1701 ++++++ .../dc/dml/dcn20/display_rq_dlg_calc_20v2.h | 74 + .../drm/amd/display/dc/dml/display_mode_lib.c | 12 + .../drm/amd/display/dc/dml/display_mode_lib.h | 1 + .../amd/display/dc/dml/display_mode_structs.h | 1 + .../drm/amd/display/dc/dml/display_mode_vba.c | 8 +- 14 files changed, 7022 insertions(+), 8 deletions(-) create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.h create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.h diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c index 3cff4f0518d3..7ff0396956b3 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c @@ -201,6 +201,7 @@ void dcn2_update_clocks(struct clk_mgr *clk_mgr_base, } if (should_update_pstate_support(safe_to_lower, new_clocks->p_state_change_support, clk_mgr_base->clks.p_state_change_support)) { + clk_mgr_base->clks.prev_p_state_change_support = clk_mgr_base->clks.p_state_change_support; clk_mgr_base->clks.p_state_change_support = new_clocks->p_state_change_support; if (pp_smu && pp_smu->set_pstate_handshake_support) pp_smu->set_pstate_handshake_support(&pp_smu->pp_smu, clk_mgr_base->clks.p_state_change_support); @@ -308,6 +309,7 @@ void dcn2_init_clocks(struct clk_mgr *clk_mgr) memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks)); // Assumption is that boot state always supports pstate clk_mgr->clks.p_state_change_support = true; + clk_mgr->clks.prev_p_state_change_support = true; } void dcn2_enable_pme_wa(struct clk_mgr *clk_mgr_base) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index b12eee93c253..6da0a6fe2973 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -121,6 +121,7 @@ struct dc_caps { struct dc_bug_wa { bool no_connect_phy_config; bool dedcn20_305_wa; + struct display_mode_lib alternate_dml; }; #endif @@ -263,6 +264,12 @@ struct dc_clocks { int phyclk_khz; int dramclk_khz; bool p_state_change_support; + + /* + * Elements below are not compared for the purposes of + * optimization required + */ + bool prev_p_state_change_support; }; struct dc_bw_validation_profile { diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c index 6e2dbd03f9bf..31d6e79ba2b8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c @@ -26,6 +26,7 @@ #include "dcn20_hubbub.h" #include "reg_helper.h" +#include "clk_mgr.h" #define REG(reg)\ hubbub1->regs->reg @@ -553,6 +554,16 @@ static void hubbub2_program_watermarks( */ hubbub1_program_urgent_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower); hubbub1_program_stutter_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower); + + /* + * There's a special case when going from p-state support to p-state unsupported + * here we are going to LOWER watermarks to go to dummy p-state only, but this has + * to be done prepare_bandwidth, not optimize + */ + if (hubbub1->base.ctx->dc->clk_mgr->clks.prev_p_state_change_support == true && + hubbub1->base.ctx->dc->clk_mgr->clks.p_state_change_support == false) + safe_to_lower = true; + hubbub1_program_pstate_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower); REG_SET(DCHUBBUB_ARB_SAT_LEVEL, 0, diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index 76ce622e1421..b61774d2e8b2 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -1443,16 +1443,16 @@ void dcn20_prepare_bandwidth( { struct hubbub *hubbub = dc->res_pool->hubbub; + dc->clk_mgr->funcs->update_clocks( + dc->clk_mgr, + context, + false); + /* program dchubbub watermarks */ hubbub->funcs->program_watermarks(hubbub, &context->bw_ctx.bw.dcn.watermarks, dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000, false); - - dc->clk_mgr->funcs->update_clocks( - dc->clk_mgr, - context, - false); } void dcn20_optimize_bandwidth( diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 193270ba60e6..2cf788a3704e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -2425,7 +2425,7 @@ void dcn20_calculate_dlg_params( } } -bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context, +static bool dcn20_validate_bandwidth_internal(struct dc *dc, struct dc_state *context, bool fast_validate) { bool out = false; @@ -2477,6 +2477,62 @@ bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context, return out; } + +bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context, + bool fast_validate) +{ + bool voltage_supported = false; + bool full_pstate_supported = false; + bool dummy_pstate_supported = false; + double p_state_latency_us = context->bw_ctx.dml.soc.dram_clock_change_latency_us; + + if (fast_validate) + return dcn20_validate_bandwidth_internal(dc, context, true); + + + // Best case, we support full UCLK switch latency + voltage_supported = dcn20_validate_bandwidth_internal(dc, context, false); + full_pstate_supported = context->bw_ctx.bw.dcn.clk.p_state_change_support; + + if (context->bw_ctx.dml.soc.dummy_pstate_latency_us == 0 || + (voltage_supported && full_pstate_supported)) { + context->bw_ctx.bw.dcn.clk.p_state_change_support = true; + goto restore_dml_state; + } + + // Fallback #1: Try to only support G6 temperature read latency + context->bw_ctx.dml.soc.dram_clock_change_latency_us = context->bw_ctx.dml.soc.dummy_pstate_latency_us; + + voltage_supported = dcn20_validate_bandwidth_internal(dc, context, false); + dummy_pstate_supported = context->bw_ctx.bw.dcn.clk.p_state_change_support; + + if (voltage_supported && dummy_pstate_supported) { + context->bw_ctx.bw.dcn.clk.p_state_change_support = false; + goto restore_dml_state; + } + + // Fallback #2: Retry with "new" DCN20 to support G6 temperature read latency + memcpy (&context->bw_ctx.dml, &dc->work_arounds.alternate_dml, sizeof (struct display_mode_lib)); + context->bw_ctx.dml.soc.dram_clock_change_latency_us = context->bw_ctx.dml.soc.dummy_pstate_latency_us; + + voltage_supported = dcn20_validate_bandwidth_internal(dc, context, false); + dummy_pstate_supported = context->bw_ctx.bw.dcn.clk.p_state_change_support; + + if (voltage_supported && dummy_pstate_supported) { + context->bw_ctx.bw.dcn.clk.p_state_change_support = false; + goto restore_dml_state; + } + + // ERROR: fallback #2 is supposed to always work. + ASSERT(false); + +restore_dml_state: + memcpy(&context->bw_ctx.dml, &dc->dml, sizeof(struct display_mode_lib)); + context->bw_ctx.dml.soc.dram_clock_change_latency_us = p_state_latency_us; + + return voltage_supported; +} + struct pipe_ctx *dcn20_acquire_idle_pipe_for_layer( struct dc_state *state, const struct resource_pool *pool, @@ -3073,6 +3129,7 @@ static bool construct( } dml_init_instance(&dc->dml, &dcn2_0_soc, &dcn2_0_ip, DML_PROJECT_NAVI10); + dml_init_instance(&dc->work_arounds.alternate_dml, &dcn2_0_soc, &dcn2_0_ip, DML_PROJECT_NAVI10v2); if (!dc->debug.disable_pplib_wm_range) { struct pp_smu_wm_range_sets ranges = {0}; diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index 0bb7a20675c4..1735fc1e2eb1 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -38,6 +38,8 @@ ifdef CONFIG_DRM_AMD_DC_DCN2_0 CFLAGS_display_mode_vba.o := $(dml_ccflags) CFLAGS_display_mode_vba_20.o := $(dml_ccflags) CFLAGS_display_rq_dlg_calc_20.o := $(dml_ccflags) +CFLAGS_display_mode_vba_20v2.o := $(dml_ccflags) +CFLAGS_display_rq_dlg_calc_20v2.o := $(dml_ccflags) endif ifdef CONFIG_DRM_AMD_DCN3AG CFLAGS_display_mode_vba_3ag.o := $(dml_ccflags) @@ -51,6 +53,7 @@ DML = display_mode_lib.o display_rq_dlg_helpers.o dml1_display_rq_dlg_calc.o \ ifdef CONFIG_DRM_AMD_DC_DCN2_0 DML += display_mode_vba.o dcn20/display_rq_dlg_calc_20.o dcn20/display_mode_vba_20.o +DML += dcn20/display_rq_dlg_calc_20v2.o dcn20/display_mode_vba_20v2.o endif AMD_DAL_DML = $(addprefix $(AMDDALPATH)/dc/dml/,$(DML)) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c new file mode 100644 index 000000000000..22455db54980 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c @@ -0,0 +1,5109 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "../display_mode_lib.h" +#include "display_mode_vba_20v2.h" +#include "../dml_inline_defs.h" + +/* + * NOTE: + * This file is gcc-parseable HW gospel, coming straight from HW engineers. + * + * It doesn't adhere to Linux kernel style and sometimes will do things in odd + * ways. Unless there is something clearly wrong with it the code should + * remain as-is as it provides us with a guarantee from HW that it is correct. + */ + +#define BPP_INVALID 0 +#define BPP_BLENDED_PIPE 0xffffffff + +static double adjust_ReturnBW( + struct display_mode_lib *mode_lib, + double ReturnBW, + bool DCCEnabledAnyPlane, + double ReturnBandwidthToDCN); +static unsigned int dscceComputeDelay( + unsigned int bpc, + double bpp, + unsigned int sliceWidth, + unsigned int numSlices, + enum output_format_class pixelFormat); +static unsigned int dscComputeDelay(enum output_format_class pixelFormat); +static bool CalculateDelayAfterScaler( + struct display_mode_lib *mode_lib, + double ReturnBW, + double ReadBandwidthPlaneLuma, + double ReadBandwidthPlaneChroma, + double TotalDataReadBandwidth, + double DisplayPipeLineDeliveryTimeLuma, + double DisplayPipeLineDeliveryTimeChroma, + double DPPCLK, + double DISPCLK, + double PixelClock, + unsigned int DSCDelay, + unsigned int DPPPerPlane, + bool ScalerEnabled, + unsigned int NumberOfCursors, + double DPPCLKDelaySubtotal, + double DPPCLKDelaySCL, + double DPPCLKDelaySCLLBOnly, + double DPPCLKDelayCNVCFormater, + double DPPCLKDelayCNVCCursor, + double DISPCLKDelaySubtotal, + unsigned int ScalerRecoutWidth, + enum output_format_class OutputFormat, + unsigned int HTotal, + unsigned int SwathWidthSingleDPPY, + double BytePerPixelDETY, + double BytePerPixelDETC, + unsigned int SwathHeightY, + unsigned int SwathHeightC, + bool Interlace, + bool ProgressiveToInterlaceUnitInOPP, + double *DSTXAfterScaler, + double *DSTYAfterScaler + ); +// Super monster function with some 45 argument +static bool CalculatePrefetchSchedule( + struct display_mode_lib *mode_lib, + double DPPCLK, + double DISPCLK, + double PixelClock, + double DCFCLKDeepSleep, + unsigned int DPPPerPlane, + unsigned int NumberOfCursors, + unsigned int VBlank, + unsigned int HTotal, + unsigned int MaxInterDCNTileRepeaters, + unsigned int VStartup, + unsigned int PageTableLevels, + bool GPUVMEnable, + bool DynamicMetadataEnable, + unsigned int DynamicMetadataLinesBeforeActiveRequired, + unsigned int DynamicMetadataTransmittedBytes, + bool DCCEnable, + double UrgentLatencyPixelDataOnly, + double UrgentExtraLatency, + double TCalc, + unsigned int PDEAndMetaPTEBytesFrame, + unsigned int MetaRowByte, + unsigned int PixelPTEBytesPerRow, + double PrefetchSourceLinesY, + unsigned int SwathWidthY, + double BytePerPixelDETY, + double VInitPreFillY, + unsigned int MaxNumSwathY, + double PrefetchSourceLinesC, + double BytePerPixelDETC, + double VInitPreFillC, + unsigned int MaxNumSwathC, + unsigned int SwathHeightY, + unsigned int SwathHeightC, + double TWait, + bool XFCEnabled, + double XFCRemoteSurfaceFlipDelay, + bool InterlaceEnable, + bool ProgressiveToInterlaceUnitInOPP, + double DSTXAfterScaler, + double DSTYAfterScaler, + double *DestinationLinesForPrefetch, + double *PrefetchBandwidth, + double *DestinationLinesToRequestVMInVBlank, + double *DestinationLinesToRequestRowInVBlank, + double *VRatioPrefetchY, + double *VRatioPrefetchC, + double *RequiredPrefetchPixDataBW, + double *Tno_bw, + unsigned int *VUpdateOffsetPix, + double *VUpdateWidthPix, + double *VReadyOffsetPix); +static double RoundToDFSGranularityUp(double Clock, double VCOSpeed); +static double RoundToDFSGranularityDown(double Clock, double VCOSpeed); +static double CalculatePrefetchSourceLines( + struct display_mode_lib *mode_lib, + double VRatio, + double vtaps, + bool Interlace, + bool ProgressiveToInterlaceUnitInOPP, + unsigned int SwathHeight, + unsigned int ViewportYStart, + double *VInitPreFill, + unsigned int *MaxNumSwath); +static unsigned int CalculateVMAndRowBytes( + struct display_mode_lib *mode_lib, + bool DCCEnable, + unsigned int BlockHeight256Bytes, + unsigned int BlockWidth256Bytes, + enum source_format_class SourcePixelFormat, + unsigned int SurfaceTiling, + unsigned int BytePerPixel, + enum scan_direction_class ScanDirection, + unsigned int ViewportWidth, + unsigned int ViewportHeight, + unsigned int SwathWidthY, + bool GPUVMEnable, + unsigned int VMMPageSize, + unsigned int PTEBufferSizeInRequestsLuma, + unsigned int PDEProcessingBufIn64KBReqs, + unsigned int Pitch, + unsigned int DCCMetaPitch, + unsigned int *MacroTileWidth, + unsigned int *MetaRowByte, + unsigned int *PixelPTEBytesPerRow, + bool *PTEBufferSizeNotExceeded, + unsigned int *dpte_row_height, + unsigned int *meta_row_height); +static double CalculateTWait( + unsigned int PrefetchMode, + double DRAMClockChangeLatency, + double UrgentLatencyPixelDataOnly, + double SREnterPlusExitTime); +static double CalculateRemoteSurfaceFlipDelay( + struct display_mode_lib *mode_lib, + double VRatio, + double SwathWidth, + double Bpp, + double LineTime, + double XFCTSlvVupdateOffset, + double XFCTSlvVupdateWidth, + double XFCTSlvVreadyOffset, + double XFCXBUFLatencyTolerance, + double XFCFillBWOverhead, + double XFCSlvChunkSize, + double XFCBusTransportTime, + double TCalc, + double TWait, + double *SrcActiveDrainRate, + double *TInitXFill, + double *TslvChk); +static void CalculateActiveRowBandwidth( + bool GPUVMEnable, + enum source_format_class SourcePixelFormat, + double VRatio, + bool DCCEnable, + double LineTime, + unsigned int MetaRowByteLuma, + unsigned int MetaRowByteChroma, + unsigned int meta_row_height_luma, + unsigned int meta_row_height_chroma, + unsigned int PixelPTEBytesPerRowLuma, + unsigned int PixelPTEBytesPerRowChroma, + unsigned int dpte_row_height_luma, + unsigned int dpte_row_height_chroma, + double *meta_row_bw, + double *dpte_row_bw, + double *qual_row_bw); +static void CalculateFlipSchedule( + struct display_mode_lib *mode_lib, + double UrgentExtraLatency, + double UrgentLatencyPixelDataOnly, + unsigned int GPUVMMaxPageTableLevels, + bool GPUVMEnable, + double BandwidthAvailableForImmediateFlip, + unsigned int TotImmediateFlipBytes, + enum source_format_class SourcePixelFormat, + unsigned int ImmediateFlipBytes, + double LineTime, + double VRatio, + double Tno_bw, + double PDEAndMetaPTEBytesFrame, + unsigned int MetaRowByte, + unsigned int PixelPTEBytesPerRow, + bool DCCEnable, + unsigned int dpte_row_height, + unsigned int meta_row_height, + double qual_row_bw, + double *DestinationLinesToRequestVMInImmediateFlip, + double *DestinationLinesToRequestRowInImmediateFlip, + double *final_flip_bw, + bool *ImmediateFlipSupportedForPipe); +static double CalculateWriteBackDelay( + enum source_format_class WritebackPixelFormat, + double WritebackHRatio, + double WritebackVRatio, + unsigned int WritebackLumaHTaps, + unsigned int WritebackLumaVTaps, + unsigned int WritebackChromaHTaps, + unsigned int WritebackChromaVTaps, + unsigned int WritebackDestinationWidth); + +static void dml20v2_DisplayPipeConfiguration(struct display_mode_lib *mode_lib); +static void dml20v2_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation( + struct display_mode_lib *mode_lib); + +void dml20v2_recalculate(struct display_mode_lib *mode_lib) +{ + ModeSupportAndSystemConfiguration(mode_lib); + mode_lib->vba.FabricAndDRAMBandwidth = dml_min( + mode_lib->vba.DRAMSpeed * mode_lib->vba.NumberOfChannels * mode_lib->vba.DRAMChannelWidth, + mode_lib->vba.FabricClock * mode_lib->vba.FabricDatapathToDCNDataReturn) / 1000.0; + PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib); + dml20v2_DisplayPipeConfiguration(mode_lib); + dml20v2_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib); +} + +static double adjust_ReturnBW( + struct display_mode_lib *mode_lib, + double ReturnBW, + bool DCCEnabledAnyPlane, + double ReturnBandwidthToDCN) +{ + double CriticalCompression; + + if (DCCEnabledAnyPlane + && ReturnBandwidthToDCN + > mode_lib->vba.DCFCLK * mode_lib->vba.ReturnBusWidth / 4.0) + ReturnBW = + dml_min( + ReturnBW, + ReturnBandwidthToDCN * 4 + * (1.0 + - mode_lib->vba.UrgentLatencyPixelDataOnly + / ((mode_lib->vba.ROBBufferSizeInKByte + - mode_lib->vba.PixelChunkSizeInKByte) + * 1024 + / ReturnBandwidthToDCN + - mode_lib->vba.DCFCLK + * mode_lib->vba.ReturnBusWidth + / 4) + + mode_lib->vba.UrgentLatencyPixelDataOnly)); + + CriticalCompression = 2.0 * mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLK + * mode_lib->vba.UrgentLatencyPixelDataOnly + / (ReturnBandwidthToDCN * mode_lib->vba.UrgentLatencyPixelDataOnly + + (mode_lib->vba.ROBBufferSizeInKByte + - mode_lib->vba.PixelChunkSizeInKByte) + * 1024); + + if (DCCEnabledAnyPlane && CriticalCompression > 1.0 && CriticalCompression < 4.0) + ReturnBW = + dml_min( + ReturnBW, + 4.0 * ReturnBandwidthToDCN + * (mode_lib->vba.ROBBufferSizeInKByte + - mode_lib->vba.PixelChunkSizeInKByte) + * 1024 + * mode_lib->vba.ReturnBusWidth + * mode_lib->vba.DCFCLK + * mode_lib->vba.UrgentLatencyPixelDataOnly + / dml_pow( + (ReturnBandwidthToDCN + * mode_lib->vba.UrgentLatencyPixelDataOnly + + (mode_lib->vba.ROBBufferSizeInKByte + - mode_lib->vba.PixelChunkSizeInKByte) + * 1024), + 2)); + + return ReturnBW; +} + +static unsigned int dscceComputeDelay( + unsigned int bpc, + double bpp, + unsigned int sliceWidth, + unsigned int numSlices, + enum output_format_class pixelFormat) +{ + // valid bpc = source bits per component in the set of {8, 10, 12} + // valid bpp = increments of 1/16 of a bit + // min = 6/7/8 in N420/N422/444, respectively + // max = such that compression is 1:1 + //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode) + //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4} + //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420} + + // fixed value + unsigned int rcModelSize = 8192; + + // N422/N420 operate at 2 pixels per clock + unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, l, + Delay, pixels; + + if (pixelFormat == dm_n422 || pixelFormat == dm_420) + pixelsPerClock = 2; + // #all other modes operate at 1 pixel per clock + else + pixelsPerClock = 1; + + //initial transmit delay as per PPS + initalXmitDelay = dml_round(rcModelSize / 2.0 / bpp / pixelsPerClock); + + //compute ssm delay + if (bpc == 8) + D = 81; + else if (bpc == 10) + D = 89; + else + D = 113; + + //divide by pixel per cycle to compute slice width as seen by DSC + w = sliceWidth / pixelsPerClock; + + //422 mode has an additional cycle of delay + if (pixelFormat == dm_s422) + s = 1; + else + s = 0; + + //main calculation for the dscce + ix = initalXmitDelay + 45; + wx = (w + 2) / 3; + p = 3 * wx - w; + l0 = ix / w; + a = ix + p * l0; + ax = (a + 2) / 3 + D + 6 + 1; + l = (ax + wx - 1) / wx; + if ((ix % w) == 0 && p != 0) + lstall = 1; + else + lstall = 0; + Delay = l * wx * (numSlices - 1) + ax + s + lstall + 22; + + //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels + pixels = Delay * 3 * pixelsPerClock; + return pixels; +} + +static unsigned int dscComputeDelay(enum output_format_class pixelFormat) +{ + unsigned int Delay = 0; + + if (pixelFormat == dm_420) { + // sfr + Delay = Delay + 2; + // dsccif + Delay = Delay + 0; + // dscc - input deserializer + Delay = Delay + 3; + // dscc gets pixels every other cycle + Delay = Delay + 2; + // dscc - input cdc fifo + Delay = Delay + 12; + // dscc gets pixels every other cycle + Delay = Delay + 13; + // dscc - cdc uncertainty + Delay = Delay + 2; + // dscc - output cdc fifo + Delay = Delay + 7; + // dscc gets pixels every other cycle + Delay = Delay + 3; + // dscc - cdc uncertainty + Delay = Delay + 2; + // dscc - output serializer + Delay = Delay + 1; + // sft + Delay = Delay + 1; + } else if (pixelFormat == dm_n422) { + // sfr + Delay = Delay + 2; + // dsccif + Delay = Delay + 1; + // dscc - input deserializer + Delay = Delay + 5; + // dscc - input cdc fifo + Delay = Delay + 25; + // dscc - cdc uncertainty + Delay = Delay + 2; + // dscc - output cdc fifo + Delay = Delay + 10; + // dscc - cdc uncertainty + Delay = Delay + 2; + // dscc - output serializer + Delay = Delay + 1; + // sft + Delay = Delay + 1; + } else { + // sfr + Delay = Delay + 2; + // dsccif + Delay = Delay + 0; + // dscc - input deserializer + Delay = Delay + 3; + // dscc - input cdc fifo + Delay = Delay + 12; + // dscc - cdc uncertainty + Delay = Delay + 2; + // dscc - output cdc fifo + Delay = Delay + 7; + // dscc - output serializer + Delay = Delay + 1; + // dscc - cdc uncertainty + Delay = Delay + 2; + // sft + Delay = Delay + 1; + } + + return Delay; +} + +static bool CalculateDelayAfterScaler( + struct display_mode_lib *mode_lib, + double ReturnBW, + double ReadBandwidthPlaneLuma, + double ReadBandwidthPlaneChroma, + double TotalDataReadBandwidth, + double DisplayPipeLineDeliveryTimeLuma, + double DisplayPipeLineDeliveryTimeChroma, + double DPPCLK, + double DISPCLK, + double PixelClock, + unsigned int DSCDelay, + unsigned int DPPPerPlane, + bool ScalerEnabled, + unsigned int NumberOfCursors, + double DPPCLKDelaySubtotal, + double DPPCLKDelaySCL, + double DPPCLKDelaySCLLBOnly, + double DPPCLKDelayCNVCFormater, + double DPPCLKDelayCNVCCursor, + double DISPCLKDelaySubtotal, + unsigned int ScalerRecoutWidth, + enum output_format_class OutputFormat, + unsigned int HTotal, + unsigned int SwathWidthSingleDPPY, + double BytePerPixelDETY, + double BytePerPixelDETC, + unsigned int SwathHeightY, + unsigned int SwathHeightC, + bool Interlace, + bool ProgressiveToInterlaceUnitInOPP, + double *DSTXAfterScaler, + double *DSTYAfterScaler + ) +{ + unsigned int DPPCycles, DISPCLKCycles; + double DataFabricLineDeliveryTimeLuma; + double DataFabricLineDeliveryTimeChroma; + double DSTTotalPixelsAfterScaler; + + DataFabricLineDeliveryTimeLuma = SwathWidthSingleDPPY * SwathHeightY * dml_ceil(BytePerPixelDETY, 1) / (mode_lib->vba.ReturnBW * ReadBandwidthPlaneLuma / TotalDataReadBandwidth); + mode_lib->vba.LastPixelOfLineExtraWatermark = dml_max(mode_lib->vba.LastPixelOfLineExtraWatermark, DataFabricLineDeliveryTimeLuma - DisplayPipeLineDeliveryTimeLuma); + + if (BytePerPixelDETC != 0) { + DataFabricLineDeliveryTimeChroma = SwathWidthSingleDPPY / 2 * SwathHeightC * dml_ceil(BytePerPixelDETC, 2) / (mode_lib->vba.ReturnBW * ReadBandwidthPlaneChroma / TotalDataReadBandwidth); + mode_lib->vba.LastPixelOfLineExtraWatermark = dml_max(mode_lib->vba.LastPixelOfLineExtraWatermark, DataFabricLineDeliveryTimeChroma - DisplayPipeLineDeliveryTimeChroma); + } + + if (ScalerEnabled) + DPPCycles = DPPCLKDelaySubtotal + DPPCLKDelaySCL; + else + DPPCycles = DPPCLKDelaySubtotal + DPPCLKDelaySCLLBOnly; + + DPPCycles = DPPCycles + DPPCLKDelayCNVCFormater + NumberOfCursors * DPPCLKDelayCNVCCursor; + + DISPCLKCycles = DISPCLKDelaySubtotal; + + if (DPPCLK == 0.0 || DISPCLK == 0.0) + return true; + + *DSTXAfterScaler = DPPCycles * PixelClock / DPPCLK + DISPCLKCycles * PixelClock / DISPCLK + + DSCDelay; + + if (DPPPerPlane > 1) + *DSTXAfterScaler = *DSTXAfterScaler + ScalerRecoutWidth; + + if (OutputFormat == dm_420 || (Interlace && ProgressiveToInterlaceUnitInOPP)) + *DSTYAfterScaler = 1; + else + *DSTYAfterScaler = 0; + + DSTTotalPixelsAfterScaler = ((double) (*DSTYAfterScaler * HTotal)) + *DSTXAfterScaler; + *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / HTotal, 1); + *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * HTotal)); + + return true; +} + +static bool CalculatePrefetchSchedule( + struct display_mode_lib *mode_lib, + double DPPCLK, + double DISPCLK, + double PixelClock, + double DCFCLKDeepSleep, + unsigned int DPPPerPlane, + unsigned int NumberOfCursors, + unsigned int VBlank, + unsigned int HTotal, + unsigned int MaxInterDCNTileRepeaters, + unsigned int VStartup, + unsigned int PageTableLevels, + bool GPUVMEnable, + bool DynamicMetadataEnable, + unsigned int DynamicMetadataLinesBeforeActiveRequired, + unsigned int DynamicMetadataTransmittedBytes, + bool DCCEnable, + double UrgentLatencyPixelDataOnly, + double UrgentExtraLatency, + double TCalc, + unsigned int PDEAndMetaPTEBytesFrame, + unsigned int MetaRowByte, + unsigned int PixelPTEBytesPerRow, + double PrefetchSourceLinesY, + unsigned int SwathWidthY, + double BytePerPixelDETY, + double VInitPreFillY, + unsigned int MaxNumSwathY, + double PrefetchSourceLinesC, + double BytePerPixelDETC, + double VInitPreFillC, + unsigned int MaxNumSwathC, + unsigned int SwathHeightY, + unsigned int SwathHeightC, + double TWait, + bool XFCEnabled, + double XFCRemoteSurfaceFlipDelay, + bool InterlaceEnable, + bool ProgressiveToInterlaceUnitInOPP, + double DSTXAfterScaler, + double DSTYAfterScaler, + double *DestinationLinesForPrefetch, + double *PrefetchBandwidth, + double *DestinationLinesToRequestVMInVBlank, + double *DestinationLinesToRequestRowInVBlank, + double *VRatioPrefetchY, + double *VRatioPrefetchC, + double *RequiredPrefetchPixDataBW, + double *Tno_bw, + unsigned int *VUpdateOffsetPix, + double *VUpdateWidthPix, + double *VReadyOffsetPix) +{ + bool MyError = false; + double TotalRepeaterDelayTime; + double Tdm, LineTime, Tsetup; + double dst_y_prefetch_equ; + double Tsw_oto; + double prefetch_bw_oto; + double Tvm_oto; + double Tr0_oto; + double Tpre_oto; + double dst_y_prefetch_oto; + double TimeForFetchingMetaPTE = 0; + double TimeForFetchingRowInVBlank = 0; + double LinesToRequestPrefetchPixelData = 0; + + *VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1); + TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2.0 / DPPCLK + 3.0 / DISPCLK); + *VUpdateWidthPix = (14.0 / DCFCLKDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) + * PixelClock; + + *VReadyOffsetPix = dml_max( + 150.0 / DPPCLK, + TotalRepeaterDelayTime + 20.0 / DCFCLKDeepSleep + 10.0 / DPPCLK) + * PixelClock; + + Tsetup = (double) (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock; + + LineTime = (double) HTotal / PixelClock; + + if (DynamicMetadataEnable) { + double Tdmbf, Tdmec, Tdmsks; + + Tdm = dml_max(0.0, UrgentExtraLatency - TCalc); + Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK; + Tdmec = LineTime; + if (DynamicMetadataLinesBeforeActiveRequired == 0) + Tdmsks = VBlank * LineTime / 2.0; + else + Tdmsks = DynamicMetadataLinesBeforeActiveRequired * LineTime; + if (InterlaceEnable && !ProgressiveToInterlaceUnitInOPP) + Tdmsks = Tdmsks / 2; + if (VStartup * LineTime + < Tsetup + TWait + UrgentExtraLatency + Tdmbf + Tdmec + Tdmsks) { + MyError = true; + } + } else + Tdm = 0; + + if (GPUVMEnable) { + if (PageTableLevels == 4) + *Tno_bw = UrgentExtraLatency + UrgentLatencyPixelDataOnly; + else if (PageTableLevels == 3) + *Tno_bw = UrgentExtraLatency; + else + *Tno_bw = 0; + } else if (DCCEnable) + *Tno_bw = LineTime; + else + *Tno_bw = LineTime / 4; + + dst_y_prefetch_equ = VStartup - dml_max(TCalc + TWait, XFCRemoteSurfaceFlipDelay) / LineTime + - (Tsetup + Tdm) / LineTime + - (DSTYAfterScaler + DSTXAfterScaler / HTotal); + + Tsw_oto = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime; + + prefetch_bw_oto = (MetaRowByte + PixelPTEBytesPerRow + + PrefetchSourceLinesY * SwathWidthY * dml_ceil(BytePerPixelDETY, 1) + + PrefetchSourceLinesC * SwathWidthY / 2 * dml_ceil(BytePerPixelDETC, 2)) + / Tsw_oto; + + if (GPUVMEnable == true) { + Tvm_oto = + dml_max( + *Tno_bw + PDEAndMetaPTEBytesFrame / prefetch_bw_oto, + dml_max( + UrgentExtraLatency + + UrgentLatencyPixelDataOnly + * (PageTableLevels + - 1), + LineTime / 4.0)); + } else + Tvm_oto = LineTime / 4.0; + + if ((GPUVMEnable == true || DCCEnable == true)) { + Tr0_oto = dml_max( + (MetaRowByte + PixelPTEBytesPerRow) / prefetch_bw_oto, + dml_max(UrgentLatencyPixelDataOnly, dml_max(LineTime - Tvm_oto, LineTime / 4))); + } else + Tr0_oto = LineTime - Tvm_oto; + + Tpre_oto = Tvm_oto + Tr0_oto + Tsw_oto; + + dst_y_prefetch_oto = Tpre_oto / LineTime; + + if (dst_y_prefetch_oto < dst_y_prefetch_equ) + *DestinationLinesForPrefetch = dst_y_prefetch_oto; + else + *DestinationLinesForPrefetch = dst_y_prefetch_equ; + + *DestinationLinesForPrefetch = dml_floor(4.0 * (*DestinationLinesForPrefetch + 0.125), 1) + / 4; + + dml_print("DML: VStartup: %d\n", VStartup); + dml_print("DML: TCalc: %f\n", TCalc); + dml_print("DML: TWait: %f\n", TWait); + dml_print("DML: XFCRemoteSurfaceFlipDelay: %f\n", XFCRemoteSurfaceFlipDelay); + dml_print("DML: LineTime: %f\n", LineTime); + dml_print("DML: Tsetup: %f\n", Tsetup); + dml_print("DML: Tdm: %f\n", Tdm); + dml_print("DML: DSTYAfterScaler: %f\n", DSTYAfterScaler); + dml_print("DML: DSTXAfterScaler: %f\n", DSTXAfterScaler); + dml_print("DML: HTotal: %d\n", HTotal); + + *PrefetchBandwidth = 0; + *DestinationLinesToRequestVMInVBlank = 0; + *DestinationLinesToRequestRowInVBlank = 0; + *VRatioPrefetchY = 0; + *VRatioPrefetchC = 0; + *RequiredPrefetchPixDataBW = 0; + if (*DestinationLinesForPrefetch > 1) { + *PrefetchBandwidth = (PDEAndMetaPTEBytesFrame + 2 * MetaRowByte + + 2 * PixelPTEBytesPerRow + + PrefetchSourceLinesY * SwathWidthY * dml_ceil(BytePerPixelDETY, 1) + + PrefetchSourceLinesC * SwathWidthY / 2 + * dml_ceil(BytePerPixelDETC, 2)) + / (*DestinationLinesForPrefetch * LineTime - *Tno_bw); + if (GPUVMEnable) { + TimeForFetchingMetaPTE = + dml_max( + *Tno_bw + + (double) PDEAndMetaPTEBytesFrame + / *PrefetchBandwidth, + dml_max( + UrgentExtraLatency + + UrgentLatencyPixelDataOnly + * (PageTableLevels + - 1), + LineTime / 4)); + } else { + if (NumberOfCursors > 0 || XFCEnabled) + TimeForFetchingMetaPTE = LineTime / 4; + else + TimeForFetchingMetaPTE = 0.0; + } + + if ((GPUVMEnable == true || DCCEnable == true)) { + TimeForFetchingRowInVBlank = + dml_max( + (MetaRowByte + PixelPTEBytesPerRow) + / *PrefetchBandwidth, + dml_max( + UrgentLatencyPixelDataOnly, + dml_max( + LineTime + - TimeForFetchingMetaPTE, + LineTime + / 4.0))); + } else { + if (NumberOfCursors > 0 || XFCEnabled) + TimeForFetchingRowInVBlank = LineTime - TimeForFetchingMetaPTE; + else + TimeForFetchingRowInVBlank = 0.0; + } + + *DestinationLinesToRequestVMInVBlank = dml_floor( + 4.0 * (TimeForFetchingMetaPTE / LineTime + 0.125), + 1) / 4.0; + + *DestinationLinesToRequestRowInVBlank = dml_floor( + 4.0 * (TimeForFetchingRowInVBlank / LineTime + 0.125), + 1) / 4.0; + + LinesToRequestPrefetchPixelData = + *DestinationLinesForPrefetch + - ((NumberOfCursors > 0 || GPUVMEnable + || DCCEnable) ? + (*DestinationLinesToRequestVMInVBlank + + *DestinationLinesToRequestRowInVBlank) : + 0.0); + + if (LinesToRequestPrefetchPixelData > 0) { + + *VRatioPrefetchY = (double) PrefetchSourceLinesY + / LinesToRequestPrefetchPixelData; + *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); + if ((SwathHeightY > 4) && (VInitPreFillY > 3)) { + if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { + *VRatioPrefetchY = + dml_max( + (double) PrefetchSourceLinesY + / LinesToRequestPrefetchPixelData, + (double) MaxNumSwathY + * SwathHeightY + / (LinesToRequestPrefetchPixelData + - (VInitPreFillY + - 3.0) + / 2.0)); + *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); + } else { + MyError = true; + *VRatioPrefetchY = 0; + } + } + + *VRatioPrefetchC = (double) PrefetchSourceLinesC + / LinesToRequestPrefetchPixelData; + *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); + + if ((SwathHeightC > 4)) { + if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { + *VRatioPrefetchC = + dml_max( + *VRatioPrefetchC, + (double) MaxNumSwathC + * SwathHeightC + / (LinesToRequestPrefetchPixelData + - (VInitPreFillC + - 3.0) + / 2.0)); + *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); + } else { + MyError = true; + *VRatioPrefetchC = 0; + } + } + + *RequiredPrefetchPixDataBW = + DPPPerPlane + * ((double) PrefetchSourceLinesY + / LinesToRequestPrefetchPixelData + * dml_ceil( + BytePerPixelDETY, + 1) + + (double) PrefetchSourceLinesC + / LinesToRequestPrefetchPixelData + * dml_ceil( + BytePerPixelDETC, + 2) + / 2) + * SwathWidthY / LineTime; + } else { + MyError = true; + *VRatioPrefetchY = 0; + *VRatioPrefetchC = 0; + *RequiredPrefetchPixDataBW = 0; + } + + } else { + MyError = true; + } + + if (MyError) { + *PrefetchBandwidth = 0; + TimeForFetchingMetaPTE = 0; + TimeForFetchingRowInVBlank = 0; + *DestinationLinesToRequestVMInVBlank = 0; + *DestinationLinesToRequestRowInVBlank = 0; + *DestinationLinesForPrefetch = 0; + LinesToRequestPrefetchPixelData = 0; + *VRatioPrefetchY = 0; + *VRatioPrefetchC = 0; + *RequiredPrefetchPixDataBW = 0; + } + + return MyError; +} + +static double RoundToDFSGranularityUp(double Clock, double VCOSpeed) +{ + return VCOSpeed * 4 / dml_floor(VCOSpeed * 4 / Clock, 1); +} + +static double RoundToDFSGranularityDown(double Clock, double VCOSpeed) +{ + return VCOSpeed * 4 / dml_ceil(VCOSpeed * 4 / Clock, 1); +} + +static double CalculatePrefetchSourceLines( + struct display_mode_lib *mode_lib, + double VRatio, + double vtaps, + bool Interlace, + bool ProgressiveToInterlaceUnitInOPP, + unsigned int SwathHeight, + unsigned int ViewportYStart, + double *VInitPreFill, + unsigned int *MaxNumSwath) +{ + unsigned int MaxPartialSwath; + + if (ProgressiveToInterlaceUnitInOPP) + *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1); + else + *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1); + + if (!mode_lib->vba.IgnoreViewportPositioning) { + + *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0; + + if (*VInitPreFill > 1.0) + MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight; + else + MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) + % SwathHeight; + MaxPartialSwath = dml_max(1U, MaxPartialSwath); + + } else { + + if (ViewportYStart != 0) + dml_print( + "WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n"); + + *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1); + + if (*VInitPreFill > 1.0) + MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight; + else + MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) + % SwathHeight; + } + + return *MaxNumSwath * SwathHeight + MaxPartialSwath; +} + +static unsigned int CalculateVMAndRowBytes( + struct display_mode_lib *mode_lib, + bool DCCEnable, + unsigned int BlockHeight256Bytes, + unsigned int BlockWidth256Bytes, + enum source_format_class SourcePixelFormat, + unsigned int SurfaceTiling, + unsigned int BytePerPixel, + enum scan_direction_class ScanDirection, + unsigned int ViewportWidth, + unsigned int ViewportHeight, + unsigned int SwathWidth, + bool GPUVMEnable, + unsigned int VMMPageSize, + unsigned int PTEBufferSizeInRequestsLuma, + unsigned int PDEProcessingBufIn64KBReqs, + unsigned int Pitch, + unsigned int DCCMetaPitch, + unsigned int *MacroTileWidth, + unsigned int *MetaRowByte, + unsigned int *PixelPTEBytesPerRow, + bool *PTEBufferSizeNotExceeded, + unsigned int *dpte_row_height, + unsigned int *meta_row_height) +{ + unsigned int MetaRequestHeight; + unsigned int MetaRequestWidth; + unsigned int MetaSurfWidth; + unsigned int MetaSurfHeight; + unsigned int MPDEBytesFrame; + unsigned int MetaPTEBytesFrame; + unsigned int DCCMetaSurfaceBytes; + + unsigned int MacroTileSizeBytes; + unsigned int MacroTileHeight; + unsigned int DPDE0BytesFrame; + unsigned int ExtraDPDEBytesFrame; + unsigned int PDEAndMetaPTEBytesFrame; + + if (DCCEnable == true) { + MetaRequestHeight = 8 * BlockHeight256Bytes; + MetaRequestWidth = 8 * BlockWidth256Bytes; + if (ScanDirection == dm_horz) { + *meta_row_height = MetaRequestHeight; + MetaSurfWidth = dml_ceil((double) SwathWidth - 1, MetaRequestWidth) + + MetaRequestWidth; + *MetaRowByte = MetaSurfWidth * MetaRequestHeight * BytePerPixel / 256.0; + } else { + *meta_row_height = MetaRequestWidth; + MetaSurfHeight = dml_ceil((double) SwathWidth - 1, MetaRequestHeight) + + MetaRequestHeight; + *MetaRowByte = MetaSurfHeight * MetaRequestWidth * BytePerPixel / 256.0; + } + if (ScanDirection == dm_horz) { + DCCMetaSurfaceBytes = DCCMetaPitch + * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + + 64 * BlockHeight256Bytes) * BytePerPixel + / 256; + } else { + DCCMetaSurfaceBytes = DCCMetaPitch + * (dml_ceil( + (double) ViewportHeight - 1, + 64 * BlockHeight256Bytes) + + 64 * BlockHeight256Bytes) * BytePerPixel + / 256; + } + if (GPUVMEnable == true) { + MetaPTEBytesFrame = (dml_ceil( + (double) (DCCMetaSurfaceBytes - VMMPageSize) + / (8 * VMMPageSize), + 1) + 1) * 64; + MPDEBytesFrame = 128 * (mode_lib->vba.GPUVMMaxPageTableLevels - 1); + } else { + MetaPTEBytesFrame = 0; + MPDEBytesFrame = 0; + } + } else { + MetaPTEBytesFrame = 0; + MPDEBytesFrame = 0; + *MetaRowByte = 0; + } + + if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_lvp) { + MacroTileSizeBytes = 256; + MacroTileHeight = BlockHeight256Bytes; + } else if (SurfaceTiling == dm_sw_4kb_s || SurfaceTiling == dm_sw_4kb_s_x + || SurfaceTiling == dm_sw_4kb_d || SurfaceTiling == dm_sw_4kb_d_x) { + MacroTileSizeBytes = 4096; + MacroTileHeight = 4 * BlockHeight256Bytes; + } else if (SurfaceTiling == dm_sw_64kb_s || SurfaceTiling == dm_sw_64kb_s_t + || SurfaceTiling == dm_sw_64kb_s_x || SurfaceTiling == dm_sw_64kb_d + || SurfaceTiling == dm_sw_64kb_d_t || SurfaceTiling == dm_sw_64kb_d_x + || SurfaceTiling == dm_sw_64kb_r_x) { + MacroTileSizeBytes = 65536; + MacroTileHeight = 16 * BlockHeight256Bytes; + } else { + MacroTileSizeBytes = 262144; + MacroTileHeight = 32 * BlockHeight256Bytes; + } + *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight; + + if (GPUVMEnable == true && mode_lib->vba.GPUVMMaxPageTableLevels > 1) { + if (ScanDirection == dm_horz) { + DPDE0BytesFrame = + 64 + * (dml_ceil( + ((Pitch + * (dml_ceil( + ViewportHeight + - 1, + MacroTileHeight) + + MacroTileHeight) + * BytePerPixel) + - MacroTileSizeBytes) + / (8 + * 2097152), + 1) + 1); + } else { + DPDE0BytesFrame = + 64 + * (dml_ceil( + ((Pitch + * (dml_ceil( + (double) SwathWidth + - 1, + MacroTileHeight) + + MacroTileHeight) + * BytePerPixel) + - MacroTileSizeBytes) + / (8 + * 2097152), + 1) + 1); + } + ExtraDPDEBytesFrame = 128 * (mode_lib->vba.GPUVMMaxPageTableLevels - 2); + } else { + DPDE0BytesFrame = 0; + ExtraDPDEBytesFrame = 0; + } + + PDEAndMetaPTEBytesFrame = MetaPTEBytesFrame + MPDEBytesFrame + DPDE0BytesFrame + + ExtraDPDEBytesFrame; + + if (GPUVMEnable == true) { + unsigned int PTERequestSize; + unsigned int PixelPTEReqHeight; + unsigned int PixelPTEReqWidth; + double FractionOfPTEReturnDrop; + unsigned int EffectivePDEProcessingBufIn64KBReqs; + + if (SurfaceTiling == dm_sw_linear) { + PixelPTEReqHeight = 1; + PixelPTEReqWidth = 8.0 * VMMPageSize / BytePerPixel; + PTERequestSize = 64; + FractionOfPTEReturnDrop = 0; + } else if (MacroTileSizeBytes == 4096) { + PixelPTEReqHeight = MacroTileHeight; + PixelPTEReqWidth = 8 * *MacroTileWidth; + PTERequestSize = 64; + if (ScanDirection == dm_horz) + FractionOfPTEReturnDrop = 0; + else + FractionOfPTEReturnDrop = 7 / 8; + } else if (VMMPageSize == 4096 && MacroTileSizeBytes > 4096) { + PixelPTEReqHeight = 16 * BlockHeight256Bytes; + PixelPTEReqWidth = 16 * BlockWidth256Bytes; + PTERequestSize = 128; + FractionOfPTEReturnDrop = 0; + } else { + PixelPTEReqHeight = MacroTileHeight; + PixelPTEReqWidth = 8 * *MacroTileWidth; + PTERequestSize = 64; + FractionOfPTEReturnDrop = 0; + } + + if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) + EffectivePDEProcessingBufIn64KBReqs = PDEProcessingBufIn64KBReqs / 2; + else + EffectivePDEProcessingBufIn64KBReqs = PDEProcessingBufIn64KBReqs; + + if (SurfaceTiling == dm_sw_linear) { + *dpte_row_height = + dml_min( + 128, + 1 + << (unsigned int) dml_floor( + dml_log2( + dml_min( + (double) PTEBufferSizeInRequestsLuma + * PixelPTEReqWidth, + EffectivePDEProcessingBufIn64KBReqs + * 65536.0 + / BytePerPixel) + / Pitch), + 1)); + *PixelPTEBytesPerRow = PTERequestSize + * (dml_ceil( + (double) (Pitch * *dpte_row_height - 1) + / PixelPTEReqWidth, + 1) + 1); + } else if (ScanDirection == dm_horz) { + *dpte_row_height = PixelPTEReqHeight; + *PixelPTEBytesPerRow = PTERequestSize + * (dml_ceil(((double) SwathWidth - 1) / PixelPTEReqWidth, 1) + + 1); + } else { + *dpte_row_height = dml_min(PixelPTEReqWidth, *MacroTileWidth); + *PixelPTEBytesPerRow = PTERequestSize + * (dml_ceil( + ((double) SwathWidth - 1) + / PixelPTEReqHeight, + 1) + 1); + } + if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) + <= 64 * PTEBufferSizeInRequestsLuma) { + *PTEBufferSizeNotExceeded = true; + } else { + *PTEBufferSizeNotExceeded = false; + } + } else { + *PixelPTEBytesPerRow = 0; + *PTEBufferSizeNotExceeded = true; + } + + return PDEAndMetaPTEBytesFrame; +} + +static void dml20v2_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation( + struct display_mode_lib *mode_lib) +{ + unsigned int j, k; + + mode_lib->vba.WritebackDISPCLK = 0.0; + mode_lib->vba.DISPCLKWithRamping = 0; + mode_lib->vba.DISPCLKWithoutRamping = 0; + mode_lib->vba.GlobalDPPCLK = 0.0; + + // dml_ml->vba.DISPCLK and dml_ml->vba.DPPCLK Calculation + // + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.WritebackEnable[k]) { + mode_lib->vba.WritebackDISPCLK = + dml_max( + mode_lib->vba.WritebackDISPCLK, + CalculateWriteBackDISPCLK( + mode_lib->vba.WritebackPixelFormat[k], + mode_lib->vba.PixelClock[k], + mode_lib->vba.WritebackHRatio[k], + mode_lib->vba.WritebackVRatio[k], + mode_lib->vba.WritebackLumaHTaps[k], + mode_lib->vba.WritebackLumaVTaps[k], + mode_lib->vba.WritebackChromaHTaps[k], + mode_lib->vba.WritebackChromaVTaps[k], + mode_lib->vba.WritebackDestinationWidth[k], + mode_lib->vba.HTotal[k], + mode_lib->vba.WritebackChromaLineBufferWidth)); + } + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.HRatio[k] > 1) { + mode_lib->vba.PSCL_THROUGHPUT_LUMA[k] = dml_min( + mode_lib->vba.MaxDCHUBToPSCLThroughput, + mode_lib->vba.MaxPSCLToLBThroughput + * mode_lib->vba.HRatio[k] + / dml_ceil( + mode_lib->vba.htaps[k] + / 6.0, + 1)); + } else { + mode_lib->vba.PSCL_THROUGHPUT_LUMA[k] = dml_min( + mode_lib->vba.MaxDCHUBToPSCLThroughput, + mode_lib->vba.MaxPSCLToLBThroughput); + } + + mode_lib->vba.DPPCLKUsingSingleDPPLuma = + mode_lib->vba.PixelClock[k] + * dml_max( + mode_lib->vba.vtaps[k] / 6.0 + * dml_min( + 1.0, + mode_lib->vba.HRatio[k]), + dml_max( + mode_lib->vba.HRatio[k] + * mode_lib->vba.VRatio[k] + / mode_lib->vba.PSCL_THROUGHPUT_LUMA[k], + 1.0)); + + if ((mode_lib->vba.htaps[k] > 6 || mode_lib->vba.vtaps[k] > 6) + && mode_lib->vba.DPPCLKUsingSingleDPPLuma + < 2 * mode_lib->vba.PixelClock[k]) { + mode_lib->vba.DPPCLKUsingSingleDPPLuma = 2 * mode_lib->vba.PixelClock[k]; + } + + if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8 + && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) { + mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k] = 0.0; + mode_lib->vba.DPPCLKUsingSingleDPP[k] = + mode_lib->vba.DPPCLKUsingSingleDPPLuma; + } else { + if (mode_lib->vba.HRatio[k] > 1) { + mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k] = + dml_min( + mode_lib->vba.MaxDCHUBToPSCLThroughput, + mode_lib->vba.MaxPSCLToLBThroughput + * mode_lib->vba.HRatio[k] + / 2 + / dml_ceil( + mode_lib->vba.HTAPsChroma[k] + / 6.0, + 1.0)); + } else { + mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k] = dml_min( + mode_lib->vba.MaxDCHUBToPSCLThroughput, + mode_lib->vba.MaxPSCLToLBThroughput); + } + mode_lib->vba.DPPCLKUsingSingleDPPChroma = + mode_lib->vba.PixelClock[k] + * dml_max( + mode_lib->vba.VTAPsChroma[k] + / 6.0 + * dml_min( + 1.0, + mode_lib->vba.HRatio[k] + / 2), + dml_max( + mode_lib->vba.HRatio[k] + * mode_lib->vba.VRatio[k] + / 4 + / mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k], + 1.0)); + + if ((mode_lib->vba.HTAPsChroma[k] > 6 || mode_lib->vba.VTAPsChroma[k] > 6) + && mode_lib->vba.DPPCLKUsingSingleDPPChroma + < 2 * mode_lib->vba.PixelClock[k]) { + mode_lib->vba.DPPCLKUsingSingleDPPChroma = 2 + * mode_lib->vba.PixelClock[k]; + } + + mode_lib->vba.DPPCLKUsingSingleDPP[k] = dml_max( + mode_lib->vba.DPPCLKUsingSingleDPPLuma, + mode_lib->vba.DPPCLKUsingSingleDPPChroma); + } + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.BlendingAndTiming[k] != k) + continue; + if (mode_lib->vba.ODMCombineEnabled[k]) { + mode_lib->vba.DISPCLKWithRamping = + dml_max( + mode_lib->vba.DISPCLKWithRamping, + mode_lib->vba.PixelClock[k] / 2 + * (1 + + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading + / 100) + * (1 + + mode_lib->vba.DISPCLKRampingMargin + / 100)); + mode_lib->vba.DISPCLKWithoutRamping = + dml_max( + mode_lib->vba.DISPCLKWithoutRamping, + mode_lib->vba.PixelClock[k] / 2 + * (1 + + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading + / 100)); + } else if (!mode_lib->vba.ODMCombineEnabled[k]) { + mode_lib->vba.DISPCLKWithRamping = + dml_max( + mode_lib->vba.DISPCLKWithRamping, + mode_lib->vba.PixelClock[k] + * (1 + + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading + / 100) + * (1 + + mode_lib->vba.DISPCLKRampingMargin + / 100)); + mode_lib->vba.DISPCLKWithoutRamping = + dml_max( + mode_lib->vba.DISPCLKWithoutRamping, + mode_lib->vba.PixelClock[k] + * (1 + + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading + / 100)); + } + } + + mode_lib->vba.DISPCLKWithRamping = dml_max( + mode_lib->vba.DISPCLKWithRamping, + mode_lib->vba.WritebackDISPCLK); + mode_lib->vba.DISPCLKWithoutRamping = dml_max( + mode_lib->vba.DISPCLKWithoutRamping, + mode_lib->vba.WritebackDISPCLK); + + ASSERT(mode_lib->vba.DISPCLKDPPCLKVCOSpeed != 0); + mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp( + mode_lib->vba.DISPCLKWithRamping, + mode_lib->vba.DISPCLKDPPCLKVCOSpeed); + mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp( + mode_lib->vba.DISPCLKWithoutRamping, + mode_lib->vba.DISPCLKDPPCLKVCOSpeed); + mode_lib->vba.MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown( + mode_lib->vba.soc.clock_limits[mode_lib->vba.soc.num_states].dispclk_mhz, + mode_lib->vba.DISPCLKDPPCLKVCOSpeed); + if (mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity + > mode_lib->vba.MaxDispclkRoundedToDFSGranularity) { + mode_lib->vba.DISPCLK_calculated = + mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity; + } else if (mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity + > mode_lib->vba.MaxDispclkRoundedToDFSGranularity) { + mode_lib->vba.DISPCLK_calculated = mode_lib->vba.MaxDispclkRoundedToDFSGranularity; + } else { + mode_lib->vba.DISPCLK_calculated = + mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity; + } + DTRACE(" dispclk_mhz (calculated) = %f", mode_lib->vba.DISPCLK_calculated); + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.DPPPerPlane[k] == 0) { + mode_lib->vba.DPPCLK_calculated[k] = 0; + } else { + mode_lib->vba.DPPCLK_calculated[k] = mode_lib->vba.DPPCLKUsingSingleDPP[k] + / mode_lib->vba.DPPPerPlane[k] + * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100); + } + mode_lib->vba.GlobalDPPCLK = dml_max( + mode_lib->vba.GlobalDPPCLK, + mode_lib->vba.DPPCLK_calculated[k]); + } + mode_lib->vba.GlobalDPPCLK = RoundToDFSGranularityUp( + mode_lib->vba.GlobalDPPCLK, + mode_lib->vba.DISPCLKDPPCLKVCOSpeed); + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.DPPCLK_calculated[k] = mode_lib->vba.GlobalDPPCLK / 255 + * dml_ceil( + mode_lib->vba.DPPCLK_calculated[k] * 255 + / mode_lib->vba.GlobalDPPCLK, + 1); + DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, mode_lib->vba.DPPCLK_calculated[k]); + } + + // Urgent Watermark + mode_lib->vba.DCCEnabledAnyPlane = false; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) + if (mode_lib->vba.DCCEnable[k]) + mode_lib->vba.DCCEnabledAnyPlane = true; + + mode_lib->vba.ReturnBandwidthToDCN = dml_min( + mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLK, + mode_lib->vba.FabricAndDRAMBandwidth * 1000) + * mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100; + + mode_lib->vba.ReturnBW = mode_lib->vba.ReturnBandwidthToDCN; + mode_lib->vba.ReturnBW = adjust_ReturnBW( + mode_lib, + mode_lib->vba.ReturnBW, + mode_lib->vba.DCCEnabledAnyPlane, + mode_lib->vba.ReturnBandwidthToDCN); + + // Let's do this calculation again?? + mode_lib->vba.ReturnBandwidthToDCN = dml_min( + mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLK, + mode_lib->vba.FabricAndDRAMBandwidth * 1000); + mode_lib->vba.ReturnBW = adjust_ReturnBW( + mode_lib, + mode_lib->vba.ReturnBW, + mode_lib->vba.DCCEnabledAnyPlane, + mode_lib->vba.ReturnBandwidthToDCN); + + DTRACE(" dcfclk_mhz = %f", mode_lib->vba.DCFCLK); + DTRACE(" return_bw_to_dcn = %f", mode_lib->vba.ReturnBandwidthToDCN); + DTRACE(" return_bus_bw = %f", mode_lib->vba.ReturnBW); + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + bool MainPlaneDoesODMCombine = false; + + if (mode_lib->vba.SourceScan[k] == dm_horz) + mode_lib->vba.SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportWidth[k]; + else + mode_lib->vba.SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportHeight[k]; + + if (mode_lib->vba.ODMCombineEnabled[k] == true) + MainPlaneDoesODMCombine = true; + for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) + if (mode_lib->vba.BlendingAndTiming[k] == j + && mode_lib->vba.ODMCombineEnabled[j] == true) + MainPlaneDoesODMCombine = true; + + if (MainPlaneDoesODMCombine == true) + mode_lib->vba.SwathWidthY[k] = dml_min( + (double) mode_lib->vba.SwathWidthSingleDPPY[k], + dml_round( + mode_lib->vba.HActive[k] / 2.0 + * mode_lib->vba.HRatio[k])); + else { + if (mode_lib->vba.DPPPerPlane[k] == 0) { + mode_lib->vba.SwathWidthY[k] = 0; + } else { + mode_lib->vba.SwathWidthY[k] = mode_lib->vba.SwathWidthSingleDPPY[k] + / mode_lib->vba.DPPPerPlane[k]; + } + } + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) { + mode_lib->vba.BytePerPixelDETY[k] = 8; + mode_lib->vba.BytePerPixelDETC[k] = 0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32) { + mode_lib->vba.BytePerPixelDETY[k] = 4; + mode_lib->vba.BytePerPixelDETC[k] = 0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_16) { + mode_lib->vba.BytePerPixelDETY[k] = 2; + mode_lib->vba.BytePerPixelDETC[k] = 0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_8) { + mode_lib->vba.BytePerPixelDETY[k] = 1; + mode_lib->vba.BytePerPixelDETC[k] = 0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) { + mode_lib->vba.BytePerPixelDETY[k] = 1; + mode_lib->vba.BytePerPixelDETC[k] = 2; + } else { // dm_420_10 + mode_lib->vba.BytePerPixelDETY[k] = 4.0 / 3.0; + mode_lib->vba.BytePerPixelDETC[k] = 8.0 / 3.0; + } + } + + mode_lib->vba.TotalDataReadBandwidth = 0.0; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.ReadBandwidthPlaneLuma[k] = mode_lib->vba.SwathWidthSingleDPPY[k] + * dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1) + / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) + * mode_lib->vba.VRatio[k]; + mode_lib->vba.ReadBandwidthPlaneChroma[k] = mode_lib->vba.SwathWidthSingleDPPY[k] + / 2 * dml_ceil(mode_lib->vba.BytePerPixelDETC[k], 2) + / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) + * mode_lib->vba.VRatio[k] / 2; + DTRACE( + " read_bw[%i] = %fBps", + k, + mode_lib->vba.ReadBandwidthPlaneLuma[k] + + mode_lib->vba.ReadBandwidthPlaneChroma[k]); + mode_lib->vba.TotalDataReadBandwidth += mode_lib->vba.ReadBandwidthPlaneLuma[k] + + mode_lib->vba.ReadBandwidthPlaneChroma[k]; + } + + mode_lib->vba.TotalDCCActiveDPP = 0; + mode_lib->vba.TotalActiveDPP = 0; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.TotalActiveDPP = mode_lib->vba.TotalActiveDPP + + mode_lib->vba.DPPPerPlane[k]; + if (mode_lib->vba.DCCEnable[k]) + mode_lib->vba.TotalDCCActiveDPP = mode_lib->vba.TotalDCCActiveDPP + + mode_lib->vba.DPPPerPlane[k]; + } + + mode_lib->vba.UrgentRoundTripAndOutOfOrderLatency = + (mode_lib->vba.RoundTripPingLatencyCycles + 32) / mode_lib->vba.DCFCLK + + mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly + * mode_lib->vba.NumberOfChannels + / mode_lib->vba.ReturnBW; + + mode_lib->vba.LastPixelOfLineExtraWatermark = 0; + + mode_lib->vba.UrgentExtraLatency = mode_lib->vba.UrgentRoundTripAndOutOfOrderLatency + + (mode_lib->vba.TotalActiveDPP * mode_lib->vba.PixelChunkSizeInKByte + + mode_lib->vba.TotalDCCActiveDPP + * mode_lib->vba.MetaChunkSize) * 1024.0 + / mode_lib->vba.ReturnBW; + + if (mode_lib->vba.GPUVMEnable) + mode_lib->vba.UrgentExtraLatency += mode_lib->vba.TotalActiveDPP + * mode_lib->vba.PTEGroupSize / mode_lib->vba.ReturnBW; + + mode_lib->vba.UrgentWatermark = mode_lib->vba.UrgentLatencyPixelDataOnly + + mode_lib->vba.LastPixelOfLineExtraWatermark + + mode_lib->vba.UrgentExtraLatency; + + DTRACE(" urgent_extra_latency = %fus", mode_lib->vba.UrgentExtraLatency); + DTRACE(" wm_urgent = %fus", mode_lib->vba.UrgentWatermark); + + mode_lib->vba.UrgentLatency = mode_lib->vba.UrgentLatencyPixelDataOnly; + + mode_lib->vba.TotalActiveWriteback = 0; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.WritebackEnable[k]) + mode_lib->vba.TotalActiveWriteback = mode_lib->vba.TotalActiveWriteback + mode_lib->vba.ActiveWritebacksPerPlane[k]; + } + + if (mode_lib->vba.TotalActiveWriteback <= 1) + mode_lib->vba.WritebackUrgentWatermark = mode_lib->vba.WritebackLatency; + else + mode_lib->vba.WritebackUrgentWatermark = mode_lib->vba.WritebackLatency + + mode_lib->vba.WritebackChunkSize * 1024.0 / 32 + / mode_lib->vba.SOCCLK; + + DTRACE(" wm_wb_urgent = %fus", mode_lib->vba.WritebackUrgentWatermark); + + // NB P-State/DRAM Clock Change Watermark + mode_lib->vba.DRAMClockChangeWatermark = mode_lib->vba.DRAMClockChangeLatency + + mode_lib->vba.UrgentWatermark; + + DTRACE(" wm_pstate_change = %fus", mode_lib->vba.DRAMClockChangeWatermark); + + DTRACE(" calculating wb pstate watermark"); + DTRACE(" total wb outputs %d", mode_lib->vba.TotalActiveWriteback); + DTRACE(" socclk frequency %f Mhz", mode_lib->vba.SOCCLK); + + if (mode_lib->vba.TotalActiveWriteback <= 1) + mode_lib->vba.WritebackDRAMClockChangeWatermark = + mode_lib->vba.DRAMClockChangeLatency + + mode_lib->vba.WritebackLatency; + else + mode_lib->vba.WritebackDRAMClockChangeWatermark = + mode_lib->vba.DRAMClockChangeLatency + + mode_lib->vba.WritebackLatency + + mode_lib->vba.WritebackChunkSize * 1024.0 / 32 + / mode_lib->vba.SOCCLK; + + DTRACE(" wm_wb_pstate %fus", mode_lib->vba.WritebackDRAMClockChangeWatermark); + + // Stutter Efficiency + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.LinesInDETY[k] = mode_lib->vba.DETBufferSizeY[k] + / mode_lib->vba.BytePerPixelDETY[k] / mode_lib->vba.SwathWidthY[k]; + mode_lib->vba.LinesInDETYRoundedDownToSwath[k] = dml_floor( + mode_lib->vba.LinesInDETY[k], + mode_lib->vba.SwathHeightY[k]); + mode_lib->vba.FullDETBufferingTimeY[k] = + mode_lib->vba.LinesInDETYRoundedDownToSwath[k] + * (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]) + / mode_lib->vba.VRatio[k]; + if (mode_lib->vba.BytePerPixelDETC[k] > 0) { + mode_lib->vba.LinesInDETC[k] = mode_lib->vba.DETBufferSizeC[k] + / mode_lib->vba.BytePerPixelDETC[k] + / (mode_lib->vba.SwathWidthY[k] / 2); + mode_lib->vba.LinesInDETCRoundedDownToSwath[k] = dml_floor( + mode_lib->vba.LinesInDETC[k], + mode_lib->vba.SwathHeightC[k]); + mode_lib->vba.FullDETBufferingTimeC[k] = + mode_lib->vba.LinesInDETCRoundedDownToSwath[k] + * (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]) + / (mode_lib->vba.VRatio[k] / 2); + } else { + mode_lib->vba.LinesInDETC[k] = 0; + mode_lib->vba.LinesInDETCRoundedDownToSwath[k] = 0; + mode_lib->vba.FullDETBufferingTimeC[k] = 999999; + } + } + + mode_lib->vba.MinFullDETBufferingTime = 999999.0; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.FullDETBufferingTimeY[k] + < mode_lib->vba.MinFullDETBufferingTime) { + mode_lib->vba.MinFullDETBufferingTime = + mode_lib->vba.FullDETBufferingTimeY[k]; + mode_lib->vba.FrameTimeForMinFullDETBufferingTime = + (double) mode_lib->vba.VTotal[k] * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]; + } + if (mode_lib->vba.FullDETBufferingTimeC[k] + < mode_lib->vba.MinFullDETBufferingTime) { + mode_lib->vba.MinFullDETBufferingTime = + mode_lib->vba.FullDETBufferingTimeC[k]; + mode_lib->vba.FrameTimeForMinFullDETBufferingTime = + (double) mode_lib->vba.VTotal[k] * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]; + } + } + + mode_lib->vba.AverageReadBandwidthGBytePerSecond = 0.0; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.DCCEnable[k]) { + mode_lib->vba.AverageReadBandwidthGBytePerSecond = + mode_lib->vba.AverageReadBandwidthGBytePerSecond + + mode_lib->vba.ReadBandwidthPlaneLuma[k] + / mode_lib->vba.DCCRate[k] + / 1000 + + mode_lib->vba.ReadBandwidthPlaneChroma[k] + / mode_lib->vba.DCCRate[k] + / 1000; + } else { + mode_lib->vba.AverageReadBandwidthGBytePerSecond = + mode_lib->vba.AverageReadBandwidthGBytePerSecond + + mode_lib->vba.ReadBandwidthPlaneLuma[k] + / 1000 + + mode_lib->vba.ReadBandwidthPlaneChroma[k] + / 1000; + } + if (mode_lib->vba.DCCEnable[k]) { + mode_lib->vba.AverageReadBandwidthGBytePerSecond = + mode_lib->vba.AverageReadBandwidthGBytePerSecond + + mode_lib->vba.ReadBandwidthPlaneLuma[k] + / 1000 / 256 + + mode_lib->vba.ReadBandwidthPlaneChroma[k] + / 1000 / 256; + } + if (mode_lib->vba.GPUVMEnable) { + mode_lib->vba.AverageReadBandwidthGBytePerSecond = + mode_lib->vba.AverageReadBandwidthGBytePerSecond + + mode_lib->vba.ReadBandwidthPlaneLuma[k] + / 1000 / 512 + + mode_lib->vba.ReadBandwidthPlaneChroma[k] + / 1000 / 512; + } + } + + mode_lib->vba.PartOfBurstThatFitsInROB = + dml_min( + mode_lib->vba.MinFullDETBufferingTime + * mode_lib->vba.TotalDataReadBandwidth, + mode_lib->vba.ROBBufferSizeInKByte * 1024 + * mode_lib->vba.TotalDataReadBandwidth + / (mode_lib->vba.AverageReadBandwidthGBytePerSecond + * 1000)); + mode_lib->vba.StutterBurstTime = mode_lib->vba.PartOfBurstThatFitsInROB + * (mode_lib->vba.AverageReadBandwidthGBytePerSecond * 1000) + / mode_lib->vba.TotalDataReadBandwidth / mode_lib->vba.ReturnBW + + (mode_lib->vba.MinFullDETBufferingTime + * mode_lib->vba.TotalDataReadBandwidth + - mode_lib->vba.PartOfBurstThatFitsInROB) + / (mode_lib->vba.DCFCLK * 64); + if (mode_lib->vba.TotalActiveWriteback == 0) { + mode_lib->vba.StutterEfficiencyNotIncludingVBlank = (1 + - (mode_lib->vba.SRExitTime + mode_lib->vba.StutterBurstTime) + / mode_lib->vba.MinFullDETBufferingTime) * 100; + } else { + mode_lib->vba.StutterEfficiencyNotIncludingVBlank = 0; + } + + mode_lib->vba.SmallestVBlank = 999999; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.SynchronizedVBlank || mode_lib->vba.NumberOfActivePlanes == 1) { + mode_lib->vba.VBlankTime = (double) (mode_lib->vba.VTotal[k] + - mode_lib->vba.VActive[k]) * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]; + } else { + mode_lib->vba.VBlankTime = 0; + } + mode_lib->vba.SmallestVBlank = dml_min( + mode_lib->vba.SmallestVBlank, + mode_lib->vba.VBlankTime); + } + + mode_lib->vba.StutterEfficiency = (mode_lib->vba.StutterEfficiencyNotIncludingVBlank / 100 + * (mode_lib->vba.FrameTimeForMinFullDETBufferingTime + - mode_lib->vba.SmallestVBlank) + + mode_lib->vba.SmallestVBlank) + / mode_lib->vba.FrameTimeForMinFullDETBufferingTime * 100; + + // dml_ml->vba.DCFCLK Deep Sleep + mode_lib->vba.DCFCLKDeepSleep = 8.0; + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; k++) { + if (mode_lib->vba.BytePerPixelDETC[k] > 0) { + mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = + dml_max( + 1.1 * mode_lib->vba.SwathWidthY[k] + * dml_ceil( + mode_lib->vba.BytePerPixelDETY[k], + 1) / 32 + / mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k], + 1.1 * mode_lib->vba.SwathWidthY[k] / 2.0 + * dml_ceil( + mode_lib->vba.BytePerPixelDETC[k], + 2) / 32 + / mode_lib->vba.DisplayPipeLineDeliveryTimeChroma[k]); + } else + mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = 1.1 * mode_lib->vba.SwathWidthY[k] + * dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1) / 64.0 + / mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k]; + mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = dml_max( + mode_lib->vba.DCFCLKDeepSleepPerPlane[k], + mode_lib->vba.PixelClock[k] / 16.0); + mode_lib->vba.DCFCLKDeepSleep = dml_max( + mode_lib->vba.DCFCLKDeepSleep, + mode_lib->vba.DCFCLKDeepSleepPerPlane[k]); + + DTRACE( + " dcfclk_deepsleep_per_plane[%i] = %fMHz", + k, + mode_lib->vba.DCFCLKDeepSleepPerPlane[k]); + } + + DTRACE(" dcfclk_deepsleep_mhz = %fMHz", mode_lib->vba.DCFCLKDeepSleep); + + // Stutter Watermark + mode_lib->vba.StutterExitWatermark = mode_lib->vba.SRExitTime + + mode_lib->vba.LastPixelOfLineExtraWatermark + + mode_lib->vba.UrgentExtraLatency + 10 / mode_lib->vba.DCFCLKDeepSleep; + mode_lib->vba.StutterEnterPlusExitWatermark = mode_lib->vba.SREnterPlusExitTime + + mode_lib->vba.LastPixelOfLineExtraWatermark + + mode_lib->vba.UrgentExtraLatency; + + DTRACE(" wm_cstate_exit = %fus", mode_lib->vba.StutterExitWatermark); + DTRACE(" wm_cstate_enter_exit = %fus", mode_lib->vba.StutterEnterPlusExitWatermark); + + // Urgent Latency Supported + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.EffectiveDETPlusLBLinesLuma = + dml_floor( + mode_lib->vba.LinesInDETY[k] + + dml_min( + mode_lib->vba.LinesInDETY[k] + * mode_lib->vba.DPPCLK[k] + * mode_lib->vba.BytePerPixelDETY[k] + * mode_lib->vba.PSCL_THROUGHPUT_LUMA[k] + / (mode_lib->vba.ReturnBW + / mode_lib->vba.DPPPerPlane[k]), + (double) mode_lib->vba.EffectiveLBLatencyHidingSourceLinesLuma), + mode_lib->vba.SwathHeightY[k]); + + mode_lib->vba.UrgentLatencySupportUsLuma = mode_lib->vba.EffectiveDETPlusLBLinesLuma + * (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) + / mode_lib->vba.VRatio[k] + - mode_lib->vba.EffectiveDETPlusLBLinesLuma + * mode_lib->vba.SwathWidthY[k] + * mode_lib->vba.BytePerPixelDETY[k] + / (mode_lib->vba.ReturnBW + / mode_lib->vba.DPPPerPlane[k]); + + if (mode_lib->vba.BytePerPixelDETC[k] > 0) { + mode_lib->vba.EffectiveDETPlusLBLinesChroma = + dml_floor( + mode_lib->vba.LinesInDETC[k] + + dml_min( + mode_lib->vba.LinesInDETC[k] + * mode_lib->vba.DPPCLK[k] + * mode_lib->vba.BytePerPixelDETC[k] + * mode_lib->vba.PSCL_THROUGHPUT_CHROMA[k] + / (mode_lib->vba.ReturnBW + / mode_lib->vba.DPPPerPlane[k]), + (double) mode_lib->vba.EffectiveLBLatencyHidingSourceLinesChroma), + mode_lib->vba.SwathHeightC[k]); + mode_lib->vba.UrgentLatencySupportUsChroma = + mode_lib->vba.EffectiveDETPlusLBLinesChroma + * (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]) + / (mode_lib->vba.VRatio[k] / 2) + - mode_lib->vba.EffectiveDETPlusLBLinesChroma + * (mode_lib->vba.SwathWidthY[k] + / 2) + * mode_lib->vba.BytePerPixelDETC[k] + / (mode_lib->vba.ReturnBW + / mode_lib->vba.DPPPerPlane[k]); + mode_lib->vba.UrgentLatencySupportUs[k] = dml_min( + mode_lib->vba.UrgentLatencySupportUsLuma, + mode_lib->vba.UrgentLatencySupportUsChroma); + } else { + mode_lib->vba.UrgentLatencySupportUs[k] = + mode_lib->vba.UrgentLatencySupportUsLuma; + } + } + + mode_lib->vba.MinUrgentLatencySupportUs = 999999; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.MinUrgentLatencySupportUs = dml_min( + mode_lib->vba.MinUrgentLatencySupportUs, + mode_lib->vba.UrgentLatencySupportUs[k]); + } + + // Non-Urgent Latency Tolerance + mode_lib->vba.NonUrgentLatencyTolerance = mode_lib->vba.MinUrgentLatencySupportUs + - mode_lib->vba.UrgentWatermark; + + // DSCCLK + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if ((mode_lib->vba.BlendingAndTiming[k] != k) || !mode_lib->vba.DSCEnabled[k]) { + mode_lib->vba.DSCCLK_calculated[k] = 0.0; + } else { + if (mode_lib->vba.OutputFormat[k] == dm_420 + || mode_lib->vba.OutputFormat[k] == dm_n422) + mode_lib->vba.DSCFormatFactor = 2; + else + mode_lib->vba.DSCFormatFactor = 1; + if (mode_lib->vba.ODMCombineEnabled[k]) + mode_lib->vba.DSCCLK_calculated[k] = + mode_lib->vba.PixelClockBackEnd[k] / 6 + / mode_lib->vba.DSCFormatFactor + / (1 + - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading + / 100); + else + mode_lib->vba.DSCCLK_calculated[k] = + mode_lib->vba.PixelClockBackEnd[k] / 3 + / mode_lib->vba.DSCFormatFactor + / (1 + - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading + / 100); + } + } + + // DSC Delay + // TODO + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + double bpp = mode_lib->vba.OutputBpp[k]; + unsigned int slices = mode_lib->vba.NumberOfDSCSlices[k]; + + if (mode_lib->vba.DSCEnabled[k] && bpp != 0) { + if (!mode_lib->vba.ODMCombineEnabled[k]) { + mode_lib->vba.DSCDelay[k] = + dscceComputeDelay( + mode_lib->vba.DSCInputBitPerComponent[k], + bpp, + dml_ceil( + (double) mode_lib->vba.HActive[k] + / mode_lib->vba.NumberOfDSCSlices[k], + 1), + slices, + mode_lib->vba.OutputFormat[k]) + + dscComputeDelay( + mode_lib->vba.OutputFormat[k]); + } else { + mode_lib->vba.DSCDelay[k] = + 2 + * (dscceComputeDelay( + mode_lib->vba.DSCInputBitPerComponent[k], + bpp, + dml_ceil( + (double) mode_lib->vba.HActive[k] + / mode_lib->vba.NumberOfDSCSlices[k], + 1), + slices / 2.0, + mode_lib->vba.OutputFormat[k]) + + dscComputeDelay( + mode_lib->vba.OutputFormat[k])); + } + mode_lib->vba.DSCDelay[k] = mode_lib->vba.DSCDelay[k] + * mode_lib->vba.PixelClock[k] + / mode_lib->vba.PixelClockBackEnd[k]; + } else { + mode_lib->vba.DSCDelay[k] = 0; + } + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) + for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) // NumberOfPlanes + if (j != k && mode_lib->vba.BlendingAndTiming[k] == j + && mode_lib->vba.DSCEnabled[j]) + mode_lib->vba.DSCDelay[k] = mode_lib->vba.DSCDelay[j]; + + // Prefetch + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + unsigned int PDEAndMetaPTEBytesFrameY; + unsigned int PixelPTEBytesPerRowY; + unsigned int MetaRowByteY; + unsigned int MetaRowByteC; + unsigned int PDEAndMetaPTEBytesFrameC; + unsigned int PixelPTEBytesPerRowC; + + Calculate256BBlockSizes( + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.SurfaceTiling[k], + dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1), + dml_ceil(mode_lib->vba.BytePerPixelDETC[k], 2), + &mode_lib->vba.BlockHeight256BytesY[k], + &mode_lib->vba.BlockHeight256BytesC[k], + &mode_lib->vba.BlockWidth256BytesY[k], + &mode_lib->vba.BlockWidth256BytesC[k]); + PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes( + mode_lib, + mode_lib->vba.DCCEnable[k], + mode_lib->vba.BlockHeight256BytesY[k], + mode_lib->vba.BlockWidth256BytesY[k], + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.SurfaceTiling[k], + dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1), + mode_lib->vba.SourceScan[k], + mode_lib->vba.ViewportWidth[k], + mode_lib->vba.ViewportHeight[k], + mode_lib->vba.SwathWidthY[k], + mode_lib->vba.GPUVMEnable, + mode_lib->vba.VMMPageSize, + mode_lib->vba.PTEBufferSizeInRequestsLuma, + mode_lib->vba.PDEProcessingBufIn64KBReqs, + mode_lib->vba.PitchY[k], + mode_lib->vba.DCCMetaPitchY[k], + &mode_lib->vba.MacroTileWidthY[k], + &MetaRowByteY, + &PixelPTEBytesPerRowY, + &mode_lib->vba.PTEBufferSizeNotExceeded[mode_lib->vba.VoltageLevel][0], + &mode_lib->vba.dpte_row_height[k], + &mode_lib->vba.meta_row_height[k]); + mode_lib->vba.PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines( + mode_lib, + mode_lib->vba.VRatio[k], + mode_lib->vba.vtaps[k], + mode_lib->vba.Interlace[k], + mode_lib->vba.ProgressiveToInterlaceUnitInOPP, + mode_lib->vba.SwathHeightY[k], + mode_lib->vba.ViewportYStartY[k], + &mode_lib->vba.VInitPreFillY[k], + &mode_lib->vba.MaxNumSwathY[k]); + + if ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_8)) { + PDEAndMetaPTEBytesFrameC = + CalculateVMAndRowBytes( + mode_lib, + mode_lib->vba.DCCEnable[k], + mode_lib->vba.BlockHeight256BytesC[k], + mode_lib->vba.BlockWidth256BytesC[k], + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.SurfaceTiling[k], + dml_ceil( + mode_lib->vba.BytePerPixelDETC[k], + 2), + mode_lib->vba.SourceScan[k], + mode_lib->vba.ViewportWidth[k] / 2, + mode_lib->vba.ViewportHeight[k] / 2, + mode_lib->vba.SwathWidthY[k] / 2, + mode_lib->vba.GPUVMEnable, + mode_lib->vba.VMMPageSize, + mode_lib->vba.PTEBufferSizeInRequestsLuma, + mode_lib->vba.PDEProcessingBufIn64KBReqs, + mode_lib->vba.PitchC[k], + 0, + &mode_lib->vba.MacroTileWidthC[k], + &MetaRowByteC, + &PixelPTEBytesPerRowC, + &mode_lib->vba.PTEBufferSizeNotExceeded[mode_lib->vba.VoltageLevel][0], + &mode_lib->vba.dpte_row_height_chroma[k], + &mode_lib->vba.meta_row_height_chroma[k]); + mode_lib->vba.PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines( + mode_lib, + mode_lib->vba.VRatio[k] / 2, + mode_lib->vba.VTAPsChroma[k], + mode_lib->vba.Interlace[k], + mode_lib->vba.ProgressiveToInterlaceUnitInOPP, + mode_lib->vba.SwathHeightC[k], + mode_lib->vba.ViewportYStartC[k], + &mode_lib->vba.VInitPreFillC[k], + &mode_lib->vba.MaxNumSwathC[k]); + } else { + PixelPTEBytesPerRowC = 0; + PDEAndMetaPTEBytesFrameC = 0; + MetaRowByteC = 0; + mode_lib->vba.MaxNumSwathC[k] = 0; + mode_lib->vba.PrefetchSourceLinesC[k] = 0; + } + + mode_lib->vba.PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC; + mode_lib->vba.PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + + PDEAndMetaPTEBytesFrameC; + mode_lib->vba.MetaRowByte[k] = MetaRowByteY + MetaRowByteC; + + CalculateActiveRowBandwidth( + mode_lib->vba.GPUVMEnable, + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.VRatio[k], + mode_lib->vba.DCCEnable[k], + mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k], + MetaRowByteY, + MetaRowByteC, + mode_lib->vba.meta_row_height[k], + mode_lib->vba.meta_row_height_chroma[k], + PixelPTEBytesPerRowY, + PixelPTEBytesPerRowC, + mode_lib->vba.dpte_row_height[k], + mode_lib->vba.dpte_row_height_chroma[k], + &mode_lib->vba.meta_row_bw[k], + &mode_lib->vba.dpte_row_bw[k], + &mode_lib->vba.qual_row_bw[k]); + } + + mode_lib->vba.TCalc = 24.0 / mode_lib->vba.DCFCLKDeepSleep; + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.BlendingAndTiming[k] == k) { + if (mode_lib->vba.WritebackEnable[k] == true) { + mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k] = + mode_lib->vba.WritebackLatency + + CalculateWriteBackDelay( + mode_lib->vba.WritebackPixelFormat[k], + mode_lib->vba.WritebackHRatio[k], + mode_lib->vba.WritebackVRatio[k], + mode_lib->vba.WritebackLumaHTaps[k], + mode_lib->vba.WritebackLumaVTaps[k], + mode_lib->vba.WritebackChromaHTaps[k], + mode_lib->vba.WritebackChromaVTaps[k], + mode_lib->vba.WritebackDestinationWidth[k]) + / mode_lib->vba.DISPCLK; + } else + mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k] = 0; + for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) { + if (mode_lib->vba.BlendingAndTiming[j] == k + && mode_lib->vba.WritebackEnable[j] == true) { + mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k] = + dml_max( + mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k], + mode_lib->vba.WritebackLatency + + CalculateWriteBackDelay( + mode_lib->vba.WritebackPixelFormat[j], + mode_lib->vba.WritebackHRatio[j], + mode_lib->vba.WritebackVRatio[j], + mode_lib->vba.WritebackLumaHTaps[j], + mode_lib->vba.WritebackLumaVTaps[j], + mode_lib->vba.WritebackChromaHTaps[j], + mode_lib->vba.WritebackChromaVTaps[j], + mode_lib->vba.WritebackDestinationWidth[j]) + / mode_lib->vba.DISPCLK); + } + } + } + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) + for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) + if (mode_lib->vba.BlendingAndTiming[k] == j) + mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k] = + mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][j]; + + mode_lib->vba.VStartupLines = 13; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.MaxVStartupLines[k] = + mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k] + - dml_max( + 1.0, + dml_ceil( + mode_lib->vba.WritebackDelay[mode_lib->vba.VoltageLevel][k] + / (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]), + 1)); + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) + mode_lib->vba.MaximumMaxVStartupLines = dml_max( + mode_lib->vba.MaximumMaxVStartupLines, + mode_lib->vba.MaxVStartupLines[k]); + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.cursor_bw[k] = 0.0; + for (j = 0; j < mode_lib->vba.NumberOfCursors[k]; ++j) + mode_lib->vba.cursor_bw[k] += mode_lib->vba.CursorWidth[k][j] + * mode_lib->vba.CursorBPP[k][j] / 8.0 + / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) + * mode_lib->vba.VRatio[k]; + } + + do { + double MaxTotalRDBandwidth = 0; + bool DestinationLineTimesForPrefetchLessThan2 = false; + bool VRatioPrefetchMoreThan4 = false; + bool prefetch_vm_bw_valid = true; + bool prefetch_row_bw_valid = true; + double TWait = CalculateTWait( + mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb], + mode_lib->vba.DRAMClockChangeLatency, + mode_lib->vba.UrgentLatencyPixelDataOnly, + mode_lib->vba.SREnterPlusExitTime); + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.XFCEnabled[k] == true) { + mode_lib->vba.XFCRemoteSurfaceFlipDelay = + CalculateRemoteSurfaceFlipDelay( + mode_lib, + mode_lib->vba.VRatio[k], + mode_lib->vba.SwathWidthY[k], + dml_ceil( + mode_lib->vba.BytePerPixelDETY[k], + 1), + mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k], + mode_lib->vba.XFCTSlvVupdateOffset, + mode_lib->vba.XFCTSlvVupdateWidth, + mode_lib->vba.XFCTSlvVreadyOffset, + mode_lib->vba.XFCXBUFLatencyTolerance, + mode_lib->vba.XFCFillBWOverhead, + mode_lib->vba.XFCSlvChunkSize, + mode_lib->vba.XFCBusTransportTime, + mode_lib->vba.TCalc, + TWait, + &mode_lib->vba.SrcActiveDrainRate, + &mode_lib->vba.TInitXFill, + &mode_lib->vba.TslvChk); + } else { + mode_lib->vba.XFCRemoteSurfaceFlipDelay = 0; + } + + CalculateDelayAfterScaler(mode_lib, mode_lib->vba.ReturnBW, mode_lib->vba.ReadBandwidthPlaneLuma[k], mode_lib->vba.ReadBandwidthPlaneChroma[k], mode_lib->vba.TotalDataReadBandwidth, + mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k], mode_lib->vba.DisplayPipeLineDeliveryTimeChroma[k], + mode_lib->vba.DPPCLK[k], mode_lib->vba.DISPCLK, mode_lib->vba.PixelClock[k], mode_lib->vba.DSCDelay[k], mode_lib->vba.DPPPerPlane[k], mode_lib->vba.ScalerEnabled[k], mode_lib->vba.NumberOfCursors[k], + mode_lib->vba.DPPCLKDelaySubtotal, mode_lib->vba.DPPCLKDelaySCL, mode_lib->vba.DPPCLKDelaySCLLBOnly, mode_lib->vba.DPPCLKDelayCNVCFormater, mode_lib->vba.DPPCLKDelayCNVCCursor, mode_lib->vba.DISPCLKDelaySubtotal, + mode_lib->vba.SwathWidthY[k] / mode_lib->vba.HRatio[k], mode_lib->vba.OutputFormat[k], mode_lib->vba.HTotal[k], + mode_lib->vba.SwathWidthSingleDPPY[k], mode_lib->vba.BytePerPixelDETY[k], mode_lib->vba.BytePerPixelDETC[k], mode_lib->vba.SwathHeightY[k], mode_lib->vba.SwathHeightC[k], mode_lib->vba.Interlace[k], + mode_lib->vba.ProgressiveToInterlaceUnitInOPP, &mode_lib->vba.DSTXAfterScaler[k], &mode_lib->vba.DSTYAfterScaler[k]); + + mode_lib->vba.ErrorResult[k] = + CalculatePrefetchSchedule( + mode_lib, + mode_lib->vba.DPPCLK[k], + mode_lib->vba.DISPCLK, + mode_lib->vba.PixelClock[k], + mode_lib->vba.DCFCLKDeepSleep, + mode_lib->vba.DPPPerPlane[k], + mode_lib->vba.NumberOfCursors[k], + mode_lib->vba.VTotal[k] + - mode_lib->vba.VActive[k], + mode_lib->vba.HTotal[k], + mode_lib->vba.MaxInterDCNTileRepeaters, + dml_min( + mode_lib->vba.VStartupLines, + mode_lib->vba.MaxVStartupLines[k]), + mode_lib->vba.GPUVMMaxPageTableLevels, + mode_lib->vba.GPUVMEnable, + mode_lib->vba.DynamicMetadataEnable[k], + mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k], + mode_lib->vba.DynamicMetadataTransmittedBytes[k], + mode_lib->vba.DCCEnable[k], + mode_lib->vba.UrgentLatencyPixelDataOnly, + mode_lib->vba.UrgentExtraLatency, + mode_lib->vba.TCalc, + mode_lib->vba.PDEAndMetaPTEBytesFrame[k], + mode_lib->vba.MetaRowByte[k], + mode_lib->vba.PixelPTEBytesPerRow[k], + mode_lib->vba.PrefetchSourceLinesY[k], + mode_lib->vba.SwathWidthY[k], + mode_lib->vba.BytePerPixelDETY[k], + mode_lib->vba.VInitPreFillY[k], + mode_lib->vba.MaxNumSwathY[k], + mode_lib->vba.PrefetchSourceLinesC[k], + mode_lib->vba.BytePerPixelDETC[k], + mode_lib->vba.VInitPreFillC[k], + mode_lib->vba.MaxNumSwathC[k], + mode_lib->vba.SwathHeightY[k], + mode_lib->vba.SwathHeightC[k], + TWait, + mode_lib->vba.XFCEnabled[k], + mode_lib->vba.XFCRemoteSurfaceFlipDelay, + mode_lib->vba.Interlace[k], + mode_lib->vba.ProgressiveToInterlaceUnitInOPP, + mode_lib->vba.DSTXAfterScaler[k], + mode_lib->vba.DSTYAfterScaler[k], + &mode_lib->vba.DestinationLinesForPrefetch[k], + &mode_lib->vba.PrefetchBandwidth[k], + &mode_lib->vba.DestinationLinesToRequestVMInVBlank[k], + &mode_lib->vba.DestinationLinesToRequestRowInVBlank[k], + &mode_lib->vba.VRatioPrefetchY[k], + &mode_lib->vba.VRatioPrefetchC[k], + &mode_lib->vba.RequiredPrefetchPixDataBWLuma[k], + &mode_lib->vba.Tno_bw[k], + &mode_lib->vba.VUpdateOffsetPix[k], + &mode_lib->vba.VUpdateWidthPix[k], + &mode_lib->vba.VReadyOffsetPix[k]); + + if (mode_lib->vba.BlendingAndTiming[k] == k) { + mode_lib->vba.VStartup[k] = dml_min( + mode_lib->vba.VStartupLines, + mode_lib->vba.MaxVStartupLines[k]); + if (mode_lib->vba.VStartupRequiredWhenNotEnoughTimeForDynamicMetadata + != 0) { + mode_lib->vba.VStartup[k] = + mode_lib->vba.VStartupRequiredWhenNotEnoughTimeForDynamicMetadata; + } + } else { + mode_lib->vba.VStartup[k] = + dml_min( + mode_lib->vba.VStartupLines, + mode_lib->vba.MaxVStartupLines[mode_lib->vba.BlendingAndTiming[k]]); + } + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + + if (mode_lib->vba.PDEAndMetaPTEBytesFrame[k] == 0) + mode_lib->vba.prefetch_vm_bw[k] = 0; + else if (mode_lib->vba.DestinationLinesToRequestVMInVBlank[k] > 0) { + mode_lib->vba.prefetch_vm_bw[k] = + (double) mode_lib->vba.PDEAndMetaPTEBytesFrame[k] + / (mode_lib->vba.DestinationLinesToRequestVMInVBlank[k] + * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]); + } else { + mode_lib->vba.prefetch_vm_bw[k] = 0; + prefetch_vm_bw_valid = false; + } + if (mode_lib->vba.MetaRowByte[k] + mode_lib->vba.PixelPTEBytesPerRow[k] + == 0) + mode_lib->vba.prefetch_row_bw[k] = 0; + else if (mode_lib->vba.DestinationLinesToRequestRowInVBlank[k] > 0) { + mode_lib->vba.prefetch_row_bw[k] = + (double) (mode_lib->vba.MetaRowByte[k] + + mode_lib->vba.PixelPTEBytesPerRow[k]) + / (mode_lib->vba.DestinationLinesToRequestRowInVBlank[k] + * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]); + } else { + mode_lib->vba.prefetch_row_bw[k] = 0; + prefetch_row_bw_valid = false; + } + + MaxTotalRDBandwidth = + MaxTotalRDBandwidth + mode_lib->vba.cursor_bw[k] + + dml_max( + mode_lib->vba.prefetch_vm_bw[k], + dml_max( + mode_lib->vba.prefetch_row_bw[k], + dml_max( + mode_lib->vba.ReadBandwidthPlaneLuma[k] + + mode_lib->vba.ReadBandwidthPlaneChroma[k], + mode_lib->vba.RequiredPrefetchPixDataBWLuma[k]) + + mode_lib->vba.meta_row_bw[k] + + mode_lib->vba.dpte_row_bw[k])); + + if (mode_lib->vba.DestinationLinesForPrefetch[k] < 2) + DestinationLineTimesForPrefetchLessThan2 = true; + if (mode_lib->vba.VRatioPrefetchY[k] > 4 + || mode_lib->vba.VRatioPrefetchC[k] > 4) + VRatioPrefetchMoreThan4 = true; + } + + if (MaxTotalRDBandwidth <= mode_lib->vba.ReturnBW && prefetch_vm_bw_valid + && prefetch_row_bw_valid && !VRatioPrefetchMoreThan4 + && !DestinationLineTimesForPrefetchLessThan2) + mode_lib->vba.PrefetchModeSupported = true; + else { + mode_lib->vba.PrefetchModeSupported = false; + dml_print( + "DML: CalculatePrefetchSchedule ***failed***. Bandwidth violation. Results are NOT valid\n"); + } + + if (mode_lib->vba.PrefetchModeSupported == true) { + double final_flip_bw[DC__NUM_DPP__MAX]; + unsigned int ImmediateFlipBytes[DC__NUM_DPP__MAX]; + double total_dcn_read_bw_with_flip = 0; + + mode_lib->vba.BandwidthAvailableForImmediateFlip = mode_lib->vba.ReturnBW; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.BandwidthAvailableForImmediateFlip = + mode_lib->vba.BandwidthAvailableForImmediateFlip + - mode_lib->vba.cursor_bw[k] + - dml_max( + mode_lib->vba.ReadBandwidthPlaneLuma[k] + + mode_lib->vba.ReadBandwidthPlaneChroma[k] + + mode_lib->vba.qual_row_bw[k], + mode_lib->vba.PrefetchBandwidth[k]); + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + ImmediateFlipBytes[k] = 0; + if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8 + && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) { + ImmediateFlipBytes[k] = + mode_lib->vba.PDEAndMetaPTEBytesFrame[k] + + mode_lib->vba.MetaRowByte[k] + + mode_lib->vba.PixelPTEBytesPerRow[k]; + } + } + mode_lib->vba.TotImmediateFlipBytes = 0; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8 + && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) { + mode_lib->vba.TotImmediateFlipBytes = + mode_lib->vba.TotImmediateFlipBytes + + ImmediateFlipBytes[k]; + } + } + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + CalculateFlipSchedule( + mode_lib, + mode_lib->vba.UrgentExtraLatency, + mode_lib->vba.UrgentLatencyPixelDataOnly, + mode_lib->vba.GPUVMMaxPageTableLevels, + mode_lib->vba.GPUVMEnable, + mode_lib->vba.BandwidthAvailableForImmediateFlip, + mode_lib->vba.TotImmediateFlipBytes, + mode_lib->vba.SourcePixelFormat[k], + ImmediateFlipBytes[k], + mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k], + mode_lib->vba.VRatio[k], + mode_lib->vba.Tno_bw[k], + mode_lib->vba.PDEAndMetaPTEBytesFrame[k], + mode_lib->vba.MetaRowByte[k], + mode_lib->vba.PixelPTEBytesPerRow[k], + mode_lib->vba.DCCEnable[k], + mode_lib->vba.dpte_row_height[k], + mode_lib->vba.meta_row_height[k], + mode_lib->vba.qual_row_bw[k], + &mode_lib->vba.DestinationLinesToRequestVMInImmediateFlip[k], + &mode_lib->vba.DestinationLinesToRequestRowInImmediateFlip[k], + &final_flip_bw[k], + &mode_lib->vba.ImmediateFlipSupportedForPipe[k]); + } + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + total_dcn_read_bw_with_flip = + total_dcn_read_bw_with_flip + + mode_lib->vba.cursor_bw[k] + + dml_max( + mode_lib->vba.prefetch_vm_bw[k], + dml_max( + mode_lib->vba.prefetch_row_bw[k], + final_flip_bw[k] + + dml_max( + mode_lib->vba.ReadBandwidthPlaneLuma[k] + + mode_lib->vba.ReadBandwidthPlaneChroma[k], + mode_lib->vba.RequiredPrefetchPixDataBWLuma[k]))); + } + mode_lib->vba.ImmediateFlipSupported = true; + if (total_dcn_read_bw_with_flip > mode_lib->vba.ReturnBW) { + mode_lib->vba.ImmediateFlipSupported = false; + } + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.ImmediateFlipSupportedForPipe[k] == false) { + mode_lib->vba.ImmediateFlipSupported = false; + } + } + } else { + mode_lib->vba.ImmediateFlipSupported = false; + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.ErrorResult[k]) { + mode_lib->vba.PrefetchModeSupported = false; + dml_print( + "DML: CalculatePrefetchSchedule ***failed***. Prefetch schedule violation. Results are NOT valid\n"); + } + } + + mode_lib->vba.VStartupLines = mode_lib->vba.VStartupLines + 1; + } while (!((mode_lib->vba.PrefetchModeSupported + && (!mode_lib->vba.ImmediateFlipSupport + || mode_lib->vba.ImmediateFlipSupported)) + || mode_lib->vba.MaximumMaxVStartupLines < mode_lib->vba.VStartupLines)); + + //Display Pipeline Delivery Time in Prefetch + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.VRatioPrefetchY[k] <= 1) { + mode_lib->vba.DisplayPipeLineDeliveryTimeLumaPrefetch[k] = + mode_lib->vba.SwathWidthY[k] * mode_lib->vba.DPPPerPlane[k] + / mode_lib->vba.HRatio[k] + / mode_lib->vba.PixelClock[k]; + } else { + mode_lib->vba.DisplayPipeLineDeliveryTimeLumaPrefetch[k] = + mode_lib->vba.SwathWidthY[k] + / mode_lib->vba.PSCL_THROUGHPUT_LUMA[k] + / mode_lib->vba.DPPCLK[k]; + } + if (mode_lib->vba.BytePerPixelDETC[k] == 0) { + mode_lib->vba.DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0; + } else { + if (mode_lib->vba.VRatioPrefetchC[k] <= 1) { + mode_lib->vba.DisplayPipeLineDeliveryTimeChromaPrefetch[k] = + mode_lib->vba.SwathWidthY[k] + * mode_lib->vba.DPPPerPlane[k] + / mode_lib->vba.HRatio[k] + / mode_lib->vba.PixelClock[k]; + } else { + mode_lib->vba.DisplayPipeLineDeliveryTimeChromaPrefetch[k] = + mode_lib->vba.SwathWidthY[k] + / mode_lib->vba.PSCL_THROUGHPUT_LUMA[k] + / mode_lib->vba.DPPCLK[k]; + } + } + } + + // Min TTUVBlank + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 0) { + mode_lib->vba.AllowDRAMClockChangeDuringVBlank[k] = true; + mode_lib->vba.AllowDRAMSelfRefreshDuringVBlank[k] = true; + mode_lib->vba.MinTTUVBlank[k] = dml_max( + mode_lib->vba.DRAMClockChangeWatermark, + dml_max( + mode_lib->vba.StutterEnterPlusExitWatermark, + mode_lib->vba.UrgentWatermark)); + } else if (mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 1) { + mode_lib->vba.AllowDRAMClockChangeDuringVBlank[k] = false; + mode_lib->vba.AllowDRAMSelfRefreshDuringVBlank[k] = true; + mode_lib->vba.MinTTUVBlank[k] = dml_max( + mode_lib->vba.StutterEnterPlusExitWatermark, + mode_lib->vba.UrgentWatermark); + } else { + mode_lib->vba.AllowDRAMClockChangeDuringVBlank[k] = false; + mode_lib->vba.AllowDRAMSelfRefreshDuringVBlank[k] = false; + mode_lib->vba.MinTTUVBlank[k] = mode_lib->vba.UrgentWatermark; + } + if (!mode_lib->vba.DynamicMetadataEnable[k]) + mode_lib->vba.MinTTUVBlank[k] = mode_lib->vba.TCalc + + mode_lib->vba.MinTTUVBlank[k]; + } + + // DCC Configuration + mode_lib->vba.ActiveDPPs = 0; + // NB P-State/DRAM Clock Change Support + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.ActiveDPPs = mode_lib->vba.ActiveDPPs + mode_lib->vba.DPPPerPlane[k]; + } + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + double EffectiveLBLatencyHidingY; + double EffectiveLBLatencyHidingC; + double DPPOutputBufferLinesY; + double DPPOutputBufferLinesC; + double DPPOPPBufferingY; + double MaxDETBufferingTimeY; + double ActiveDRAMClockChangeLatencyMarginY; + + mode_lib->vba.LBLatencyHidingSourceLinesY = + dml_min( + mode_lib->vba.MaxLineBufferLines, + (unsigned int) dml_floor( + (double) mode_lib->vba.LineBufferSize + / mode_lib->vba.LBBitPerPixel[k] + / (mode_lib->vba.SwathWidthY[k] + / dml_max( + mode_lib->vba.HRatio[k], + 1.0)), + 1)) - (mode_lib->vba.vtaps[k] - 1); + + mode_lib->vba.LBLatencyHidingSourceLinesC = + dml_min( + mode_lib->vba.MaxLineBufferLines, + (unsigned int) dml_floor( + (double) mode_lib->vba.LineBufferSize + / mode_lib->vba.LBBitPerPixel[k] + / (mode_lib->vba.SwathWidthY[k] + / 2.0 + / dml_max( + mode_lib->vba.HRatio[k] + / 2, + 1.0)), + 1)) + - (mode_lib->vba.VTAPsChroma[k] - 1); + + EffectiveLBLatencyHidingY = mode_lib->vba.LBLatencyHidingSourceLinesY + / mode_lib->vba.VRatio[k] + * (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]); + + EffectiveLBLatencyHidingC = mode_lib->vba.LBLatencyHidingSourceLinesC + / (mode_lib->vba.VRatio[k] / 2) + * (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]); + + if (mode_lib->vba.SwathWidthY[k] > 2 * mode_lib->vba.DPPOutputBufferPixels) { + DPPOutputBufferLinesY = mode_lib->vba.DPPOutputBufferPixels + / mode_lib->vba.SwathWidthY[k]; + } else if (mode_lib->vba.SwathWidthY[k] > mode_lib->vba.DPPOutputBufferPixels) { + DPPOutputBufferLinesY = 0.5; + } else { + DPPOutputBufferLinesY = 1; + } + + if (mode_lib->vba.SwathWidthY[k] / 2 > 2 * mode_lib->vba.DPPOutputBufferPixels) { + DPPOutputBufferLinesC = mode_lib->vba.DPPOutputBufferPixels + / (mode_lib->vba.SwathWidthY[k] / 2); + } else if (mode_lib->vba.SwathWidthY[k] / 2 > mode_lib->vba.DPPOutputBufferPixels) { + DPPOutputBufferLinesC = 0.5; + } else { + DPPOutputBufferLinesC = 1; + } + + DPPOPPBufferingY = (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) + * (DPPOutputBufferLinesY + mode_lib->vba.OPPOutputBufferLines); + MaxDETBufferingTimeY = mode_lib->vba.FullDETBufferingTimeY[k] + + (mode_lib->vba.LinesInDETY[k] + - mode_lib->vba.LinesInDETYRoundedDownToSwath[k]) + / mode_lib->vba.SwathHeightY[k] + * (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]); + + ActiveDRAMClockChangeLatencyMarginY = DPPOPPBufferingY + EffectiveLBLatencyHidingY + + MaxDETBufferingTimeY - mode_lib->vba.DRAMClockChangeWatermark; + + if (mode_lib->vba.ActiveDPPs > 1) { + ActiveDRAMClockChangeLatencyMarginY = + ActiveDRAMClockChangeLatencyMarginY + - (1 - 1 / (mode_lib->vba.ActiveDPPs - 1)) + * mode_lib->vba.SwathHeightY[k] + * (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]); + } + + if (mode_lib->vba.BytePerPixelDETC[k] > 0) { + double DPPOPPBufferingC = (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]) + * (DPPOutputBufferLinesC + + mode_lib->vba.OPPOutputBufferLines); + double MaxDETBufferingTimeC = + mode_lib->vba.FullDETBufferingTimeC[k] + + (mode_lib->vba.LinesInDETC[k] + - mode_lib->vba.LinesInDETCRoundedDownToSwath[k]) + / mode_lib->vba.SwathHeightC[k] + * (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]); + double ActiveDRAMClockChangeLatencyMarginC = DPPOPPBufferingC + + EffectiveLBLatencyHidingC + MaxDETBufferingTimeC + - mode_lib->vba.DRAMClockChangeWatermark; + + if (mode_lib->vba.ActiveDPPs > 1) { + ActiveDRAMClockChangeLatencyMarginC = + ActiveDRAMClockChangeLatencyMarginC + - (1 + - 1 + / (mode_lib->vba.ActiveDPPs + - 1)) + * mode_lib->vba.SwathHeightC[k] + * (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]); + } + mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min( + ActiveDRAMClockChangeLatencyMarginY, + ActiveDRAMClockChangeLatencyMarginC); + } else { + mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = + ActiveDRAMClockChangeLatencyMarginY; + } + + if (mode_lib->vba.WritebackEnable[k]) { + double WritebackDRAMClockChangeLatencyMargin; + + if (mode_lib->vba.WritebackPixelFormat[k] == dm_444_32) { + WritebackDRAMClockChangeLatencyMargin = + (double) (mode_lib->vba.WritebackInterfaceLumaBufferSize + + mode_lib->vba.WritebackInterfaceChromaBufferSize) + / (mode_lib->vba.WritebackDestinationWidth[k] + * mode_lib->vba.WritebackDestinationHeight[k] + / (mode_lib->vba.WritebackSourceHeight[k] + * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]) + * 4) + - mode_lib->vba.WritebackDRAMClockChangeWatermark; + } else if (mode_lib->vba.WritebackPixelFormat[k] == dm_420_10) { + WritebackDRAMClockChangeLatencyMargin = + dml_min( + (double) mode_lib->vba.WritebackInterfaceLumaBufferSize + * 8.0 / 10, + 2.0 + * mode_lib->vba.WritebackInterfaceChromaBufferSize + * 8 / 10) + / (mode_lib->vba.WritebackDestinationWidth[k] + * mode_lib->vba.WritebackDestinationHeight[k] + / (mode_lib->vba.WritebackSourceHeight[k] + * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k])) + - mode_lib->vba.WritebackDRAMClockChangeWatermark; + } else { + WritebackDRAMClockChangeLatencyMargin = + dml_min( + (double) mode_lib->vba.WritebackInterfaceLumaBufferSize, + 2.0 + * mode_lib->vba.WritebackInterfaceChromaBufferSize) + / (mode_lib->vba.WritebackDestinationWidth[k] + * mode_lib->vba.WritebackDestinationHeight[k] + / (mode_lib->vba.WritebackSourceHeight[k] + * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k])) + - mode_lib->vba.WritebackDRAMClockChangeWatermark; + } + mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min( + mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k], + WritebackDRAMClockChangeLatencyMargin); + } + } + + mode_lib->vba.MinActiveDRAMClockChangeMargin = 999999; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] + < mode_lib->vba.MinActiveDRAMClockChangeMargin) { + mode_lib->vba.MinActiveDRAMClockChangeMargin = + mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k]; + } + } + + mode_lib->vba.MinActiveDRAMClockChangeLatencySupported = + mode_lib->vba.MinActiveDRAMClockChangeMargin + + mode_lib->vba.DRAMClockChangeLatency; + + if (mode_lib->vba.MinActiveDRAMClockChangeMargin > 0) { + mode_lib->vba.DRAMClockChangeSupport[0][0] = dm_dram_clock_change_vactive; + } else { + if (mode_lib->vba.SynchronizedVBlank || mode_lib->vba.NumberOfActivePlanes == 1) { + mode_lib->vba.DRAMClockChangeSupport[0][0] = dm_dram_clock_change_vblank; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (!mode_lib->vba.AllowDRAMClockChangeDuringVBlank[k]) { + mode_lib->vba.DRAMClockChangeSupport[0][0] = + dm_dram_clock_change_unsupported; + } + } + } else { + mode_lib->vba.DRAMClockChangeSupport[0][0] = dm_dram_clock_change_unsupported; + } + } + for (k = 0; k <= mode_lib->vba.soc.num_states; k++) + for (j = 0; j < 2; j++) + mode_lib->vba.DRAMClockChangeSupport[k][j] = mode_lib->vba.DRAMClockChangeSupport[0][0]; + + //XFC Parameters: + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.XFCEnabled[k] == true) { + double TWait; + + mode_lib->vba.XFCSlaveVUpdateOffset[k] = mode_lib->vba.XFCTSlvVupdateOffset; + mode_lib->vba.XFCSlaveVupdateWidth[k] = mode_lib->vba.XFCTSlvVupdateWidth; + mode_lib->vba.XFCSlaveVReadyOffset[k] = mode_lib->vba.XFCTSlvVreadyOffset; + TWait = CalculateTWait( + mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb], + mode_lib->vba.DRAMClockChangeLatency, + mode_lib->vba.UrgentLatencyPixelDataOnly, + mode_lib->vba.SREnterPlusExitTime); + mode_lib->vba.XFCRemoteSurfaceFlipDelay = CalculateRemoteSurfaceFlipDelay( + mode_lib, + mode_lib->vba.VRatio[k], + mode_lib->vba.SwathWidthY[k], + dml_ceil(mode_lib->vba.BytePerPixelDETY[k], 1), + mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k], + mode_lib->vba.XFCTSlvVupdateOffset, + mode_lib->vba.XFCTSlvVupdateWidth, + mode_lib->vba.XFCTSlvVreadyOffset, + mode_lib->vba.XFCXBUFLatencyTolerance, + mode_lib->vba.XFCFillBWOverhead, + mode_lib->vba.XFCSlvChunkSize, + mode_lib->vba.XFCBusTransportTime, + mode_lib->vba.TCalc, + TWait, + &mode_lib->vba.SrcActiveDrainRate, + &mode_lib->vba.TInitXFill, + &mode_lib->vba.TslvChk); + mode_lib->vba.XFCRemoteSurfaceFlipLatency[k] = + dml_floor( + mode_lib->vba.XFCRemoteSurfaceFlipDelay + / (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]), + 1); + mode_lib->vba.XFCTransferDelay[k] = + dml_ceil( + mode_lib->vba.XFCBusTransportTime + / (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]), + 1); + mode_lib->vba.XFCPrechargeDelay[k] = + dml_ceil( + (mode_lib->vba.XFCBusTransportTime + + mode_lib->vba.TInitXFill + + mode_lib->vba.TslvChk) + / (mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]), + 1); + mode_lib->vba.InitFillLevel = mode_lib->vba.XFCXBUFLatencyTolerance + * mode_lib->vba.SrcActiveDrainRate; + mode_lib->vba.FinalFillMargin = + (mode_lib->vba.DestinationLinesToRequestVMInVBlank[k] + + mode_lib->vba.DestinationLinesToRequestRowInVBlank[k]) + * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k] + * mode_lib->vba.SrcActiveDrainRate + + mode_lib->vba.XFCFillConstant; + mode_lib->vba.FinalFillLevel = mode_lib->vba.XFCRemoteSurfaceFlipDelay + * mode_lib->vba.SrcActiveDrainRate + + mode_lib->vba.FinalFillMargin; + mode_lib->vba.RemainingFillLevel = dml_max( + 0.0, + mode_lib->vba.FinalFillLevel - mode_lib->vba.InitFillLevel); + mode_lib->vba.TFinalxFill = mode_lib->vba.RemainingFillLevel + / (mode_lib->vba.SrcActiveDrainRate + * mode_lib->vba.XFCFillBWOverhead / 100); + mode_lib->vba.XFCPrefetchMargin[k] = + mode_lib->vba.XFCRemoteSurfaceFlipDelay + + mode_lib->vba.TFinalxFill + + (mode_lib->vba.DestinationLinesToRequestVMInVBlank[k] + + mode_lib->vba.DestinationLinesToRequestRowInVBlank[k]) + * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]; + } else { + mode_lib->vba.XFCSlaveVUpdateOffset[k] = 0; + mode_lib->vba.XFCSlaveVupdateWidth[k] = 0; + mode_lib->vba.XFCSlaveVReadyOffset[k] = 0; + mode_lib->vba.XFCRemoteSurfaceFlipLatency[k] = 0; + mode_lib->vba.XFCPrechargeDelay[k] = 0; + mode_lib->vba.XFCTransferDelay[k] = 0; + mode_lib->vba.XFCPrefetchMargin[k] = 0; + } + } + { + unsigned int VStartupMargin = 0; + bool FirstMainPlane = true; + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (mode_lib->vba.BlendingAndTiming[k] == k) { + unsigned int Margin = (mode_lib->vba.MaxVStartupLines[k] - mode_lib->vba.VStartup[k]) + * mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]; + + if (FirstMainPlane) { + VStartupMargin = Margin; + FirstMainPlane = false; + } else + VStartupMargin = dml_min(VStartupMargin, Margin); + } + + if (mode_lib->vba.UseMaximumVStartup) { + if (mode_lib->vba.VTotal_Max[k] == mode_lib->vba.VTotal[k]) { + //only use max vstart if it is not drr or lateflip. + mode_lib->vba.VStartup[k] = mode_lib->vba.MaxVStartupLines[mode_lib->vba.BlendingAndTiming[k]]; + } + } + } +} +} + +static void dml20v2_DisplayPipeConfiguration(struct display_mode_lib *mode_lib) +{ + double BytePerPixDETY; + double BytePerPixDETC; + double Read256BytesBlockHeightY; + double Read256BytesBlockHeightC; + double Read256BytesBlockWidthY; + double Read256BytesBlockWidthC; + double MaximumSwathHeightY; + double MaximumSwathHeightC; + double MinimumSwathHeightY; + double MinimumSwathHeightC; + double SwathWidth; + double SwathWidthGranularityY; + double SwathWidthGranularityC; + double RoundedUpMaxSwathSizeBytesY; + double RoundedUpMaxSwathSizeBytesC; + unsigned int j, k; + + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + bool MainPlaneDoesODMCombine = false; + + if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) { + BytePerPixDETY = 8; + BytePerPixDETC = 0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32) { + BytePerPixDETY = 4; + BytePerPixDETC = 0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_16) { + BytePerPixDETY = 2; + BytePerPixDETC = 0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_8) { + BytePerPixDETY = 1; + BytePerPixDETC = 0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) { + BytePerPixDETY = 1; + BytePerPixDETC = 2; + } else { + BytePerPixDETY = 4.0 / 3.0; + BytePerPixDETC = 8.0 / 3.0; + } + + if ((mode_lib->vba.SourcePixelFormat[k] == dm_444_64 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_32 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_16 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_8)) { + if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) { + Read256BytesBlockHeightY = 1; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) { + Read256BytesBlockHeightY = 4; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_16) { + Read256BytesBlockHeightY = 8; + } else { + Read256BytesBlockHeightY = 16; + } + Read256BytesBlockWidthY = 256 / dml_ceil(BytePerPixDETY, 1) + / Read256BytesBlockHeightY; + Read256BytesBlockHeightC = 0; + Read256BytesBlockWidthC = 0; + } else { + if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) { + Read256BytesBlockHeightY = 1; + Read256BytesBlockHeightC = 1; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) { + Read256BytesBlockHeightY = 16; + Read256BytesBlockHeightC = 8; + } else { + Read256BytesBlockHeightY = 8; + Read256BytesBlockHeightC = 8; + } + Read256BytesBlockWidthY = 256 / dml_ceil(BytePerPixDETY, 1) + / Read256BytesBlockHeightY; + Read256BytesBlockWidthC = 256 / dml_ceil(BytePerPixDETC, 2) + / Read256BytesBlockHeightC; + } + + if (mode_lib->vba.SourceScan[k] == dm_horz) { + MaximumSwathHeightY = Read256BytesBlockHeightY; + MaximumSwathHeightC = Read256BytesBlockHeightC; + } else { + MaximumSwathHeightY = Read256BytesBlockWidthY; + MaximumSwathHeightC = Read256BytesBlockWidthC; + } + + if ((mode_lib->vba.SourcePixelFormat[k] == dm_444_64 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_32 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_16 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_8)) { + if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear + || (mode_lib->vba.SourcePixelFormat[k] == dm_444_64 + && (mode_lib->vba.SurfaceTiling[k] + == dm_sw_4kb_s + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_4kb_s_x + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_64kb_s + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_64kb_s_t + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_64kb_s_x + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_var_s + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_var_s_x) + && mode_lib->vba.SourceScan[k] == dm_horz)) { + MinimumSwathHeightY = MaximumSwathHeightY; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_8 + && mode_lib->vba.SourceScan[k] != dm_horz) { + MinimumSwathHeightY = MaximumSwathHeightY; + } else { + MinimumSwathHeightY = MaximumSwathHeightY / 2.0; + } + MinimumSwathHeightC = MaximumSwathHeightC; + } else { + if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) { + MinimumSwathHeightY = MaximumSwathHeightY; + MinimumSwathHeightC = MaximumSwathHeightC; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8 + && mode_lib->vba.SourceScan[k] == dm_horz) { + MinimumSwathHeightY = MaximumSwathHeightY / 2.0; + MinimumSwathHeightC = MaximumSwathHeightC; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10 + && mode_lib->vba.SourceScan[k] == dm_horz) { + MinimumSwathHeightC = MaximumSwathHeightC / 2.0; + MinimumSwathHeightY = MaximumSwathHeightY; + } else { + MinimumSwathHeightY = MaximumSwathHeightY; + MinimumSwathHeightC = MaximumSwathHeightC; + } + } + + if (mode_lib->vba.SourceScan[k] == dm_horz) { + SwathWidth = mode_lib->vba.ViewportWidth[k]; + } else { + SwathWidth = mode_lib->vba.ViewportHeight[k]; + } + + if (mode_lib->vba.ODMCombineEnabled[k] == true) { + MainPlaneDoesODMCombine = true; + } + for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) { + if (mode_lib->vba.BlendingAndTiming[k] == j + && mode_lib->vba.ODMCombineEnabled[j] == true) { + MainPlaneDoesODMCombine = true; + } + } + + if (MainPlaneDoesODMCombine == true) { + SwathWidth = dml_min( + SwathWidth, + mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k]); + } else { + if (mode_lib->vba.DPPPerPlane[k] == 0) + SwathWidth = 0; + else + SwathWidth = SwathWidth / mode_lib->vba.DPPPerPlane[k]; + } + + SwathWidthGranularityY = 256 / dml_ceil(BytePerPixDETY, 1) / MaximumSwathHeightY; + RoundedUpMaxSwathSizeBytesY = (dml_ceil( + (double) (SwathWidth - 1), + SwathWidthGranularityY) + SwathWidthGranularityY) * BytePerPixDETY + * MaximumSwathHeightY; + if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10) { + RoundedUpMaxSwathSizeBytesY = dml_ceil(RoundedUpMaxSwathSizeBytesY, 256) + + 256; + } + if (MaximumSwathHeightC > 0) { + SwathWidthGranularityC = 256.0 / dml_ceil(BytePerPixDETC, 2) + / MaximumSwathHeightC; + RoundedUpMaxSwathSizeBytesC = (dml_ceil( + (double) (SwathWidth / 2.0 - 1), + SwathWidthGranularityC) + SwathWidthGranularityC) + * BytePerPixDETC * MaximumSwathHeightC; + if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10) { + RoundedUpMaxSwathSizeBytesC = dml_ceil( + RoundedUpMaxSwathSizeBytesC, + 256) + 256; + } + } else + RoundedUpMaxSwathSizeBytesC = 0.0; + + if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC + <= mode_lib->vba.DETBufferSizeInKByte * 1024.0 / 2.0) { + mode_lib->vba.SwathHeightY[k] = MaximumSwathHeightY; + mode_lib->vba.SwathHeightC[k] = MaximumSwathHeightC; + } else { + mode_lib->vba.SwathHeightY[k] = MinimumSwathHeightY; + mode_lib->vba.SwathHeightC[k] = MinimumSwathHeightC; + } + + if (mode_lib->vba.SwathHeightC[k] == 0) { + mode_lib->vba.DETBufferSizeY[k] = mode_lib->vba.DETBufferSizeInKByte * 1024; + mode_lib->vba.DETBufferSizeC[k] = 0; + } else if (mode_lib->vba.SwathHeightY[k] <= mode_lib->vba.SwathHeightC[k]) { + mode_lib->vba.DETBufferSizeY[k] = mode_lib->vba.DETBufferSizeInKByte + * 1024.0 / 2; + mode_lib->vba.DETBufferSizeC[k] = mode_lib->vba.DETBufferSizeInKByte + * 1024.0 / 2; + } else { + mode_lib->vba.DETBufferSizeY[k] = mode_lib->vba.DETBufferSizeInKByte + * 1024.0 * 2 / 3; + mode_lib->vba.DETBufferSizeC[k] = mode_lib->vba.DETBufferSizeInKByte + * 1024.0 / 3; + } + } +} + +static double CalculateTWait( + unsigned int PrefetchMode, + double DRAMClockChangeLatency, + double UrgentLatencyPixelDataOnly, + double SREnterPlusExitTime) +{ + if (PrefetchMode == 0) { + return dml_max( + DRAMClockChangeLatency + UrgentLatencyPixelDataOnly, + dml_max(SREnterPlusExitTime, UrgentLatencyPixelDataOnly)); + } else if (PrefetchMode == 1) { + return dml_max(SREnterPlusExitTime, UrgentLatencyPixelDataOnly); + } else { + return UrgentLatencyPixelDataOnly; + } +} + +static double CalculateRemoteSurfaceFlipDelay( + struct display_mode_lib *mode_lib, + double VRatio, + double SwathWidth, + double Bpp, + double LineTime, + double XFCTSlvVupdateOffset, + double XFCTSlvVupdateWidth, + double XFCTSlvVreadyOffset, + double XFCXBUFLatencyTolerance, + double XFCFillBWOverhead, + double XFCSlvChunkSize, + double XFCBusTransportTime, + double TCalc, + double TWait, + double *SrcActiveDrainRate, + double *TInitXFill, + double *TslvChk) +{ + double TSlvSetup, AvgfillRate, result; + + *SrcActiveDrainRate = VRatio * SwathWidth * Bpp / LineTime; + TSlvSetup = XFCTSlvVupdateOffset + XFCTSlvVupdateWidth + XFCTSlvVreadyOffset; + *TInitXFill = XFCXBUFLatencyTolerance / (1 + XFCFillBWOverhead / 100); + AvgfillRate = *SrcActiveDrainRate * (1 + XFCFillBWOverhead / 100); + *TslvChk = XFCSlvChunkSize / AvgfillRate; + dml_print( + "DML::CalculateRemoteSurfaceFlipDelay: SrcActiveDrainRate: %f\n", + *SrcActiveDrainRate); + dml_print("DML::CalculateRemoteSurfaceFlipDelay: TSlvSetup: %f\n", TSlvSetup); + dml_print("DML::CalculateRemoteSurfaceFlipDelay: TInitXFill: %f\n", *TInitXFill); + dml_print("DML::CalculateRemoteSurfaceFlipDelay: AvgfillRate: %f\n", AvgfillRate); + dml_print("DML::CalculateRemoteSurfaceFlipDelay: TslvChk: %f\n", *TslvChk); + result = 2 * XFCBusTransportTime + TSlvSetup + TCalc + TWait + *TslvChk + *TInitXFill; // TODO: This doesn't seem to match programming guide + dml_print("DML::CalculateRemoteSurfaceFlipDelay: RemoteSurfaceFlipDelay: %f\n", result); + return result; +} + +static double CalculateWriteBackDelay( + enum source_format_class WritebackPixelFormat, + double WritebackHRatio, + double WritebackVRatio, + unsigned int WritebackLumaHTaps, + unsigned int WritebackLumaVTaps, + unsigned int WritebackChromaHTaps, + unsigned int WritebackChromaVTaps, + unsigned int WritebackDestinationWidth) +{ + double CalculateWriteBackDelay = + dml_max( + dml_ceil(WritebackLumaHTaps / 4.0, 1) / WritebackHRatio, + WritebackLumaVTaps * dml_ceil(1.0 / WritebackVRatio, 1) + * dml_ceil( + WritebackDestinationWidth + / 4.0, + 1) + + dml_ceil(1.0 / WritebackVRatio, 1) + * (dml_ceil( + WritebackLumaVTaps + / 4.0, + 1) + 4)); + + if (WritebackPixelFormat != dm_444_32) { + CalculateWriteBackDelay = + dml_max( + CalculateWriteBackDelay, + dml_max( + dml_ceil( + WritebackChromaHTaps + / 2.0, + 1) + / (2 + * WritebackHRatio), + WritebackChromaVTaps + * dml_ceil( + 1 + / (2 + * WritebackVRatio), + 1) + * dml_ceil( + WritebackDestinationWidth + / 2.0 + / 2.0, + 1) + + dml_ceil( + 1 + / (2 + * WritebackVRatio), + 1) + * (dml_ceil( + WritebackChromaVTaps + / 4.0, + 1) + + 4))); + } + return CalculateWriteBackDelay; +} + +static void CalculateActiveRowBandwidth( + bool GPUVMEnable, + enum source_format_class SourcePixelFormat, + double VRatio, + bool DCCEnable, + double LineTime, + unsigned int MetaRowByteLuma, + unsigned int MetaRowByteChroma, + unsigned int meta_row_height_luma, + unsigned int meta_row_height_chroma, + unsigned int PixelPTEBytesPerRowLuma, + unsigned int PixelPTEBytesPerRowChroma, + unsigned int dpte_row_height_luma, + unsigned int dpte_row_height_chroma, + double *meta_row_bw, + double *dpte_row_bw, + double *qual_row_bw) +{ + if (DCCEnable != true) { + *meta_row_bw = 0; + } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) { + *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + + VRatio / 2 * MetaRowByteChroma + / (meta_row_height_chroma * LineTime); + } else { + *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime); + } + + if (GPUVMEnable != true) { + *dpte_row_bw = 0; + } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) { + *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) + + VRatio / 2 * PixelPTEBytesPerRowChroma + / (dpte_row_height_chroma * LineTime); + } else { + *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime); + } + + if ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10)) { + *qual_row_bw = *meta_row_bw + *dpte_row_bw; + } else { + *qual_row_bw = 0; + } +} + +static void CalculateFlipSchedule( + struct display_mode_lib *mode_lib, + double UrgentExtraLatency, + double UrgentLatencyPixelDataOnly, + unsigned int GPUVMMaxPageTableLevels, + bool GPUVMEnable, + double BandwidthAvailableForImmediateFlip, + unsigned int TotImmediateFlipBytes, + enum source_format_class SourcePixelFormat, + unsigned int ImmediateFlipBytes, + double LineTime, + double VRatio, + double Tno_bw, + double PDEAndMetaPTEBytesFrame, + unsigned int MetaRowByte, + unsigned int PixelPTEBytesPerRow, + bool DCCEnable, + unsigned int dpte_row_height, + unsigned int meta_row_height, + double qual_row_bw, + double *DestinationLinesToRequestVMInImmediateFlip, + double *DestinationLinesToRequestRowInImmediateFlip, + double *final_flip_bw, + bool *ImmediateFlipSupportedForPipe) +{ + double min_row_time = 0.0; + + if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) { + *DestinationLinesToRequestVMInImmediateFlip = 0.0; + *DestinationLinesToRequestRowInImmediateFlip = 0.0; + *final_flip_bw = qual_row_bw; + *ImmediateFlipSupportedForPipe = true; + } else { + double TimeForFetchingMetaPTEImmediateFlip; + double TimeForFetchingRowInVBlankImmediateFlip; + + if (GPUVMEnable == true) { + mode_lib->vba.ImmediateFlipBW[0] = BandwidthAvailableForImmediateFlip + * ImmediateFlipBytes / TotImmediateFlipBytes; + TimeForFetchingMetaPTEImmediateFlip = + dml_max( + Tno_bw + + PDEAndMetaPTEBytesFrame + / mode_lib->vba.ImmediateFlipBW[0], + dml_max( + UrgentExtraLatency + + UrgentLatencyPixelDataOnly + * (GPUVMMaxPageTableLevels + - 1), + LineTime / 4.0)); + } else { + TimeForFetchingMetaPTEImmediateFlip = 0; + } + + *DestinationLinesToRequestVMInImmediateFlip = dml_floor( + 4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime + 0.125), + 1) / 4.0; + + if ((GPUVMEnable == true || DCCEnable == true)) { + mode_lib->vba.ImmediateFlipBW[0] = BandwidthAvailableForImmediateFlip + * ImmediateFlipBytes / TotImmediateFlipBytes; + TimeForFetchingRowInVBlankImmediateFlip = dml_max( + (MetaRowByte + PixelPTEBytesPerRow) + / mode_lib->vba.ImmediateFlipBW[0], + dml_max(UrgentLatencyPixelDataOnly, LineTime / 4.0)); + } else { + TimeForFetchingRowInVBlankImmediateFlip = 0; + } + + *DestinationLinesToRequestRowInImmediateFlip = dml_floor( + 4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime + 0.125), + 1) / 4.0; + + if (GPUVMEnable == true) { + *final_flip_bw = + dml_max( + PDEAndMetaPTEBytesFrame + / (*DestinationLinesToRequestVMInImmediateFlip + * LineTime), + (MetaRowByte + PixelPTEBytesPerRow) + / (TimeForFetchingRowInVBlankImmediateFlip + * LineTime)); + } else if (MetaRowByte + PixelPTEBytesPerRow > 0) { + *final_flip_bw = (MetaRowByte + PixelPTEBytesPerRow) + / (TimeForFetchingRowInVBlankImmediateFlip * LineTime); + } else { + *final_flip_bw = 0; + } + + if (GPUVMEnable && !DCCEnable) + min_row_time = dpte_row_height * LineTime / VRatio; + else if (!GPUVMEnable && DCCEnable) + min_row_time = meta_row_height * LineTime / VRatio; + else + min_row_time = dml_min(dpte_row_height, meta_row_height) * LineTime + / VRatio; + + if (*DestinationLinesToRequestVMInImmediateFlip >= 8 + || *DestinationLinesToRequestRowInImmediateFlip >= 16 + || TimeForFetchingMetaPTEImmediateFlip + + 2 * TimeForFetchingRowInVBlankImmediateFlip + > min_row_time) + *ImmediateFlipSupportedForPipe = false; + else + *ImmediateFlipSupportedForPipe = true; + } +} + +static unsigned int TruncToValidBPP( + double DecimalBPP, + bool DSCEnabled, + enum output_encoder_class Output, + enum output_format_class Format, + unsigned int DSCInputBitPerComponent) +{ + if (Output == dm_hdmi) { + if (Format == dm_420) { + if (DecimalBPP >= 18) + return 18; + else if (DecimalBPP >= 15) + return 15; + else if (DecimalBPP >= 12) + return 12; + else + return BPP_INVALID; + } else if (Format == dm_444) { + if (DecimalBPP >= 36) + return 36; + else if (DecimalBPP >= 30) + return 30; + else if (DecimalBPP >= 24) + return 24; + else if (DecimalBPP >= 18) + return 18; + else + return BPP_INVALID; + } else { + if (DecimalBPP / 1.5 >= 24) + return 24; + else if (DecimalBPP / 1.5 >= 20) + return 20; + else if (DecimalBPP / 1.5 >= 16) + return 16; + else + return BPP_INVALID; + } + } else { + if (DSCEnabled) { + if (Format == dm_420) { + if (DecimalBPP < 6) + return BPP_INVALID; + else if (DecimalBPP >= 1.5 * DSCInputBitPerComponent - 1 / 16) + return 1.5 * DSCInputBitPerComponent - 1 / 16; + else + return dml_floor(16 * DecimalBPP, 1) / 16; + } else if (Format == dm_n422) { + if (DecimalBPP < 7) + return BPP_INVALID; + else if (DecimalBPP >= 2 * DSCInputBitPerComponent - 1 / 16) + return 2 * DSCInputBitPerComponent - 1 / 16; + else + return dml_floor(16 * DecimalBPP, 1) / 16; + } else { + if (DecimalBPP < 8) + return BPP_INVALID; + else if (DecimalBPP >= 3 * DSCInputBitPerComponent - 1 / 16) + return 3 * DSCInputBitPerComponent - 1 / 16; + else + return dml_floor(16 * DecimalBPP, 1) / 16; + } + } else if (Format == dm_420) { + if (DecimalBPP >= 18) + return 18; + else if (DecimalBPP >= 15) + return 15; + else if (DecimalBPP >= 12) + return 12; + else + return BPP_INVALID; + } else if (Format == dm_s422 || Format == dm_n422) { + if (DecimalBPP >= 24) + return 24; + else if (DecimalBPP >= 20) + return 20; + else if (DecimalBPP >= 16) + return 16; + else + return BPP_INVALID; + } else { + if (DecimalBPP >= 36) + return 36; + else if (DecimalBPP >= 30) + return 30; + else if (DecimalBPP >= 24) + return 24; + else if (DecimalBPP >= 18) + return 18; + else + return BPP_INVALID; + } + } +} + +void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) +{ + struct vba_vars_st *locals = &mode_lib->vba; + + int i; + unsigned int j, k, m; + + /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/ + + /*Scale Ratio, taps Support Check*/ + + mode_lib->vba.ScaleRatioAndTapsSupport = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.ScalerEnabled[k] == false + && ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8) + || mode_lib->vba.HRatio[k] != 1.0 + || mode_lib->vba.htaps[k] != 1.0 + || mode_lib->vba.VRatio[k] != 1.0 + || mode_lib->vba.vtaps[k] != 1.0)) { + mode_lib->vba.ScaleRatioAndTapsSupport = false; + } else if (mode_lib->vba.vtaps[k] < 1.0 || mode_lib->vba.vtaps[k] > 8.0 + || mode_lib->vba.htaps[k] < 1.0 || mode_lib->vba.htaps[k] > 8.0 + || (mode_lib->vba.htaps[k] > 1.0 + && (mode_lib->vba.htaps[k] % 2) == 1) + || mode_lib->vba.HRatio[k] > mode_lib->vba.MaxHSCLRatio + || mode_lib->vba.VRatio[k] > mode_lib->vba.MaxVSCLRatio + || mode_lib->vba.HRatio[k] > mode_lib->vba.htaps[k] + || mode_lib->vba.VRatio[k] > mode_lib->vba.vtaps[k] + || (mode_lib->vba.SourcePixelFormat[k] != dm_444_64 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8 + && (mode_lib->vba.HRatio[k] / 2.0 + > mode_lib->vba.HTAPsChroma[k] + || mode_lib->vba.VRatio[k] / 2.0 + > mode_lib->vba.VTAPsChroma[k]))) { + mode_lib->vba.ScaleRatioAndTapsSupport = false; + } + } + /*Source Format, Pixel Format and Scan Support Check*/ + + mode_lib->vba.SourceFormatPixelAndScanSupport = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if ((mode_lib->vba.SurfaceTiling[k] == dm_sw_linear + && mode_lib->vba.SourceScan[k] != dm_horz) + || ((mode_lib->vba.SurfaceTiling[k] == dm_sw_4kb_d + || mode_lib->vba.SurfaceTiling[k] == dm_sw_4kb_d_x + || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d + || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d_t + || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d_x + || mode_lib->vba.SurfaceTiling[k] == dm_sw_var_d + || mode_lib->vba.SurfaceTiling[k] == dm_sw_var_d_x) + && mode_lib->vba.SourcePixelFormat[k] != dm_444_64) + || (mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_r_x + && (mode_lib->vba.SourcePixelFormat[k] == dm_mono_8 + || mode_lib->vba.SourcePixelFormat[k] + == dm_420_8 + || mode_lib->vba.SourcePixelFormat[k] + == dm_420_10)) + || (((mode_lib->vba.SurfaceTiling[k] == dm_sw_gfx7_2d_thin_gl + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_gfx7_2d_thin_lvp) + && !((mode_lib->vba.SourcePixelFormat[k] + == dm_444_64 + || mode_lib->vba.SourcePixelFormat[k] + == dm_444_32) + && mode_lib->vba.SourceScan[k] + == dm_horz + && mode_lib->vba.SupportGFX7CompatibleTilingIn32bppAnd64bpp + == true + && mode_lib->vba.DCCEnable[k] + == false)) + || (mode_lib->vba.DCCEnable[k] == true + && (mode_lib->vba.SurfaceTiling[k] + == dm_sw_linear + || mode_lib->vba.SourcePixelFormat[k] + == dm_420_8 + || mode_lib->vba.SourcePixelFormat[k] + == dm_420_10)))) { + mode_lib->vba.SourceFormatPixelAndScanSupport = false; + } + } + /*Bandwidth Support Check*/ + + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) { + locals->BytePerPixelInDETY[k] = 8.0; + locals->BytePerPixelInDETC[k] = 0.0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32) { + locals->BytePerPixelInDETY[k] = 4.0; + locals->BytePerPixelInDETC[k] = 0.0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_16 + || mode_lib->vba.SourcePixelFormat[k] == dm_mono_16) { + locals->BytePerPixelInDETY[k] = 2.0; + locals->BytePerPixelInDETC[k] = 0.0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_mono_8) { + locals->BytePerPixelInDETY[k] = 1.0; + locals->BytePerPixelInDETC[k] = 0.0; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) { + locals->BytePerPixelInDETY[k] = 1.0; + locals->BytePerPixelInDETC[k] = 2.0; + } else { + locals->BytePerPixelInDETY[k] = 4.0 / 3; + locals->BytePerPixelInDETC[k] = 8.0 / 3; + } + if (mode_lib->vba.SourceScan[k] == dm_horz) { + locals->SwathWidthYSingleDPP[k] = mode_lib->vba.ViewportWidth[k]; + } else { + locals->SwathWidthYSingleDPP[k] = mode_lib->vba.ViewportHeight[k]; + } + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + locals->ReadBandwidthLuma[k] = locals->SwathWidthYSingleDPP[k] * dml_ceil(locals->BytePerPixelInDETY[k], 1.0) + / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k]; + locals->ReadBandwidthChroma[k] = locals->SwathWidthYSingleDPP[k] / 2 * dml_ceil(locals->BytePerPixelInDETC[k], 2.0) + / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k] / 2.0; + locals->ReadBandwidth[k] = locals->ReadBandwidthLuma[k] + locals->ReadBandwidthChroma[k]; + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.WritebackEnable[k] == true + && mode_lib->vba.WritebackPixelFormat[k] == dm_444_32) { + locals->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k] + * mode_lib->vba.WritebackDestinationHeight[k] + / (mode_lib->vba.WritebackSourceHeight[k] + * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]) * 4.0; + } else if (mode_lib->vba.WritebackEnable[k] == true + && mode_lib->vba.WritebackPixelFormat[k] == dm_420_10) { + locals->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k] + * mode_lib->vba.WritebackDestinationHeight[k] + / (mode_lib->vba.WritebackSourceHeight[k] + * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]) * 3.0; + } else if (mode_lib->vba.WritebackEnable[k] == true) { + locals->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k] + * mode_lib->vba.WritebackDestinationHeight[k] + / (mode_lib->vba.WritebackSourceHeight[k] + * mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k]) * 1.5; + } else { + locals->WriteBandwidth[k] = 0.0; + } + } + mode_lib->vba.DCCEnabledInAnyPlane = false; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.DCCEnable[k] == true) { + mode_lib->vba.DCCEnabledInAnyPlane = true; + } + } + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + locals->FabricAndDRAMBandwidthPerState[i] = dml_min( + mode_lib->vba.DRAMSpeedPerState[i] * mode_lib->vba.NumberOfChannels + * mode_lib->vba.DRAMChannelWidth, + mode_lib->vba.FabricClockPerState[i] + * mode_lib->vba.FabricDatapathToDCNDataReturn) / 1000; + locals->ReturnBWToDCNPerState = dml_min(locals->ReturnBusWidth * locals->DCFCLKPerState[i], + locals->FabricAndDRAMBandwidthPerState[i] * 1000) + * locals->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100; + + locals->ReturnBWPerState[i] = locals->ReturnBWToDCNPerState; + + if (locals->DCCEnabledInAnyPlane == true && locals->ReturnBWToDCNPerState > locals->DCFCLKPerState[i] * locals->ReturnBusWidth / 4) { + locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i], + locals->ReturnBWToDCNPerState * 4 * (1 - locals->UrgentLatency / + ((locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024 + / (locals->ReturnBWToDCNPerState - locals->DCFCLKPerState[i] + * locals->ReturnBusWidth / 4) + locals->UrgentLatency))); + } + locals->CriticalPoint = 2 * locals->ReturnBusWidth * locals->DCFCLKPerState[i] * + locals->UrgentLatency / (locals->ReturnBWToDCNPerState * locals->UrgentLatency + + (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024); + + if (locals->DCCEnabledInAnyPlane && locals->CriticalPoint > 1 && locals->CriticalPoint < 4) { + locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i], + 4 * locals->ReturnBWToDCNPerState * + (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024 + * locals->ReturnBusWidth * locals->DCFCLKPerState[i] * locals->UrgentLatency / + dml_pow((locals->ReturnBWToDCNPerState * locals->UrgentLatency + + (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024), 2)); + } + + locals->ReturnBWToDCNPerState = dml_min(locals->ReturnBusWidth * + locals->DCFCLKPerState[i], locals->FabricAndDRAMBandwidthPerState[i] * 1000); + + if (locals->DCCEnabledInAnyPlane == true && locals->ReturnBWToDCNPerState > locals->DCFCLKPerState[i] * locals->ReturnBusWidth / 4) { + locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i], + locals->ReturnBWToDCNPerState * 4 * (1 - locals->UrgentLatency / + ((locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024 + / (locals->ReturnBWToDCNPerState - locals->DCFCLKPerState[i] + * locals->ReturnBusWidth / 4) + locals->UrgentLatency))); + } + locals->CriticalPoint = 2 * locals->ReturnBusWidth * locals->DCFCLKPerState[i] * + locals->UrgentLatency / (locals->ReturnBWToDCNPerState * locals->UrgentLatency + + (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024); + + if (locals->DCCEnabledInAnyPlane && locals->CriticalPoint > 1 && locals->CriticalPoint < 4) { + locals->ReturnBWPerState[i] = dml_min(locals->ReturnBWPerState[i], + 4 * locals->ReturnBWToDCNPerState * + (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024 + * locals->ReturnBusWidth * locals->DCFCLKPerState[i] * locals->UrgentLatency / + dml_pow((locals->ReturnBWToDCNPerState * locals->UrgentLatency + + (locals->ROBBufferSizeInKByte - locals->PixelChunkSizeInKByte) * 1024), 2)); + } + } + /*Writeback Latency support check*/ + + mode_lib->vba.WritebackLatencySupport = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.WritebackEnable[k] == true) { + if (mode_lib->vba.WritebackPixelFormat[k] == dm_444_32) { + if (locals->WriteBandwidth[k] + > (mode_lib->vba.WritebackInterfaceLumaBufferSize + + mode_lib->vba.WritebackInterfaceChromaBufferSize) + / mode_lib->vba.WritebackLatency) { + mode_lib->vba.WritebackLatencySupport = false; + } + } else { + if (locals->WriteBandwidth[k] + > 1.5 + * dml_min( + mode_lib->vba.WritebackInterfaceLumaBufferSize, + 2.0 + * mode_lib->vba.WritebackInterfaceChromaBufferSize) + / mode_lib->vba.WritebackLatency) { + mode_lib->vba.WritebackLatencySupport = false; + } + } + } + } + /*Re-ordering Buffer Support Check*/ + + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i] = + (mode_lib->vba.RoundTripPingLatencyCycles + 32.0) / mode_lib->vba.DCFCLKPerState[i] + + locals->UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels / locals->ReturnBWPerState[i]; + if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024.0 / locals->ReturnBWPerState[i] + > locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i]) { + locals->ROBSupport[i] = true; + } else { + locals->ROBSupport[i] = false; + } + } + /*Writeback Mode Support Check*/ + + mode_lib->vba.TotalNumberOfActiveWriteback = 0; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.WritebackEnable[k] == true) { + if (mode_lib->vba.ActiveWritebacksPerPlane[k] == 0) + mode_lib->vba.ActiveWritebacksPerPlane[k] = 1; + mode_lib->vba.TotalNumberOfActiveWriteback = + mode_lib->vba.TotalNumberOfActiveWriteback + + mode_lib->vba.ActiveWritebacksPerPlane[k]; + } + } + mode_lib->vba.WritebackModeSupport = true; + if (mode_lib->vba.TotalNumberOfActiveWriteback > mode_lib->vba.MaxNumWriteback) { + mode_lib->vba.WritebackModeSupport = false; + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.WritebackEnable[k] == true + && mode_lib->vba.Writeback10bpc420Supported != true + && mode_lib->vba.WritebackPixelFormat[k] == dm_420_10) { + mode_lib->vba.WritebackModeSupport = false; + } + } + /*Writeback Scale Ratio and Taps Support Check*/ + + mode_lib->vba.WritebackScaleRatioAndTapsSupport = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.WritebackEnable[k] == true) { + if (mode_lib->vba.WritebackLumaAndChromaScalingSupported == false + && (mode_lib->vba.WritebackHRatio[k] != 1.0 + || mode_lib->vba.WritebackVRatio[k] != 1.0)) { + mode_lib->vba.WritebackScaleRatioAndTapsSupport = false; + } + if (mode_lib->vba.WritebackHRatio[k] > mode_lib->vba.WritebackMaxHSCLRatio + || mode_lib->vba.WritebackVRatio[k] + > mode_lib->vba.WritebackMaxVSCLRatio + || mode_lib->vba.WritebackHRatio[k] + < mode_lib->vba.WritebackMinHSCLRatio + || mode_lib->vba.WritebackVRatio[k] + < mode_lib->vba.WritebackMinVSCLRatio + || mode_lib->vba.WritebackLumaHTaps[k] + > mode_lib->vba.WritebackMaxHSCLTaps + || mode_lib->vba.WritebackLumaVTaps[k] + > mode_lib->vba.WritebackMaxVSCLTaps + || mode_lib->vba.WritebackHRatio[k] + > mode_lib->vba.WritebackLumaHTaps[k] + || mode_lib->vba.WritebackVRatio[k] + > mode_lib->vba.WritebackLumaVTaps[k] + || (mode_lib->vba.WritebackLumaHTaps[k] > 2.0 + && ((mode_lib->vba.WritebackLumaHTaps[k] % 2) + == 1)) + || (mode_lib->vba.WritebackPixelFormat[k] != dm_444_32 + && (mode_lib->vba.WritebackChromaHTaps[k] + > mode_lib->vba.WritebackMaxHSCLTaps + || mode_lib->vba.WritebackChromaVTaps[k] + > mode_lib->vba.WritebackMaxVSCLTaps + || 2.0 + * mode_lib->vba.WritebackHRatio[k] + > mode_lib->vba.WritebackChromaHTaps[k] + || 2.0 + * mode_lib->vba.WritebackVRatio[k] + > mode_lib->vba.WritebackChromaVTaps[k] + || (mode_lib->vba.WritebackChromaHTaps[k] > 2.0 + && ((mode_lib->vba.WritebackChromaHTaps[k] % 2) == 1))))) { + mode_lib->vba.WritebackScaleRatioAndTapsSupport = false; + } + if (mode_lib->vba.WritebackVRatio[k] < 1.0) { + mode_lib->vba.WritebackLumaVExtra = + dml_max(1.0 - 2.0 / dml_ceil(1.0 / mode_lib->vba.WritebackVRatio[k], 1.0), 0.0); + } else { + mode_lib->vba.WritebackLumaVExtra = -1; + } + if ((mode_lib->vba.WritebackPixelFormat[k] == dm_444_32 + && mode_lib->vba.WritebackLumaVTaps[k] + > (mode_lib->vba.WritebackLineBufferLumaBufferSize + + mode_lib->vba.WritebackLineBufferChromaBufferSize) + / 3.0 + / mode_lib->vba.WritebackDestinationWidth[k] + - mode_lib->vba.WritebackLumaVExtra) + || (mode_lib->vba.WritebackPixelFormat[k] == dm_420_8 + && mode_lib->vba.WritebackLumaVTaps[k] + > mode_lib->vba.WritebackLineBufferLumaBufferSize + * 8.0 / 10.0 / mode_lib->vba.WritebackDestinationWidth[k] + - mode_lib->vba.WritebackLumaVExtra) + || (mode_lib->vba.WritebackPixelFormat[k] == dm_420_10 + && mode_lib->vba.WritebackLumaVTaps[k] + > mode_lib->vba.WritebackLineBufferLumaBufferSize + * 8.0 / 10.0 + / mode_lib->vba.WritebackDestinationWidth[k] + - mode_lib->vba.WritebackLumaVExtra)) { + mode_lib->vba.WritebackScaleRatioAndTapsSupport = false; + } + if (2.0 * mode_lib->vba.WritebackVRatio[k] < 1) { + mode_lib->vba.WritebackChromaVExtra = 0.0; + } else { + mode_lib->vba.WritebackChromaVExtra = -1; + } + if ((mode_lib->vba.WritebackPixelFormat[k] == dm_420_8 + && mode_lib->vba.WritebackChromaVTaps[k] + > mode_lib->vba.WritebackLineBufferChromaBufferSize + * 8.0 / 10.0 / mode_lib->vba.WritebackDestinationWidth[k] + - mode_lib->vba.WritebackChromaVExtra) + || (mode_lib->vba.WritebackPixelFormat[k] == dm_420_10 + && mode_lib->vba.WritebackChromaVTaps[k] + > mode_lib->vba.WritebackLineBufferChromaBufferSize + * 8.0 / 10.0 + / mode_lib->vba.WritebackDestinationWidth[k] + - mode_lib->vba.WritebackChromaVExtra)) { + mode_lib->vba.WritebackScaleRatioAndTapsSupport = false; + } + } + } + /*Maximum DISPCLK/DPPCLK Support check*/ + + mode_lib->vba.WritebackRequiredDISPCLK = 0.0; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.WritebackEnable[k] == true) { + mode_lib->vba.WritebackRequiredDISPCLK = + dml_max( + mode_lib->vba.WritebackRequiredDISPCLK, + CalculateWriteBackDISPCLK( + mode_lib->vba.WritebackPixelFormat[k], + mode_lib->vba.PixelClock[k], + mode_lib->vba.WritebackHRatio[k], + mode_lib->vba.WritebackVRatio[k], + mode_lib->vba.WritebackLumaHTaps[k], + mode_lib->vba.WritebackLumaVTaps[k], + mode_lib->vba.WritebackChromaHTaps[k], + mode_lib->vba.WritebackChromaVTaps[k], + mode_lib->vba.WritebackDestinationWidth[k], + mode_lib->vba.HTotal[k], + mode_lib->vba.WritebackChromaLineBufferWidth)); + } + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.HRatio[k] > 1.0) { + locals->PSCL_FACTOR[k] = dml_min( + mode_lib->vba.MaxDCHUBToPSCLThroughput, + mode_lib->vba.MaxPSCLToLBThroughput + * mode_lib->vba.HRatio[k] + / dml_ceil( + mode_lib->vba.htaps[k] + / 6.0, + 1.0)); + } else { + locals->PSCL_FACTOR[k] = dml_min( + mode_lib->vba.MaxDCHUBToPSCLThroughput, + mode_lib->vba.MaxPSCLToLBThroughput); + } + if (locals->BytePerPixelInDETC[k] == 0.0) { + locals->PSCL_FACTOR_CHROMA[k] = 0.0; + locals->MinDPPCLKUsingSingleDPP[k] = + mode_lib->vba.PixelClock[k] + * dml_max3( + mode_lib->vba.vtaps[k] / 6.0 + * dml_min( + 1.0, + mode_lib->vba.HRatio[k]), + mode_lib->vba.HRatio[k] + * mode_lib->vba.VRatio[k] + / locals->PSCL_FACTOR[k], + 1.0); + if ((mode_lib->vba.htaps[k] > 6.0 || mode_lib->vba.vtaps[k] > 6.0) + && locals->MinDPPCLKUsingSingleDPP[k] + < 2.0 * mode_lib->vba.PixelClock[k]) { + locals->MinDPPCLKUsingSingleDPP[k] = 2.0 + * mode_lib->vba.PixelClock[k]; + } + } else { + if (mode_lib->vba.HRatio[k] / 2.0 > 1.0) { + locals->PSCL_FACTOR_CHROMA[k] = + dml_min( + mode_lib->vba.MaxDCHUBToPSCLThroughput, + mode_lib->vba.MaxPSCLToLBThroughput + * mode_lib->vba.HRatio[k] + / 2.0 + / dml_ceil( + mode_lib->vba.HTAPsChroma[k] + / 6.0, + 1.0)); + } else { + locals->PSCL_FACTOR_CHROMA[k] = dml_min( + mode_lib->vba.MaxDCHUBToPSCLThroughput, + mode_lib->vba.MaxPSCLToLBThroughput); + } + locals->MinDPPCLKUsingSingleDPP[k] = + mode_lib->vba.PixelClock[k] + * dml_max5( + mode_lib->vba.vtaps[k] / 6.0 + * dml_min( + 1.0, + mode_lib->vba.HRatio[k]), + mode_lib->vba.HRatio[k] + * mode_lib->vba.VRatio[k] + / locals->PSCL_FACTOR[k], + mode_lib->vba.VTAPsChroma[k] + / 6.0 + * dml_min( + 1.0, + mode_lib->vba.HRatio[k] + / 2.0), + mode_lib->vba.HRatio[k] + * mode_lib->vba.VRatio[k] + / 4.0 + / locals->PSCL_FACTOR_CHROMA[k], + 1.0); + if ((mode_lib->vba.htaps[k] > 6.0 || mode_lib->vba.vtaps[k] > 6.0 + || mode_lib->vba.HTAPsChroma[k] > 6.0 + || mode_lib->vba.VTAPsChroma[k] > 6.0) + && locals->MinDPPCLKUsingSingleDPP[k] + < 2.0 * mode_lib->vba.PixelClock[k]) { + locals->MinDPPCLKUsingSingleDPP[k] = 2.0 + * mode_lib->vba.PixelClock[k]; + } + } + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + Calculate256BBlockSizes( + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.SurfaceTiling[k], + dml_ceil(locals->BytePerPixelInDETY[k], 1.0), + dml_ceil(locals->BytePerPixelInDETC[k], 2.0), + &locals->Read256BlockHeightY[k], + &locals->Read256BlockHeightC[k], + &locals->Read256BlockWidthY[k], + &locals->Read256BlockWidthC[k]); + if (mode_lib->vba.SourceScan[k] == dm_horz) { + locals->MaxSwathHeightY[k] = locals->Read256BlockHeightY[k]; + locals->MaxSwathHeightC[k] = locals->Read256BlockHeightC[k]; + } else { + locals->MaxSwathHeightY[k] = locals->Read256BlockWidthY[k]; + locals->MaxSwathHeightC[k] = locals->Read256BlockWidthC[k]; + } + if ((mode_lib->vba.SourcePixelFormat[k] == dm_444_64 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_32 + || mode_lib->vba.SourcePixelFormat[k] == dm_444_16 + || mode_lib->vba.SourcePixelFormat[k] == dm_mono_16 + || mode_lib->vba.SourcePixelFormat[k] == dm_mono_8)) { + if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear + || (mode_lib->vba.SourcePixelFormat[k] == dm_444_64 + && (mode_lib->vba.SurfaceTiling[k] + == dm_sw_4kb_s + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_4kb_s_x + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_64kb_s + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_64kb_s_t + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_64kb_s_x + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_var_s + || mode_lib->vba.SurfaceTiling[k] + == dm_sw_var_s_x) + && mode_lib->vba.SourceScan[k] == dm_horz)) { + locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k]; + } else { + locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k] + / 2.0; + } + locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k]; + } else { + if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) { + locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k]; + locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k]; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8 + && mode_lib->vba.SourceScan[k] == dm_horz) { + locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k] + / 2.0; + locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k]; + } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10 + && mode_lib->vba.SourceScan[k] == dm_horz) { + locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k] + / 2.0; + locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k]; + } else { + locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k]; + locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k]; + } + } + if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) { + mode_lib->vba.MaximumSwathWidthSupport = 8192.0; + } else { + mode_lib->vba.MaximumSwathWidthSupport = 5120.0; + } + mode_lib->vba.MaximumSwathWidthInDETBuffer = + dml_min( + mode_lib->vba.MaximumSwathWidthSupport, + mode_lib->vba.DETBufferSizeInKByte * 1024.0 / 2.0 + / (locals->BytePerPixelInDETY[k] + * locals->MinSwathHeightY[k] + + locals->BytePerPixelInDETC[k] + / 2.0 + * locals->MinSwathHeightC[k])); + if (locals->BytePerPixelInDETC[k] == 0.0) { + mode_lib->vba.MaximumSwathWidthInLineBuffer = + mode_lib->vba.LineBufferSize + * dml_max(mode_lib->vba.HRatio[k], 1.0) + / mode_lib->vba.LBBitPerPixel[k] + / (mode_lib->vba.vtaps[k] + + dml_max( + dml_ceil( + mode_lib->vba.VRatio[k], + 1.0) + - 2, + 0.0)); + } else { + mode_lib->vba.MaximumSwathWidthInLineBuffer = + dml_min( + mode_lib->vba.LineBufferSize + * dml_max( + mode_lib->vba.HRatio[k], + 1.0) + / mode_lib->vba.LBBitPerPixel[k] + / (mode_lib->vba.vtaps[k] + + dml_max( + dml_ceil( + mode_lib->vba.VRatio[k], + 1.0) + - 2, + 0.0)), + 2.0 * mode_lib->vba.LineBufferSize + * dml_max( + mode_lib->vba.HRatio[k] + / 2.0, + 1.0) + / mode_lib->vba.LBBitPerPixel[k] + / (mode_lib->vba.VTAPsChroma[k] + + dml_max( + dml_ceil( + mode_lib->vba.VRatio[k] + / 2.0, + 1.0) + - 2, + 0.0))); + } + locals->MaximumSwathWidth[k] = dml_min( + mode_lib->vba.MaximumSwathWidthInDETBuffer, + mode_lib->vba.MaximumSwathWidthInLineBuffer); + } + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + for (j = 0; j < 2; j++) { + mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown( + mode_lib->vba.MaxDispclk[i], + mode_lib->vba.DISPCLKDPPCLKVCOSpeed); + mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown( + mode_lib->vba.MaxDppclk[i], + mode_lib->vba.DISPCLKDPPCLKVCOSpeed); + locals->RequiredDISPCLK[i][j] = 0.0; + locals->DISPCLK_DPPCLK_Support[i][j] = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine = + mode_lib->vba.PixelClock[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) + * (1.0 + mode_lib->vba.DISPCLKRampingMargin / 100.0); + if (mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine >= mode_lib->vba.MaxDispclk[i] + && i == mode_lib->vba.soc.num_states) + mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine = mode_lib->vba.PixelClock[k] + * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); + + mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] / 2 + * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * (1 + mode_lib->vba.DISPCLKRampingMargin / 100.0); + if (mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine >= mode_lib->vba.MaxDispclk[i] + && i == mode_lib->vba.soc.num_states) + mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] / 2 + * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); + if (mode_lib->vba.ODMCapability == false || mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine <= mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity) { + locals->ODMCombineEnablePerState[i][k] = false; + mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine; + } else { + locals->ODMCombineEnablePerState[i][k] = true; + mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine; + } + if (locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) <= mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity + && locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k] + && locals->ODMCombineEnablePerState[i][k] == false) { + locals->NoOfDPP[i][j][k] = 1; + locals->RequiredDPPCLK[i][j][k] = + locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); + } else { + locals->NoOfDPP[i][j][k] = 2; + locals->RequiredDPPCLK[i][j][k] = + locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0; + } + locals->RequiredDISPCLK[i][j] = dml_max( + locals->RequiredDISPCLK[i][j], + mode_lib->vba.PlaneRequiredDISPCLK); + if ((locals->MinDPPCLKUsingSingleDPP[k] / locals->NoOfDPP[i][j][k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) + > mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity) + || (mode_lib->vba.PlaneRequiredDISPCLK > mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity)) { + locals->DISPCLK_DPPCLK_Support[i][j] = false; + } + } + locals->TotalNumberOfActiveDPP[i][j] = 0.0; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) + locals->TotalNumberOfActiveDPP[i][j] = locals->TotalNumberOfActiveDPP[i][j] + locals->NoOfDPP[i][j][k]; + if (j == 1) { + while (locals->TotalNumberOfActiveDPP[i][j] < mode_lib->vba.MaxNumDPP + && locals->TotalNumberOfActiveDPP[i][j] < 2 * mode_lib->vba.NumberOfActivePlanes) { + double BWOfNonSplitPlaneOfMaximumBandwidth; + unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth; + + BWOfNonSplitPlaneOfMaximumBandwidth = 0; + NumberOfNonSplitPlaneOfMaximumBandwidth = 0; + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + if (locals->ReadBandwidth[k] > BWOfNonSplitPlaneOfMaximumBandwidth && locals->NoOfDPP[i][j][k] == 1) { + BWOfNonSplitPlaneOfMaximumBandwidth = locals->ReadBandwidth[k]; + NumberOfNonSplitPlaneOfMaximumBandwidth = k; + } + } + locals->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2; + locals->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = + locals->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth] + * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2; + locals->TotalNumberOfActiveDPP[i][j] = locals->TotalNumberOfActiveDPP[i][j] + 1; + } + } + if (locals->TotalNumberOfActiveDPP[i][j] > mode_lib->vba.MaxNumDPP) { + locals->RequiredDISPCLK[i][j] = 0.0; + locals->DISPCLK_DPPCLK_Support[i][j] = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + locals->ODMCombineEnablePerState[i][k] = false; + if (locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k]) { + locals->NoOfDPP[i][j][k] = 1; + locals->RequiredDPPCLK[i][j][k] = locals->MinDPPCLKUsingSingleDPP[k] + * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); + } else { + locals->NoOfDPP[i][j][k] = 2; + locals->RequiredDPPCLK[i][j][k] = locals->MinDPPCLKUsingSingleDPP[k] + * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0; + } + if (i != mode_lib->vba.soc.num_states) { + mode_lib->vba.PlaneRequiredDISPCLK = + mode_lib->vba.PixelClock[k] + * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) + * (1.0 + mode_lib->vba.DISPCLKRampingMargin / 100.0); + } else { + mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PixelClock[k] + * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); + } + locals->RequiredDISPCLK[i][j] = dml_max( + locals->RequiredDISPCLK[i][j], + mode_lib->vba.PlaneRequiredDISPCLK); + if (locals->MinDPPCLKUsingSingleDPP[k] / locals->NoOfDPP[i][j][k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) + > mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity + || mode_lib->vba.PlaneRequiredDISPCLK > mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity) + locals->DISPCLK_DPPCLK_Support[i][j] = false; + } + locals->TotalNumberOfActiveDPP[i][j] = 0.0; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) + locals->TotalNumberOfActiveDPP[i][j] = locals->TotalNumberOfActiveDPP[i][j] + locals->NoOfDPP[i][j][k]; + } + locals->RequiredDISPCLK[i][j] = dml_max( + locals->RequiredDISPCLK[i][j], + mode_lib->vba.WritebackRequiredDISPCLK); + if (mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity + < mode_lib->vba.WritebackRequiredDISPCLK) { + locals->DISPCLK_DPPCLK_Support[i][j] = false; + } + } + } + /*Viewport Size Check*/ + + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + locals->ViewportSizeSupport[i] = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (locals->ODMCombineEnablePerState[i][k] == true) { + if (dml_min(locals->SwathWidthYSingleDPP[k], dml_round(mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k])) + > locals->MaximumSwathWidth[k]) { + locals->ViewportSizeSupport[i] = false; + } + } else { + if (locals->SwathWidthYSingleDPP[k] / 2.0 > locals->MaximumSwathWidth[k]) { + locals->ViewportSizeSupport[i] = false; + } + } + } + } + /*Total Available Pipes Support Check*/ + + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + for (j = 0; j < 2; j++) { + if (locals->TotalNumberOfActiveDPP[i][j] <= mode_lib->vba.MaxNumDPP) + locals->TotalAvailablePipesSupport[i][j] = true; + else + locals->TotalAvailablePipesSupport[i][j] = false; + } + } + /*Total Available OTG Support Check*/ + + mode_lib->vba.TotalNumberOfActiveOTG = 0.0; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.BlendingAndTiming[k] == k) { + mode_lib->vba.TotalNumberOfActiveOTG = mode_lib->vba.TotalNumberOfActiveOTG + + 1.0; + } + } + if (mode_lib->vba.TotalNumberOfActiveOTG <= mode_lib->vba.MaxNumOTG) { + mode_lib->vba.NumberOfOTGSupport = true; + } else { + mode_lib->vba.NumberOfOTGSupport = false; + } + /*Display IO and DSC Support Check*/ + + mode_lib->vba.NonsupportedDSCInputBPC = false; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (!(mode_lib->vba.DSCInputBitPerComponent[k] == 12.0 + || mode_lib->vba.DSCInputBitPerComponent[k] == 10.0 + || mode_lib->vba.DSCInputBitPerComponent[k] == 8.0)) { + mode_lib->vba.NonsupportedDSCInputBPC = true; + } + } + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + locals->RequiresDSC[i][k] = 0; + locals->RequiresFEC[i][k] = 0; + if (mode_lib->vba.BlendingAndTiming[k] == k) { + if (mode_lib->vba.Output[k] == dm_hdmi) { + locals->RequiresDSC[i][k] = 0; + locals->RequiresFEC[i][k] = 0; + locals->OutputBppPerState[i][k] = TruncToValidBPP( + dml_min(600.0, mode_lib->vba.PHYCLKPerState[i]) / mode_lib->vba.PixelClockBackEnd[k] * 24, + false, + mode_lib->vba.Output[k], + mode_lib->vba.OutputFormat[k], + mode_lib->vba.DSCInputBitPerComponent[k]); + } else if (mode_lib->vba.Output[k] == dm_dp + || mode_lib->vba.Output[k] == dm_edp) { + if (mode_lib->vba.Output[k] == dm_edp) { + mode_lib->vba.EffectiveFECOverhead = 0.0; + } else { + mode_lib->vba.EffectiveFECOverhead = + mode_lib->vba.FECOverhead; + } + if (mode_lib->vba.PHYCLKPerState[i] >= 270.0) { + mode_lib->vba.Outbpp = TruncToValidBPP( + (1.0 - mode_lib->vba.Downspreading / 100.0) * 270.0 + * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, + false, + mode_lib->vba.Output[k], + mode_lib->vba.OutputFormat[k], + mode_lib->vba.DSCInputBitPerComponent[k]); + mode_lib->vba.OutbppDSC = TruncToValidBPP( + (1.0 - mode_lib->vba.Downspreading / 100.0) * (1.0 - mode_lib->vba.EffectiveFECOverhead / 100.0) * 270.0 + * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, + true, + mode_lib->vba.Output[k], + mode_lib->vba.OutputFormat[k], + mode_lib->vba.DSCInputBitPerComponent[k]); + if (mode_lib->vba.DSCEnabled[k] == true) { + locals->RequiresDSC[i][k] = true; + if (mode_lib->vba.Output[k] == dm_dp) { + locals->RequiresFEC[i][k] = true; + } else { + locals->RequiresFEC[i][k] = false; + } + mode_lib->vba.Outbpp = mode_lib->vba.OutbppDSC; + } else { + locals->RequiresDSC[i][k] = false; + locals->RequiresFEC[i][k] = false; + } + locals->OutputBppPerState[i][k] = mode_lib->vba.Outbpp; + } + if (mode_lib->vba.Outbpp == BPP_INVALID && mode_lib->vba.PHYCLKPerState[i] >= 540.0) { + mode_lib->vba.Outbpp = TruncToValidBPP( + (1.0 - mode_lib->vba.Downspreading / 100.0) * 540.0 + * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, + false, + mode_lib->vba.Output[k], + mode_lib->vba.OutputFormat[k], + mode_lib->vba.DSCInputBitPerComponent[k]); + mode_lib->vba.OutbppDSC = TruncToValidBPP( + (1.0 - mode_lib->vba.Downspreading / 100.0) * (1.0 - mode_lib->vba.EffectiveFECOverhead / 100.0) * 540.0 + * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, + true, + mode_lib->vba.Output[k], + mode_lib->vba.OutputFormat[k], + mode_lib->vba.DSCInputBitPerComponent[k]); + if (mode_lib->vba.DSCEnabled[k] == true) { + locals->RequiresDSC[i][k] = true; + if (mode_lib->vba.Output[k] == dm_dp) { + locals->RequiresFEC[i][k] = true; + } else { + locals->RequiresFEC[i][k] = false; + } + mode_lib->vba.Outbpp = mode_lib->vba.OutbppDSC; + } else { + locals->RequiresDSC[i][k] = false; + locals->RequiresFEC[i][k] = false; + } + locals->OutputBppPerState[i][k] = mode_lib->vba.Outbpp; + } + if (mode_lib->vba.Outbpp == BPP_INVALID + && mode_lib->vba.PHYCLKPerState[i] + >= 810.0) { + mode_lib->vba.Outbpp = TruncToValidBPP( + (1.0 - mode_lib->vba.Downspreading / 100.0) * 810.0 + * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, + false, + mode_lib->vba.Output[k], + mode_lib->vba.OutputFormat[k], + mode_lib->vba.DSCInputBitPerComponent[k]); + mode_lib->vba.OutbppDSC = TruncToValidBPP( + (1.0 - mode_lib->vba.Downspreading / 100.0) * (1.0 - mode_lib->vba.EffectiveFECOverhead / 100.0) * 810.0 + * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0, + true, + mode_lib->vba.Output[k], + mode_lib->vba.OutputFormat[k], + mode_lib->vba.DSCInputBitPerComponent[k]); + if (mode_lib->vba.DSCEnabled[k] == true || mode_lib->vba.Outbpp == BPP_INVALID) { + locals->RequiresDSC[i][k] = true; + if (mode_lib->vba.Output[k] == dm_dp) { + locals->RequiresFEC[i][k] = true; + } else { + locals->RequiresFEC[i][k] = false; + } + mode_lib->vba.Outbpp = mode_lib->vba.OutbppDSC; + } else { + locals->RequiresDSC[i][k] = false; + locals->RequiresFEC[i][k] = false; + } + locals->OutputBppPerState[i][k] = + mode_lib->vba.Outbpp; + } + } + } else { + locals->OutputBppPerState[i][k] = BPP_BLENDED_PIPE; + } + } + } + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + locals->DIOSupport[i] = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (locals->OutputBppPerState[i][k] == BPP_INVALID + || (mode_lib->vba.OutputFormat[k] == dm_420 + && mode_lib->vba.Interlace[k] == true + && mode_lib->vba.ProgressiveToInterlaceUnitInOPP == true)) { + locals->DIOSupport[i] = false; + } + } + } + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + locals->DSCCLKRequiredMoreThanSupported[i] = false; + if (mode_lib->vba.BlendingAndTiming[k] == k) { + if ((mode_lib->vba.Output[k] == dm_dp + || mode_lib->vba.Output[k] == dm_edp)) { + if (mode_lib->vba.OutputFormat[k] == dm_420 + || mode_lib->vba.OutputFormat[k] + == dm_n422) { + mode_lib->vba.DSCFormatFactor = 2; + } else { + mode_lib->vba.DSCFormatFactor = 1; + } + if (locals->RequiresDSC[i][k] == true) { + if (locals->ODMCombineEnablePerState[i][k] + == true) { + if (mode_lib->vba.PixelClockBackEnd[k] / 6.0 / mode_lib->vba.DSCFormatFactor + > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i]) { + locals->DSCCLKRequiredMoreThanSupported[i] = + true; + } + } else { + if (mode_lib->vba.PixelClockBackEnd[k] / 3.0 / mode_lib->vba.DSCFormatFactor + > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i]) { + locals->DSCCLKRequiredMoreThanSupported[i] = + true; + } + } + } + } + } + } + } + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + locals->NotEnoughDSCUnits[i] = false; + mode_lib->vba.TotalDSCUnitsRequired = 0.0; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (locals->RequiresDSC[i][k] == true) { + if (locals->ODMCombineEnablePerState[i][k] == true) { + mode_lib->vba.TotalDSCUnitsRequired = + mode_lib->vba.TotalDSCUnitsRequired + 2.0; + } else { + mode_lib->vba.TotalDSCUnitsRequired = + mode_lib->vba.TotalDSCUnitsRequired + 1.0; + } + } + } + if (mode_lib->vba.TotalDSCUnitsRequired > mode_lib->vba.NumberOfDSC) { + locals->NotEnoughDSCUnits[i] = true; + } + } + /*DSC Delay per state*/ + + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.BlendingAndTiming[k] != k) { + mode_lib->vba.slices = 0; + } else if (locals->RequiresDSC[i][k] == 0 + || locals->RequiresDSC[i][k] == false) { + mode_lib->vba.slices = 0; + } else if (mode_lib->vba.PixelClockBackEnd[k] > 3200.0) { + mode_lib->vba.slices = dml_ceil( + mode_lib->vba.PixelClockBackEnd[k] / 400.0, + 4.0); + } else if (mode_lib->vba.PixelClockBackEnd[k] > 1360.0) { + mode_lib->vba.slices = 8.0; + } else if (mode_lib->vba.PixelClockBackEnd[k] > 680.0) { + mode_lib->vba.slices = 4.0; + } else if (mode_lib->vba.PixelClockBackEnd[k] > 340.0) { + mode_lib->vba.slices = 2.0; + } else { + mode_lib->vba.slices = 1.0; + } + if (locals->OutputBppPerState[i][k] == BPP_BLENDED_PIPE + || locals->OutputBppPerState[i][k] == BPP_INVALID) { + mode_lib->vba.bpp = 0.0; + } else { + mode_lib->vba.bpp = locals->OutputBppPerState[i][k]; + } + if (locals->RequiresDSC[i][k] == true && mode_lib->vba.bpp != 0.0) { + if (locals->ODMCombineEnablePerState[i][k] == false) { + locals->DSCDelayPerState[i][k] = + dscceComputeDelay( + mode_lib->vba.DSCInputBitPerComponent[k], + mode_lib->vba.bpp, + dml_ceil( + mode_lib->vba.HActive[k] + / mode_lib->vba.slices, + 1.0), + mode_lib->vba.slices, + mode_lib->vba.OutputFormat[k]) + + dscComputeDelay( + mode_lib->vba.OutputFormat[k]); + } else { + locals->DSCDelayPerState[i][k] = + 2.0 * (dscceComputeDelay( + mode_lib->vba.DSCInputBitPerComponent[k], + mode_lib->vba.bpp, + dml_ceil(mode_lib->vba.HActive[k] / mode_lib->vba.slices, 1.0), + mode_lib->vba.slices / 2, + mode_lib->vba.OutputFormat[k]) + + dscComputeDelay(mode_lib->vba.OutputFormat[k])); + } + locals->DSCDelayPerState[i][k] = + locals->DSCDelayPerState[i][k] * mode_lib->vba.PixelClock[k] / mode_lib->vba.PixelClockBackEnd[k]; + } else { + locals->DSCDelayPerState[i][k] = 0.0; + } + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + for (m = 0; m <= mode_lib->vba.NumberOfActivePlanes - 1; m++) { + for (j = 0; j <= mode_lib->vba.NumberOfActivePlanes - 1; j++) { + if (mode_lib->vba.BlendingAndTiming[k] == m && locals->RequiresDSC[i][m] == true) + locals->DSCDelayPerState[i][k] = locals->DSCDelayPerState[i][m]; + } + } + } + } + + //Prefetch Check + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + for (j = 0; j < 2; j++) { + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (locals->ODMCombineEnablePerState[i][k] == true) + locals->SwathWidthYPerState[i][j][k] = dml_min(locals->SwathWidthYSingleDPP[k], dml_round(locals->HActive[k] / 2 * locals->HRatio[k])); + else + locals->SwathWidthYPerState[i][j][k] = locals->SwathWidthYSingleDPP[k] / locals->NoOfDPP[i][j][k]; + locals->SwathWidthGranularityY = 256 / dml_ceil(locals->BytePerPixelInDETY[k], 1) / locals->MaxSwathHeightY[k]; + locals->RoundedUpMaxSwathSizeBytesY = (dml_ceil(locals->SwathWidthYPerState[i][j][k] - 1, locals->SwathWidthGranularityY) + + locals->SwathWidthGranularityY) * locals->BytePerPixelInDETY[k] * locals->MaxSwathHeightY[k]; + if (locals->SourcePixelFormat[k] == dm_420_10) { + locals->RoundedUpMaxSwathSizeBytesY = dml_ceil(locals->RoundedUpMaxSwathSizeBytesY, 256) + 256; + } + if (locals->MaxSwathHeightC[k] > 0) { + locals->SwathWidthGranularityC = 256 / dml_ceil(locals->BytePerPixelInDETC[k], 2) / locals->MaxSwathHeightC[k]; + + locals->RoundedUpMaxSwathSizeBytesC = (dml_ceil(locals->SwathWidthYPerState[i][j][k] / 2 - 1, locals->SwathWidthGranularityC) + + locals->SwathWidthGranularityC) * locals->BytePerPixelInDETC[k] * locals->MaxSwathHeightC[k]; + } + if (locals->SourcePixelFormat[k] == dm_420_10) { + locals->RoundedUpMaxSwathSizeBytesC = dml_ceil(locals->RoundedUpMaxSwathSizeBytesC, 256) + 256; + } else { + locals->RoundedUpMaxSwathSizeBytesC = 0; + } + + if (locals->RoundedUpMaxSwathSizeBytesY + locals->RoundedUpMaxSwathSizeBytesC <= locals->DETBufferSizeInKByte * 1024 / 2) { + locals->SwathHeightYPerState[i][j][k] = locals->MaxSwathHeightY[k]; + locals->SwathHeightCPerState[i][j][k] = locals->MaxSwathHeightC[k]; + } else { + locals->SwathHeightYPerState[i][j][k] = locals->MinSwathHeightY[k]; + locals->SwathHeightCPerState[i][j][k] = locals->MinSwathHeightC[k]; + } + + if (locals->BytePerPixelInDETC[k] == 0) { + locals->LinesInDETLuma = locals->DETBufferSizeInKByte * 1024 / locals->BytePerPixelInDETY[k] / locals->SwathWidthYPerState[i][j][k]; + locals->LinesInDETChroma = 0; + } else if (locals->SwathHeightYPerState[i][j][k] <= locals->SwathHeightCPerState[i][j][k]) { + locals->LinesInDETLuma = locals->DETBufferSizeInKByte * 1024 / 2 / locals->BytePerPixelInDETY[k] / + locals->SwathWidthYPerState[i][j][k]; + locals->LinesInDETChroma = locals->DETBufferSizeInKByte * 1024 / 2 / locals->BytePerPixelInDETC[k] / (locals->SwathWidthYPerState[i][j][k] / 2); + } else { + locals->LinesInDETLuma = locals->DETBufferSizeInKByte * 1024 * 2 / 3 / locals->BytePerPixelInDETY[k] / locals->SwathWidthYPerState[i][j][k]; + locals->LinesInDETChroma = locals->DETBufferSizeInKByte * 1024 / 3 / locals->BytePerPixelInDETY[k] / (locals->SwathWidthYPerState[i][j][k] / 2); + } + + locals->EffectiveLBLatencyHidingSourceLinesLuma = dml_min(locals->MaxLineBufferLines, + dml_floor(locals->LineBufferSize / locals->LBBitPerPixel[k] / (locals->SwathWidthYPerState[i][j][k] + / dml_max(locals->HRatio[k], 1)), 1)) - (locals->vtaps[k] - 1); + + locals->EffectiveLBLatencyHidingSourceLinesChroma = dml_min(locals->MaxLineBufferLines, + dml_floor(locals->LineBufferSize / locals->LBBitPerPixel[k] + / (locals->SwathWidthYPerState[i][j][k] / 2 + / dml_max(locals->HRatio[k] / 2, 1)), 1)) - (locals->VTAPsChroma[k] - 1); + + locals->EffectiveDETLBLinesLuma = dml_floor(locals->LinesInDETLuma + dml_min( + locals->LinesInDETLuma * locals->RequiredDISPCLK[i][j] * locals->BytePerPixelInDETY[k] * + locals->PSCL_FACTOR[k] / locals->ReturnBWPerState[i], + locals->EffectiveLBLatencyHidingSourceLinesLuma), + locals->SwathHeightYPerState[i][j][k]); + + locals->EffectiveDETLBLinesChroma = dml_floor(locals->LinesInDETChroma + dml_min( + locals->LinesInDETChroma * locals->RequiredDISPCLK[i][j] * locals->BytePerPixelInDETC[k] * + locals->PSCL_FACTOR_CHROMA[k] / locals->ReturnBWPerState[i], + locals->EffectiveLBLatencyHidingSourceLinesChroma), + locals->SwathHeightCPerState[i][j][k]); + + if (locals->BytePerPixelInDETC[k] == 0) { + locals->UrgentLatencySupportUsPerState[i][j][k] = locals->EffectiveDETLBLinesLuma * (locals->HTotal[k] / locals->PixelClock[k]) + / locals->VRatio[k] - locals->EffectiveDETLBLinesLuma * locals->SwathWidthYPerState[i][j][k] * + dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i] / locals->NoOfDPP[i][j][k]); + } else { + locals->UrgentLatencySupportUsPerState[i][j][k] = dml_min( + locals->EffectiveDETLBLinesLuma * (locals->HTotal[k] / locals->PixelClock[k]) + / locals->VRatio[k] - locals->EffectiveDETLBLinesLuma * locals->SwathWidthYPerState[i][j][k] * + dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i] / locals->NoOfDPP[i][j][k]), + locals->EffectiveDETLBLinesChroma * (locals->HTotal[k] / locals->PixelClock[k]) / (locals->VRatio[k] / 2) - + locals->EffectiveDETLBLinesChroma * locals->SwathWidthYPerState[i][j][k] / 2 * + dml_ceil(locals->BytePerPixelInDETC[k], 2) / (locals->ReturnBWPerState[i] / locals->NoOfDPP[i][j][k])); + } + } + } + } + + for (i = 0; i <= locals->soc.num_states; i++) { + for (j = 0; j < 2; j++) { + locals->UrgentLatencySupport[i][j] = true; + for (k = 0; k < locals->NumberOfActivePlanes; k++) { + if (locals->UrgentLatencySupportUsPerState[i][j][k] < locals->UrgentLatency) + locals->UrgentLatencySupport[i][j] = false; + } + } + } + + + /*Prefetch Check*/ + for (i = 0; i <= locals->soc.num_states; i++) { + for (j = 0; j < 2; j++) { + locals->TotalNumberOfDCCActiveDPP[i][j] = 0; + for (k = 0; k < locals->NumberOfActivePlanes; k++) { + if (locals->DCCEnable[k] == true) { + locals->TotalNumberOfDCCActiveDPP[i][j] = + locals->TotalNumberOfDCCActiveDPP[i][j] + locals->NoOfDPP[i][j][k]; + } + } + } + } + + CalculateMinAndMaxPrefetchMode(locals->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &locals->MinPrefetchMode, &locals->MaxPrefetchMode); + + locals->MaxTotalVActiveRDBandwidth = 0; + for (k = 0; k < locals->NumberOfActivePlanes; k++) { + locals->MaxTotalVActiveRDBandwidth = locals->MaxTotalVActiveRDBandwidth + locals->ReadBandwidth[k]; + } + + for (i = 0; i <= locals->soc.num_states; i++) { + for (j = 0; j < 2; j++) { + for (k = 0; k < locals->NumberOfActivePlanes; k++) { + locals->NoOfDPPThisState[k] = locals->NoOfDPP[i][j][k]; + locals->RequiredDPPCLKThisState[k] = locals->RequiredDPPCLK[i][j][k]; + locals->SwathHeightYThisState[k] = locals->SwathHeightYPerState[i][j][k]; + locals->SwathHeightCThisState[k] = locals->SwathHeightCPerState[i][j][k]; + locals->SwathWidthYThisState[k] = locals->SwathWidthYPerState[i][j][k]; + mode_lib->vba.ProjectedDCFCLKDeepSleep = dml_max( + mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.PixelClock[k] / 16.0); + if (mode_lib->vba.BytePerPixelInDETC[k] == 0.0) { + if (mode_lib->vba.VRatio[k] <= 1.0) { + mode_lib->vba.ProjectedDCFCLKDeepSleep = + dml_max( + mode_lib->vba.ProjectedDCFCLKDeepSleep, + 1.1 + * dml_ceil( + mode_lib->vba.BytePerPixelInDETY[k], + 1.0) + / 64.0 + * mode_lib->vba.HRatio[k] + * mode_lib->vba.PixelClock[k] + / mode_lib->vba.NoOfDPP[i][j][k]); + } else { + mode_lib->vba.ProjectedDCFCLKDeepSleep = + dml_max( + mode_lib->vba.ProjectedDCFCLKDeepSleep, + 1.1 + * dml_ceil( + mode_lib->vba.BytePerPixelInDETY[k], + 1.0) + / 64.0 + * mode_lib->vba.PSCL_FACTOR[k] + * mode_lib->vba.RequiredDPPCLK[i][j][k]); + } + } else { + if (mode_lib->vba.VRatio[k] <= 1.0) { + mode_lib->vba.ProjectedDCFCLKDeepSleep = + dml_max( + mode_lib->vba.ProjectedDCFCLKDeepSleep, + 1.1 + * dml_ceil( + mode_lib->vba.BytePerPixelInDETY[k], + 1.0) + / 32.0 + * mode_lib->vba.HRatio[k] + * mode_lib->vba.PixelClock[k] + / mode_lib->vba.NoOfDPP[i][j][k]); + } else { + mode_lib->vba.ProjectedDCFCLKDeepSleep = + dml_max( + mode_lib->vba.ProjectedDCFCLKDeepSleep, + 1.1 + * dml_ceil( + mode_lib->vba.BytePerPixelInDETY[k], + 1.0) + / 32.0 + * mode_lib->vba.PSCL_FACTOR[k] + * mode_lib->vba.RequiredDPPCLK[i][j][k]); + } + if (mode_lib->vba.VRatio[k] / 2.0 <= 1.0) { + mode_lib->vba.ProjectedDCFCLKDeepSleep = + dml_max( + mode_lib->vba.ProjectedDCFCLKDeepSleep, + 1.1 + * dml_ceil( + mode_lib->vba.BytePerPixelInDETC[k], + 2.0) + / 32.0 + * mode_lib->vba.HRatio[k] + / 2.0 + * mode_lib->vba.PixelClock[k] + / mode_lib->vba.NoOfDPP[i][j][k]); + } else { + mode_lib->vba.ProjectedDCFCLKDeepSleep = + dml_max( + mode_lib->vba.ProjectedDCFCLKDeepSleep, + 1.1 + * dml_ceil( + mode_lib->vba.BytePerPixelInDETC[k], + 2.0) + / 32.0 + * mode_lib->vba.PSCL_FACTOR_CHROMA[k] + * mode_lib->vba.RequiredDPPCLK[i][j][k]); + } + } + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + mode_lib->vba.PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes( + mode_lib, + mode_lib->vba.DCCEnable[k], + mode_lib->vba.Read256BlockHeightY[k], + mode_lib->vba.Read256BlockWidthY[k], + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.SurfaceTiling[k], + dml_ceil(mode_lib->vba.BytePerPixelInDETY[k], 1.0), + mode_lib->vba.SourceScan[k], + mode_lib->vba.ViewportWidth[k], + mode_lib->vba.ViewportHeight[k], + mode_lib->vba.SwathWidthYPerState[i][j][k], + mode_lib->vba.GPUVMEnable, + mode_lib->vba.VMMPageSize, + mode_lib->vba.PTEBufferSizeInRequestsLuma, + mode_lib->vba.PDEProcessingBufIn64KBReqs, + mode_lib->vba.PitchY[k], + mode_lib->vba.DCCMetaPitchY[k], + &mode_lib->vba.MacroTileWidthY[k], + &mode_lib->vba.MetaRowBytesY, + &mode_lib->vba.DPTEBytesPerRowY, + &mode_lib->vba.PTEBufferSizeNotExceededY[i][j][k], + &mode_lib->vba.dpte_row_height[k], + &mode_lib->vba.meta_row_height[k]); + mode_lib->vba.PrefetchLinesY[k] = CalculatePrefetchSourceLines( + mode_lib, + mode_lib->vba.VRatio[k], + mode_lib->vba.vtaps[k], + mode_lib->vba.Interlace[k], + mode_lib->vba.ProgressiveToInterlaceUnitInOPP, + mode_lib->vba.SwathHeightYPerState[i][j][k], + mode_lib->vba.ViewportYStartY[k], + &mode_lib->vba.PrefillY[k], + &mode_lib->vba.MaxNumSwY[k]); + if ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8)) { + mode_lib->vba.PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes( + mode_lib, + mode_lib->vba.DCCEnable[k], + mode_lib->vba.Read256BlockHeightY[k], + mode_lib->vba.Read256BlockWidthY[k], + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.SurfaceTiling[k], + dml_ceil(mode_lib->vba.BytePerPixelInDETC[k], 2.0), + mode_lib->vba.SourceScan[k], + mode_lib->vba.ViewportWidth[k] / 2.0, + mode_lib->vba.ViewportHeight[k] / 2.0, + mode_lib->vba.SwathWidthYPerState[i][j][k] / 2.0, + mode_lib->vba.GPUVMEnable, + mode_lib->vba.VMMPageSize, + mode_lib->vba.PTEBufferSizeInRequestsLuma, + mode_lib->vba.PDEProcessingBufIn64KBReqs, + mode_lib->vba.PitchC[k], + 0.0, + &mode_lib->vba.MacroTileWidthC[k], + &mode_lib->vba.MetaRowBytesC, + &mode_lib->vba.DPTEBytesPerRowC, + &mode_lib->vba.PTEBufferSizeNotExceededC[i][j][k], + &mode_lib->vba.dpte_row_height_chroma[k], + &mode_lib->vba.meta_row_height_chroma[k]); + mode_lib->vba.PrefetchLinesC[k] = CalculatePrefetchSourceLines( + mode_lib, + mode_lib->vba.VRatio[k] / 2.0, + mode_lib->vba.VTAPsChroma[k], + mode_lib->vba.Interlace[k], + mode_lib->vba.ProgressiveToInterlaceUnitInOPP, + mode_lib->vba.SwathHeightCPerState[i][j][k], + mode_lib->vba.ViewportYStartC[k], + &mode_lib->vba.PrefillC[k], + &mode_lib->vba.MaxNumSwC[k]); + } else { + mode_lib->vba.PDEAndMetaPTEBytesPerFrameC = 0.0; + mode_lib->vba.MetaRowBytesC = 0.0; + mode_lib->vba.DPTEBytesPerRowC = 0.0; + locals->PrefetchLinesC[k] = 0.0; + locals->PTEBufferSizeNotExceededC[i][j][k] = true; + locals->PTEBufferSizeInRequestsForLuma = mode_lib->vba.PTEBufferSizeInRequestsLuma + mode_lib->vba.PTEBufferSizeInRequestsChroma; + } + locals->PDEAndMetaPTEBytesPerFrame[k] = + mode_lib->vba.PDEAndMetaPTEBytesPerFrameY + mode_lib->vba.PDEAndMetaPTEBytesPerFrameC; + locals->MetaRowBytes[k] = mode_lib->vba.MetaRowBytesY + mode_lib->vba.MetaRowBytesC; + locals->DPTEBytesPerRow[k] = mode_lib->vba.DPTEBytesPerRowY + mode_lib->vba.DPTEBytesPerRowC; + + CalculateActiveRowBandwidth( + mode_lib->vba.GPUVMEnable, + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.VRatio[k], + mode_lib->vba.DCCEnable[k], + mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k], + mode_lib->vba.MetaRowBytesY, + mode_lib->vba.MetaRowBytesC, + mode_lib->vba.meta_row_height[k], + mode_lib->vba.meta_row_height_chroma[k], + mode_lib->vba.DPTEBytesPerRowY, + mode_lib->vba.DPTEBytesPerRowC, + mode_lib->vba.dpte_row_height[k], + mode_lib->vba.dpte_row_height_chroma[k], + &mode_lib->vba.meta_row_bw[k], + &mode_lib->vba.dpte_row_bw[k], + &mode_lib->vba.qual_row_bw[k]); + } + mode_lib->vba.ExtraLatency = + mode_lib->vba.UrgentRoundTripAndOutOfOrderLatencyPerState[i] + + (mode_lib->vba.TotalNumberOfActiveDPP[i][j] + * mode_lib->vba.PixelChunkSizeInKByte + + mode_lib->vba.TotalNumberOfDCCActiveDPP[i][j] + * mode_lib->vba.MetaChunkSize) + * 1024.0 + / mode_lib->vba.ReturnBWPerState[i]; + if (mode_lib->vba.GPUVMEnable == true) { + mode_lib->vba.ExtraLatency = mode_lib->vba.ExtraLatency + + mode_lib->vba.TotalNumberOfActiveDPP[i][j] + * mode_lib->vba.PTEGroupSize + / mode_lib->vba.ReturnBWPerState[i]; + } + mode_lib->vba.TimeCalc = 24.0 / mode_lib->vba.ProjectedDCFCLKDeepSleep; + + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.BlendingAndTiming[k] == k) { + if (mode_lib->vba.WritebackEnable[k] == true) { + locals->WritebackDelay[i][k] = mode_lib->vba.WritebackLatency + + CalculateWriteBackDelay( + mode_lib->vba.WritebackPixelFormat[k], + mode_lib->vba.WritebackHRatio[k], + mode_lib->vba.WritebackVRatio[k], + mode_lib->vba.WritebackLumaHTaps[k], + mode_lib->vba.WritebackLumaVTaps[k], + mode_lib->vba.WritebackChromaHTaps[k], + mode_lib->vba.WritebackChromaVTaps[k], + mode_lib->vba.WritebackDestinationWidth[k]) / locals->RequiredDISPCLK[i][j]; + } else { + locals->WritebackDelay[i][k] = 0.0; + } + for (m = 0; m <= mode_lib->vba.NumberOfActivePlanes - 1; m++) { + if (mode_lib->vba.BlendingAndTiming[m] == k + && mode_lib->vba.WritebackEnable[m] + == true) { + locals->WritebackDelay[i][k] = dml_max(locals->WritebackDelay[i][k], + mode_lib->vba.WritebackLatency + CalculateWriteBackDelay( + mode_lib->vba.WritebackPixelFormat[m], + mode_lib->vba.WritebackHRatio[m], + mode_lib->vba.WritebackVRatio[m], + mode_lib->vba.WritebackLumaHTaps[m], + mode_lib->vba.WritebackLumaVTaps[m], + mode_lib->vba.WritebackChromaHTaps[m], + mode_lib->vba.WritebackChromaVTaps[m], + mode_lib->vba.WritebackDestinationWidth[m]) / locals->RequiredDISPCLK[i][j]); + } + } + } + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + for (m = 0; m <= mode_lib->vba.NumberOfActivePlanes - 1; m++) { + if (mode_lib->vba.BlendingAndTiming[k] == m) { + locals->WritebackDelay[i][k] = locals->WritebackDelay[i][m]; + } + } + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + for (m = 0; m < locals->NumberOfCursors[k]; m++) + locals->cursor_bw[k] = locals->NumberOfCursors[k] * locals->CursorWidth[k][m] * locals->CursorBPP[k][m] + / 8 / (locals->HTotal[k] / locals->PixelClock[k]) * locals->VRatio[k]; + } + + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + locals->MaximumVStartup[k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k] + - dml_max(1.0, dml_ceil(locals->WritebackDelay[i][k] / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), 1.0)); + } + + mode_lib->vba.NextPrefetchMode = mode_lib->vba.MinPrefetchMode; + do { + mode_lib->vba.PrefetchMode[i][j] = mode_lib->vba.NextPrefetchMode; + mode_lib->vba.NextPrefetchMode = mode_lib->vba.NextPrefetchMode + 1; + + mode_lib->vba.TWait = CalculateTWait( + mode_lib->vba.PrefetchMode[i][j], + mode_lib->vba.DRAMClockChangeLatency, + mode_lib->vba.UrgentLatency, + mode_lib->vba.SREnterPlusExitTime); + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + + if (mode_lib->vba.XFCEnabled[k] == true) { + mode_lib->vba.XFCRemoteSurfaceFlipDelay = + CalculateRemoteSurfaceFlipDelay( + mode_lib, + mode_lib->vba.VRatio[k], + locals->SwathWidthYPerState[i][j][k], + dml_ceil(locals->BytePerPixelInDETY[k], 1.0), + mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k], + mode_lib->vba.XFCTSlvVupdateOffset, + mode_lib->vba.XFCTSlvVupdateWidth, + mode_lib->vba.XFCTSlvVreadyOffset, + mode_lib->vba.XFCXBUFLatencyTolerance, + mode_lib->vba.XFCFillBWOverhead, + mode_lib->vba.XFCSlvChunkSize, + mode_lib->vba.XFCBusTransportTime, + mode_lib->vba.TimeCalc, + mode_lib->vba.TWait, + &mode_lib->vba.SrcActiveDrainRate, + &mode_lib->vba.TInitXFill, + &mode_lib->vba.TslvChk); + } else { + mode_lib->vba.XFCRemoteSurfaceFlipDelay = 0.0; + } + + CalculateDelayAfterScaler(mode_lib, mode_lib->vba.ReturnBWPerState[i], mode_lib->vba.ReadBandwidthLuma[k], mode_lib->vba.ReadBandwidthChroma[k], mode_lib->vba.MaxTotalVActiveRDBandwidth, + mode_lib->vba.DisplayPipeLineDeliveryTimeLuma[k], mode_lib->vba.DisplayPipeLineDeliveryTimeChroma[k], + mode_lib->vba.RequiredDPPCLK[i][j][k], mode_lib->vba.RequiredDISPCLK[i][j], mode_lib->vba.PixelClock[k], mode_lib->vba.DSCDelayPerState[i][k], mode_lib->vba.NoOfDPP[i][j][k], mode_lib->vba.ScalerEnabled[k], mode_lib->vba.NumberOfCursors[k], + mode_lib->vba.DPPCLKDelaySubtotal, mode_lib->vba.DPPCLKDelaySCL, mode_lib->vba.DPPCLKDelaySCLLBOnly, mode_lib->vba.DPPCLKDelayCNVCFormater, mode_lib->vba.DPPCLKDelayCNVCCursor, mode_lib->vba.DISPCLKDelaySubtotal, + mode_lib->vba.SwathWidthYPerState[i][j][k] / mode_lib->vba.HRatio[k], mode_lib->vba.OutputFormat[k], mode_lib->vba.HTotal[k], + mode_lib->vba.SwathWidthYSingleDPP[k], mode_lib->vba.BytePerPixelInDETY[k], mode_lib->vba.BytePerPixelInDETC[k], mode_lib->vba.SwathHeightYThisState[k], mode_lib->vba.SwathHeightCThisState[k], mode_lib->vba.Interlace[k], mode_lib->vba.ProgressiveToInterlaceUnitInOPP, + &mode_lib->vba.DSTXAfterScaler[k], &mode_lib->vba.DSTYAfterScaler[k]); + + mode_lib->vba.IsErrorResult[i][j][k] = + CalculatePrefetchSchedule( + mode_lib, + mode_lib->vba.RequiredDPPCLK[i][j][k], + mode_lib->vba.RequiredDISPCLK[i][j], + mode_lib->vba.PixelClock[k], + mode_lib->vba.ProjectedDCFCLKDeepSleep, + mode_lib->vba.NoOfDPP[i][j][k], + mode_lib->vba.NumberOfCursors[k], + mode_lib->vba.VTotal[k] + - mode_lib->vba.VActive[k], + mode_lib->vba.HTotal[k], + mode_lib->vba.MaxInterDCNTileRepeaters, + mode_lib->vba.MaximumVStartup[k], + mode_lib->vba.GPUVMMaxPageTableLevels, + mode_lib->vba.GPUVMEnable, + mode_lib->vba.DynamicMetadataEnable[k], + mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k], + mode_lib->vba.DynamicMetadataTransmittedBytes[k], + mode_lib->vba.DCCEnable[k], + mode_lib->vba.UrgentLatencyPixelDataOnly, + mode_lib->vba.ExtraLatency, + mode_lib->vba.TimeCalc, + mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k], + mode_lib->vba.MetaRowBytes[k], + mode_lib->vba.DPTEBytesPerRow[k], + mode_lib->vba.PrefetchLinesY[k], + mode_lib->vba.SwathWidthYPerState[i][j][k], + mode_lib->vba.BytePerPixelInDETY[k], + mode_lib->vba.PrefillY[k], + mode_lib->vba.MaxNumSwY[k], + mode_lib->vba.PrefetchLinesC[k], + mode_lib->vba.BytePerPixelInDETC[k], + mode_lib->vba.PrefillC[k], + mode_lib->vba.MaxNumSwC[k], + mode_lib->vba.SwathHeightYPerState[i][j][k], + mode_lib->vba.SwathHeightCPerState[i][j][k], + mode_lib->vba.TWait, + mode_lib->vba.XFCEnabled[k], + mode_lib->vba.XFCRemoteSurfaceFlipDelay, + mode_lib->vba.Interlace[k], + mode_lib->vba.ProgressiveToInterlaceUnitInOPP, + mode_lib->vba.DSTXAfterScaler[k], + mode_lib->vba.DSTYAfterScaler[k], + &mode_lib->vba.LineTimesForPrefetch[k], + &mode_lib->vba.PrefetchBW[k], + &mode_lib->vba.LinesForMetaPTE[k], + &mode_lib->vba.LinesForMetaAndDPTERow[k], + &mode_lib->vba.VRatioPreY[i][j][k], + &mode_lib->vba.VRatioPreC[i][j][k], + &mode_lib->vba.RequiredPrefetchPixelDataBWLuma[i][j][k], + &mode_lib->vba.Tno_bw[k], + &mode_lib->vba.VUpdateOffsetPix[k], + &mode_lib->vba.VUpdateWidthPix[k], + &mode_lib->vba.VReadyOffsetPix[k]); + } + mode_lib->vba.MaximumReadBandwidthWithoutPrefetch = 0.0; + mode_lib->vba.MaximumReadBandwidthWithPrefetch = 0.0; + locals->prefetch_vm_bw_valid = true; + locals->prefetch_row_bw_valid = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (locals->PDEAndMetaPTEBytesPerFrame[k] == 0) + locals->prefetch_vm_bw[k] = 0; + else if (locals->LinesForMetaPTE[k] > 0) + locals->prefetch_vm_bw[k] = locals->PDEAndMetaPTEBytesPerFrame[k] + / (locals->LinesForMetaPTE[k] * locals->HTotal[k] / locals->PixelClock[k]); + else { + locals->prefetch_vm_bw[k] = 0; + locals->prefetch_vm_bw_valid = false; + } + if (locals->MetaRowBytes[k] + locals->DPTEBytesPerRow[k] == 0) + locals->prefetch_row_bw[k] = 0; + else if (locals->LinesForMetaAndDPTERow[k] > 0) + locals->prefetch_row_bw[k] = (locals->MetaRowBytes[k] + locals->DPTEBytesPerRow[k]) + / (locals->LinesForMetaAndDPTERow[k] * locals->HTotal[k] / locals->PixelClock[k]); + else { + locals->prefetch_row_bw[k] = 0; + locals->prefetch_row_bw_valid = false; + } + + mode_lib->vba.MaximumReadBandwidthWithoutPrefetch = mode_lib->vba.MaximumReadBandwidthWithPrefetch + + mode_lib->vba.cursor_bw[k] + mode_lib->vba.ReadBandwidth[k] + mode_lib->vba.meta_row_bw[k] + mode_lib->vba.dpte_row_bw[k]; + mode_lib->vba.MaximumReadBandwidthWithPrefetch = + mode_lib->vba.MaximumReadBandwidthWithPrefetch + + mode_lib->vba.cursor_bw[k] + + dml_max3( + mode_lib->vba.prefetch_vm_bw[k], + mode_lib->vba.prefetch_row_bw[k], + dml_max(mode_lib->vba.ReadBandwidth[k], + mode_lib->vba.RequiredPrefetchPixelDataBWLuma[i][j][k]) + + mode_lib->vba.meta_row_bw[k] + mode_lib->vba.dpte_row_bw[k]); + } + locals->BandwidthWithoutPrefetchSupported[i] = true; + if (mode_lib->vba.MaximumReadBandwidthWithoutPrefetch > locals->ReturnBWPerState[i]) { + locals->BandwidthWithoutPrefetchSupported[i] = false; + } + + locals->PrefetchSupported[i][j] = true; + if (mode_lib->vba.MaximumReadBandwidthWithPrefetch > locals->ReturnBWPerState[i]) { + locals->PrefetchSupported[i][j] = false; + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (locals->LineTimesForPrefetch[k] < 2.0 + || locals->LinesForMetaPTE[k] >= 8.0 + || locals->LinesForMetaAndDPTERow[k] >= 16.0 + || mode_lib->vba.IsErrorResult[i][j][k] == true) { + locals->PrefetchSupported[i][j] = false; + } + } + locals->VRatioInPrefetchSupported[i][j] = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (locals->VRatioPreY[i][j][k] > 4.0 + || locals->VRatioPreC[i][j][k] > 4.0 + || mode_lib->vba.IsErrorResult[i][j][k] == true) { + locals->VRatioInPrefetchSupported[i][j] = false; + } + } + } while ((locals->PrefetchSupported[i][j] != true || locals->VRatioInPrefetchSupported[i][j] != true) + && mode_lib->vba.NextPrefetchMode < mode_lib->vba.MaxPrefetchMode); + + if (mode_lib->vba.PrefetchSupported[i][j] == true + && mode_lib->vba.VRatioInPrefetchSupported[i][j] == true) { + mode_lib->vba.BandwidthAvailableForImmediateFlip = + mode_lib->vba.ReturnBWPerState[i]; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + mode_lib->vba.BandwidthAvailableForImmediateFlip = + mode_lib->vba.BandwidthAvailableForImmediateFlip + - mode_lib->vba.cursor_bw[k] + - dml_max( + mode_lib->vba.ReadBandwidth[k] + mode_lib->vba.qual_row_bw[k], + mode_lib->vba.PrefetchBW[k]); + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + mode_lib->vba.ImmediateFlipBytes[k] = 0.0; + if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8 + && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) { + mode_lib->vba.ImmediateFlipBytes[k] = + mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k] + + mode_lib->vba.MetaRowBytes[k] + + mode_lib->vba.DPTEBytesPerRow[k]; + } + } + mode_lib->vba.TotImmediateFlipBytes = 0.0; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8 + && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) { + mode_lib->vba.TotImmediateFlipBytes = + mode_lib->vba.TotImmediateFlipBytes + + mode_lib->vba.ImmediateFlipBytes[k]; + } + } + + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + CalculateFlipSchedule( + mode_lib, + mode_lib->vba.ExtraLatency, + mode_lib->vba.UrgentLatencyPixelDataOnly, + mode_lib->vba.GPUVMMaxPageTableLevels, + mode_lib->vba.GPUVMEnable, + mode_lib->vba.BandwidthAvailableForImmediateFlip, + mode_lib->vba.TotImmediateFlipBytes, + mode_lib->vba.SourcePixelFormat[k], + mode_lib->vba.ImmediateFlipBytes[k], + mode_lib->vba.HTotal[k] + / mode_lib->vba.PixelClock[k], + mode_lib->vba.VRatio[k], + mode_lib->vba.Tno_bw[k], + mode_lib->vba.PDEAndMetaPTEBytesPerFrame[k], + mode_lib->vba.MetaRowBytes[k], + mode_lib->vba.DPTEBytesPerRow[k], + mode_lib->vba.DCCEnable[k], + mode_lib->vba.dpte_row_height[k], + mode_lib->vba.meta_row_height[k], + mode_lib->vba.qual_row_bw[k], + &mode_lib->vba.DestinationLinesToRequestVMInImmediateFlip[k], + &mode_lib->vba.DestinationLinesToRequestRowInImmediateFlip[k], + &mode_lib->vba.final_flip_bw[k], + &mode_lib->vba.ImmediateFlipSupportedForPipe[k]); + } + mode_lib->vba.total_dcn_read_bw_with_flip = 0.0; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + mode_lib->vba.total_dcn_read_bw_with_flip = + mode_lib->vba.total_dcn_read_bw_with_flip + + mode_lib->vba.cursor_bw[k] + + dml_max3( + mode_lib->vba.prefetch_vm_bw[k], + mode_lib->vba.prefetch_row_bw[k], + mode_lib->vba.final_flip_bw[k] + + dml_max( + mode_lib->vba.ReadBandwidth[k], + mode_lib->vba.RequiredPrefetchPixelDataBWLuma[i][j][k])); + } + mode_lib->vba.ImmediateFlipSupportedForState[i][j] = true; + if (mode_lib->vba.total_dcn_read_bw_with_flip + > mode_lib->vba.ReturnBWPerState[i]) { + mode_lib->vba.ImmediateFlipSupportedForState[i][j] = false; + } + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.ImmediateFlipSupportedForPipe[k] == false) { + mode_lib->vba.ImmediateFlipSupportedForState[i][j] = false; + } + } + } else { + mode_lib->vba.ImmediateFlipSupportedForState[i][j] = false; + } + } + } + + /*Vertical Active BW support*/ + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i] = dml_min(mode_lib->vba.ReturnBusWidth * + mode_lib->vba.DCFCLKPerState[i], mode_lib->vba.FabricAndDRAMBandwidthPerState[i] * 1000) * + mode_lib->vba.MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100; + if (mode_lib->vba.MaxTotalVActiveRDBandwidth <= mode_lib->vba.MaxTotalVerticalActiveAvailableBandwidth[i]) + mode_lib->vba.TotalVerticalActiveBandwidthSupport[i] = true; + else + mode_lib->vba.TotalVerticalActiveBandwidthSupport[i] = false; + } + + /*PTE Buffer Size Check*/ + + for (i = 0; i <= mode_lib->vba.soc.num_states; i++) { + for (j = 0; j < 2; j++) { + locals->PTEBufferSizeNotExceeded[i][j] = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (locals->PTEBufferSizeNotExceededY[i][j][k] == false + || locals->PTEBufferSizeNotExceededC[i][j][k] == false) { + locals->PTEBufferSizeNotExceeded[i][j] = false; + } + } + } + } + /*Cursor Support Check*/ + mode_lib->vba.CursorSupport = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + for (j = 0; j < 2; j++) { + if (mode_lib->vba.CursorWidth[k][j] > 0.0) { + if (dml_floor( + dml_floor( + mode_lib->vba.CursorBufferSize + - mode_lib->vba.CursorChunkSize, + mode_lib->vba.CursorChunkSize) * 1024.0 + / (mode_lib->vba.CursorWidth[k][j] + * mode_lib->vba.CursorBPP[k][j] + / 8.0), + 1.0) + * (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) + / mode_lib->vba.VRatio[k] < mode_lib->vba.UrgentLatencyPixelDataOnly + || (mode_lib->vba.CursorBPP[k][j] == 64.0 + && mode_lib->vba.Cursor64BppSupport == false)) { + mode_lib->vba.CursorSupport = false; + } + } + } + } + /*Valid Pitch Check*/ + + mode_lib->vba.PitchSupport = true; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + locals->AlignedYPitch[k] = dml_ceil( + dml_max(mode_lib->vba.PitchY[k], mode_lib->vba.ViewportWidth[k]), + locals->MacroTileWidthY[k]); + if (locals->AlignedYPitch[k] > mode_lib->vba.PitchY[k]) { + mode_lib->vba.PitchSupport = false; + } + if (mode_lib->vba.DCCEnable[k] == true) { + locals->AlignedDCCMetaPitch[k] = dml_ceil( + dml_max( + mode_lib->vba.DCCMetaPitchY[k], + mode_lib->vba.ViewportWidth[k]), + 64.0 * locals->Read256BlockWidthY[k]); + } else { + locals->AlignedDCCMetaPitch[k] = mode_lib->vba.DCCMetaPitchY[k]; + } + if (locals->AlignedDCCMetaPitch[k] > mode_lib->vba.DCCMetaPitchY[k]) { + mode_lib->vba.PitchSupport = false; + } + if (mode_lib->vba.SourcePixelFormat[k] != dm_444_64 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 + && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16 + && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8) { + locals->AlignedCPitch[k] = dml_ceil( + dml_max( + mode_lib->vba.PitchC[k], + mode_lib->vba.ViewportWidth[k] / 2.0), + locals->MacroTileWidthC[k]); + } else { + locals->AlignedCPitch[k] = mode_lib->vba.PitchC[k]; + } + if (locals->AlignedCPitch[k] > mode_lib->vba.PitchC[k]) { + mode_lib->vba.PitchSupport = false; + } + } + /*Mode Support, Voltage State and SOC Configuration*/ + + for (i = mode_lib->vba.soc.num_states; i >= 0; i--) { + for (j = 0; j < 2; j++) { + enum dm_validation_status status = DML_VALIDATION_OK; + + if (mode_lib->vba.ScaleRatioAndTapsSupport != true) { + status = DML_FAIL_SCALE_RATIO_TAP; + } else if (mode_lib->vba.SourceFormatPixelAndScanSupport != true) { + status = DML_FAIL_SOURCE_PIXEL_FORMAT; + } else if (locals->ViewportSizeSupport[i] != true) { + status = DML_FAIL_VIEWPORT_SIZE; + } else if (locals->DIOSupport[i] != true) { + status = DML_FAIL_DIO_SUPPORT; + } else if (locals->NotEnoughDSCUnits[i] != false) { + status = DML_FAIL_NOT_ENOUGH_DSC; + } else if (locals->DSCCLKRequiredMoreThanSupported[i] != false) { + status = DML_FAIL_DSC_CLK_REQUIRED; + } else if (locals->UrgentLatencySupport[i][j] != true) { + status = DML_FAIL_URGENT_LATENCY; + } else if (locals->ROBSupport[i] != true) { + status = DML_FAIL_REORDERING_BUFFER; + } else if (locals->DISPCLK_DPPCLK_Support[i][j] != true) { + status = DML_FAIL_DISPCLK_DPPCLK; + } else if (locals->TotalAvailablePipesSupport[i][j] != true) { + status = DML_FAIL_TOTAL_AVAILABLE_PIPES; + } else if (mode_lib->vba.NumberOfOTGSupport != true) { + status = DML_FAIL_NUM_OTG; + } else if (mode_lib->vba.WritebackModeSupport != true) { + status = DML_FAIL_WRITEBACK_MODE; + } else if (mode_lib->vba.WritebackLatencySupport != true) { + status = DML_FAIL_WRITEBACK_LATENCY; + } else if (mode_lib->vba.WritebackScaleRatioAndTapsSupport != true) { + status = DML_FAIL_WRITEBACK_SCALE_RATIO_TAP; + } else if (mode_lib->vba.CursorSupport != true) { + status = DML_FAIL_CURSOR_SUPPORT; + } else if (mode_lib->vba.PitchSupport != true) { + status = DML_FAIL_PITCH_SUPPORT; + } else if (locals->PrefetchSupported[i][j] != true) { + status = DML_FAIL_PREFETCH_SUPPORT; + } else if (locals->TotalVerticalActiveBandwidthSupport[i] != true) { + status = DML_FAIL_TOTAL_V_ACTIVE_BW; + } else if (locals->VRatioInPrefetchSupported[i][j] != true) { + status = DML_FAIL_V_RATIO_PREFETCH; + } else if (locals->PTEBufferSizeNotExceeded[i][j] != true) { + status = DML_FAIL_PTE_BUFFER_SIZE; + } else if (mode_lib->vba.NonsupportedDSCInputBPC != false) { + status = DML_FAIL_DSC_INPUT_BPC; + } + + if (status == DML_VALIDATION_OK) { + locals->ModeSupport[i][j] = true; + } else { + locals->ModeSupport[i][j] = false; + } + locals->ValidationStatus[i] = status; + } + } + { + unsigned int MaximumMPCCombine = 0; + mode_lib->vba.VoltageLevel = mode_lib->vba.soc.num_states + 1; + for (i = mode_lib->vba.VoltageOverrideLevel; i <= mode_lib->vba.soc.num_states; i++) { + if (locals->ModeSupport[i][0] == true || locals->ModeSupport[i][1] == true) { + mode_lib->vba.VoltageLevel = i; + if (locals->ModeSupport[i][1] == true && (locals->ModeSupport[i][0] == false + || mode_lib->vba.WhenToDoMPCCombine == dm_mpc_always_when_possible)) { + MaximumMPCCombine = 1; + } else { + MaximumMPCCombine = 0; + } + break; + } + } + mode_lib->vba.ImmediateFlipSupport = + locals->ImmediateFlipSupportedForState[mode_lib->vba.VoltageLevel][MaximumMPCCombine]; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + mode_lib->vba.DPPPerPlane[k] = locals->NoOfDPP[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k]; + locals->DPPCLK[k] = locals->RequiredDPPCLK[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k]; + } + mode_lib->vba.DISPCLK = locals->RequiredDISPCLK[mode_lib->vba.VoltageLevel][MaximumMPCCombine]; + mode_lib->vba.maxMpcComb = MaximumMPCCombine; + } + mode_lib->vba.DCFCLK = mode_lib->vba.DCFCLKPerState[mode_lib->vba.VoltageLevel]; + mode_lib->vba.DRAMSpeed = mode_lib->vba.DRAMSpeedPerState[mode_lib->vba.VoltageLevel]; + mode_lib->vba.FabricClock = mode_lib->vba.FabricClockPerState[mode_lib->vba.VoltageLevel]; + mode_lib->vba.SOCCLK = mode_lib->vba.SOCCLKPerState[mode_lib->vba.VoltageLevel]; + mode_lib->vba.ReturnBW = locals->ReturnBWPerState[mode_lib->vba.VoltageLevel]; + mode_lib->vba.FabricAndDRAMBandwidth = locals->FabricAndDRAMBandwidthPerState[mode_lib->vba.VoltageLevel]; + for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { + if (mode_lib->vba.BlendingAndTiming[k] == k) { + mode_lib->vba.ODMCombineEnabled[k] = + locals->ODMCombineEnablePerState[mode_lib->vba.VoltageLevel][k]; + } else { + mode_lib->vba.ODMCombineEnabled[k] = 0; + } + mode_lib->vba.DSCEnabled[k] = + locals->RequiresDSC[mode_lib->vba.VoltageLevel][k]; + mode_lib->vba.OutputBpp[k] = + locals->OutputBppPerState[mode_lib->vba.VoltageLevel][k]; + } +} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.h b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.h new file mode 100644 index 000000000000..a989d3ca1e99 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.h @@ -0,0 +1,32 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef _DCN20V2_DISPLAY_MODE_VBA_H_ +#define _DCN20V2_DISPLAY_MODE_VBA_H_ + +void dml20v2_recalculate(struct display_mode_lib *mode_lib); +void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c new file mode 100644 index 000000000000..ed8bf5f723c9 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c @@ -0,0 +1,1701 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "../display_mode_lib.h" +#include "../display_mode_vba.h" +#include "display_rq_dlg_calc_20v2.h" + +// Function: dml20v2_rq_dlg_get_rq_params +// Calculate requestor related parameters that register definition agnostic +// (i.e. this layer does try to separate real values from register definition) +// Input: +// pipe_src_param - pipe source configuration (e.g. vp, pitch, etc.) +// Output: +// rq_param - values that can be used to setup RQ (e.g. swath_height, plane1_addr, etc.) +// +static void dml20v2_rq_dlg_get_rq_params( + struct display_mode_lib *mode_lib, + display_rq_params_st * rq_param, + const display_pipe_source_params_st pipe_src_param); + +// Function: dml20v2_rq_dlg_get_dlg_params +// Calculate deadline related parameters +// +static void dml20v2_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib, + const display_e2e_pipe_params_st *e2e_pipe_param, + const unsigned int num_pipes, + const unsigned int pipe_idx, + display_dlg_regs_st *disp_dlg_regs, + display_ttu_regs_st *disp_ttu_regs, + const display_rq_dlg_params_st rq_dlg_param, + const display_dlg_sys_params_st dlg_sys_param, + const bool cstate_en, + const bool pstate_en); +/* + * NOTE: + * This file is gcc-parseable HW gospel, coming straight from HW engineers. + * + * It doesn't adhere to Linux kernel style and sometimes will do things in odd + * ways. Unless there is something clearly wrong with it the code should + * remain as-is as it provides us with a guarantee from HW that it is correct. + */ + +static void calculate_ttu_cursor(struct display_mode_lib *mode_lib, + double *refcyc_per_req_delivery_pre_cur, + double *refcyc_per_req_delivery_cur, + double refclk_freq_in_mhz, + double ref_freq_to_pix_freq, + double hscale_pixel_rate_l, + double hscl_ratio, + double vratio_pre_l, + double vratio_l, + unsigned int cur_width, + enum cursor_bpp cur_bpp); + +#include "../dml_inline_defs.h" + +static unsigned int get_bytes_per_element(enum source_format_class source_format, bool is_chroma) +{ + unsigned int ret_val = 0; + + if (source_format == dm_444_16) { + if (!is_chroma) + ret_val = 2; + } else if (source_format == dm_444_32) { + if (!is_chroma) + ret_val = 4; + } else if (source_format == dm_444_64) { + if (!is_chroma) + ret_val = 8; + } else if (source_format == dm_420_8) { + if (is_chroma) + ret_val = 2; + else + ret_val = 1; + } else if (source_format == dm_420_10) { + if (is_chroma) + ret_val = 4; + else + ret_val = 2; + } else if (source_format == dm_444_8) { + ret_val = 1; + } + return ret_val; +} + +static bool is_dual_plane(enum source_format_class source_format) +{ + bool ret_val = 0; + + if ((source_format == dm_420_8) || (source_format == dm_420_10)) + ret_val = 1; + + return ret_val; +} + +static double get_refcyc_per_delivery(struct display_mode_lib *mode_lib, + double refclk_freq_in_mhz, + double pclk_freq_in_mhz, + bool odm_combine, + unsigned int recout_width, + unsigned int hactive, + double vratio, + double hscale_pixel_rate, + unsigned int delivery_width, + unsigned int req_per_swath_ub) +{ + double refcyc_per_delivery = 0.0; + + if (vratio <= 1.0) { + if (odm_combine) + refcyc_per_delivery = (double) refclk_freq_in_mhz + * dml_min((double) recout_width, (double) hactive / 2.0) + / pclk_freq_in_mhz / (double) req_per_swath_ub; + else + refcyc_per_delivery = (double) refclk_freq_in_mhz * (double) recout_width + / pclk_freq_in_mhz / (double) req_per_swath_ub; + } else { + refcyc_per_delivery = (double) refclk_freq_in_mhz * (double) delivery_width + / (double) hscale_pixel_rate / (double) req_per_swath_ub; + } + + dml_print("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, refclk_freq_in_mhz); + dml_print("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, pclk_freq_in_mhz); + dml_print("DML_DLG: %s: recout_width = %d\n", __func__, recout_width); + dml_print("DML_DLG: %s: vratio = %3.2f\n", __func__, vratio); + dml_print("DML_DLG: %s: req_per_swath_ub = %d\n", __func__, req_per_swath_ub); + dml_print("DML_DLG: %s: refcyc_per_delivery= %3.2f\n", __func__, refcyc_per_delivery); + + return refcyc_per_delivery; + +} + +static unsigned int get_blk_size_bytes(const enum source_macro_tile_size tile_size) +{ + if (tile_size == dm_256k_tile) + return (256 * 1024); + else if (tile_size == dm_64k_tile) + return (64 * 1024); + else + return (4 * 1024); +} + +static void extract_rq_sizing_regs(struct display_mode_lib *mode_lib, + display_data_rq_regs_st *rq_regs, + const display_data_rq_sizing_params_st rq_sizing) +{ + dml_print("DML_DLG: %s: rq_sizing param\n", __func__); + print__data_rq_sizing_params_st(mode_lib, rq_sizing); + + rq_regs->chunk_size = dml_log2(rq_sizing.chunk_bytes) - 10; + + if (rq_sizing.min_chunk_bytes == 0) + rq_regs->min_chunk_size = 0; + else + rq_regs->min_chunk_size = dml_log2(rq_sizing.min_chunk_bytes) - 8 + 1; + + rq_regs->meta_chunk_size = dml_log2(rq_sizing.meta_chunk_bytes) - 10; + if (rq_sizing.min_meta_chunk_bytes == 0) + rq_regs->min_meta_chunk_size = 0; + else + rq_regs->min_meta_chunk_size = dml_log2(rq_sizing.min_meta_chunk_bytes) - 6 + 1; + + rq_regs->dpte_group_size = dml_log2(rq_sizing.dpte_group_bytes) - 6; + rq_regs->mpte_group_size = dml_log2(rq_sizing.mpte_group_bytes) - 6; +} + +static void extract_rq_regs(struct display_mode_lib *mode_lib, + display_rq_regs_st *rq_regs, + const display_rq_params_st rq_param) +{ + unsigned int detile_buf_size_in_bytes = mode_lib->ip.det_buffer_size_kbytes * 1024; + unsigned int detile_buf_plane1_addr = 0; + + extract_rq_sizing_regs(mode_lib, &(rq_regs->rq_regs_l), rq_param.sizing.rq_l); + + rq_regs->rq_regs_l.pte_row_height_linear = dml_floor(dml_log2(rq_param.dlg.rq_l.dpte_row_height), + 1) - 3; + + if (rq_param.yuv420) { + extract_rq_sizing_regs(mode_lib, &(rq_regs->rq_regs_c), rq_param.sizing.rq_c); + rq_regs->rq_regs_c.pte_row_height_linear = dml_floor(dml_log2(rq_param.dlg.rq_c.dpte_row_height), + 1) - 3; + } + + rq_regs->rq_regs_l.swath_height = dml_log2(rq_param.dlg.rq_l.swath_height); + rq_regs->rq_regs_c.swath_height = dml_log2(rq_param.dlg.rq_c.swath_height); + + // FIXME: take the max between luma, chroma chunk size? + // okay for now, as we are setting chunk_bytes to 8kb anyways + if (rq_param.sizing.rq_l.chunk_bytes >= 32 * 1024) { //32kb + rq_regs->drq_expansion_mode = 0; + } else { + rq_regs->drq_expansion_mode = 2; + } + rq_regs->prq_expansion_mode = 1; + rq_regs->mrq_expansion_mode = 1; + rq_regs->crq_expansion_mode = 1; + + if (rq_param.yuv420) { + if ((double) rq_param.misc.rq_l.stored_swath_bytes + / (double) rq_param.misc.rq_c.stored_swath_bytes <= 1.5) { + detile_buf_plane1_addr = (detile_buf_size_in_bytes / 2.0 / 64.0); // half to chroma + } else { + detile_buf_plane1_addr = dml_round_to_multiple((unsigned int) ((2.0 * detile_buf_size_in_bytes) / 3.0), + 256, + 0) / 64.0; // 2/3 to chroma + } + } + rq_regs->plane1_base_address = detile_buf_plane1_addr; +} + +static void handle_det_buf_split(struct display_mode_lib *mode_lib, + display_rq_params_st *rq_param, + const display_pipe_source_params_st pipe_src_param) +{ + unsigned int total_swath_bytes = 0; + unsigned int swath_bytes_l = 0; + unsigned int swath_bytes_c = 0; + unsigned int full_swath_bytes_packed_l = 0; + unsigned int full_swath_bytes_packed_c = 0; + bool req128_l = 0; + bool req128_c = 0; + bool surf_linear = (pipe_src_param.sw_mode == dm_sw_linear); + bool surf_vert = (pipe_src_param.source_scan == dm_vert); + unsigned int log2_swath_height_l = 0; + unsigned int log2_swath_height_c = 0; + unsigned int detile_buf_size_in_bytes = mode_lib->ip.det_buffer_size_kbytes * 1024; + + full_swath_bytes_packed_l = rq_param->misc.rq_l.full_swath_bytes; + full_swath_bytes_packed_c = rq_param->misc.rq_c.full_swath_bytes; + + if (rq_param->yuv420_10bpc) { + full_swath_bytes_packed_l = dml_round_to_multiple(rq_param->misc.rq_l.full_swath_bytes * 2 / 3, + 256, + 1) + 256; + full_swath_bytes_packed_c = dml_round_to_multiple(rq_param->misc.rq_c.full_swath_bytes * 2 / 3, + 256, + 1) + 256; + } + + if (rq_param->yuv420) { + total_swath_bytes = 2 * full_swath_bytes_packed_l + 2 * full_swath_bytes_packed_c; + + if (total_swath_bytes <= detile_buf_size_in_bytes) { //full 256b request + req128_l = 0; + req128_c = 0; + swath_bytes_l = full_swath_bytes_packed_l; + swath_bytes_c = full_swath_bytes_packed_c; + } else { //128b request (for luma only for yuv420 8bpc) + req128_l = 1; + req128_c = 0; + swath_bytes_l = full_swath_bytes_packed_l / 2; + swath_bytes_c = full_swath_bytes_packed_c; + } + // Note: assumption, the config that pass in will fit into + // the detiled buffer. + } else { + total_swath_bytes = 2 * full_swath_bytes_packed_l; + + if (total_swath_bytes <= detile_buf_size_in_bytes) + req128_l = 0; + else + req128_l = 1; + + swath_bytes_l = total_swath_bytes; + swath_bytes_c = 0; + } + rq_param->misc.rq_l.stored_swath_bytes = swath_bytes_l; + rq_param->misc.rq_c.stored_swath_bytes = swath_bytes_c; + + if (surf_linear) { + log2_swath_height_l = 0; + log2_swath_height_c = 0; + } else if (!surf_vert) { + log2_swath_height_l = dml_log2(rq_param->misc.rq_l.blk256_height) - req128_l; + log2_swath_height_c = dml_log2(rq_param->misc.rq_c.blk256_height) - req128_c; + } else { + log2_swath_height_l = dml_log2(rq_param->misc.rq_l.blk256_width) - req128_l; + log2_swath_height_c = dml_log2(rq_param->misc.rq_c.blk256_width) - req128_c; + } + rq_param->dlg.rq_l.swath_height = 1 << log2_swath_height_l; + rq_param->dlg.rq_c.swath_height = 1 << log2_swath_height_c; + + dml_print("DML_DLG: %s: req128_l = %0d\n", __func__, req128_l); + dml_print("DML_DLG: %s: req128_c = %0d\n", __func__, req128_c); + dml_print("DML_DLG: %s: full_swath_bytes_packed_l = %0d\n", + __func__, + full_swath_bytes_packed_l); + dml_print("DML_DLG: %s: full_swath_bytes_packed_c = %0d\n", + __func__, + full_swath_bytes_packed_c); +} + +static void get_meta_and_pte_attr(struct display_mode_lib *mode_lib, + display_data_rq_dlg_params_st *rq_dlg_param, + display_data_rq_misc_params_st *rq_misc_param, + display_data_rq_sizing_params_st *rq_sizing_param, + unsigned int vp_width, + unsigned int vp_height, + unsigned int data_pitch, + unsigned int meta_pitch, + unsigned int source_format, + unsigned int tiling, + unsigned int macro_tile_size, + unsigned int source_scan, + unsigned int is_chroma) +{ + bool surf_linear = (tiling == dm_sw_linear); + bool surf_vert = (source_scan == dm_vert); + + unsigned int bytes_per_element; + unsigned int bytes_per_element_y = get_bytes_per_element((enum source_format_class)(source_format), + false); + unsigned int bytes_per_element_c = get_bytes_per_element((enum source_format_class)(source_format), + true); + + unsigned int blk256_width = 0; + unsigned int blk256_height = 0; + + unsigned int blk256_width_y = 0; + unsigned int blk256_height_y = 0; + unsigned int blk256_width_c = 0; + unsigned int blk256_height_c = 0; + unsigned int log2_bytes_per_element; + unsigned int log2_blk256_width; + unsigned int log2_blk256_height; + unsigned int blk_bytes; + unsigned int log2_blk_bytes; + unsigned int log2_blk_height; + unsigned int log2_blk_width; + unsigned int log2_meta_req_bytes; + unsigned int log2_meta_req_height; + unsigned int log2_meta_req_width; + unsigned int meta_req_width; + unsigned int meta_req_height; + unsigned int log2_meta_row_height; + unsigned int meta_row_width_ub; + unsigned int log2_meta_chunk_bytes; + unsigned int log2_meta_chunk_height; + + //full sized meta chunk width in unit of data elements + unsigned int log2_meta_chunk_width; + unsigned int log2_min_meta_chunk_bytes; + unsigned int min_meta_chunk_width; + unsigned int meta_chunk_width; + unsigned int meta_chunk_per_row_int; + unsigned int meta_row_remainder; + unsigned int meta_chunk_threshold; + unsigned int meta_blk_bytes; + unsigned int meta_blk_height; + unsigned int meta_blk_width; + unsigned int meta_surface_bytes; + unsigned int vmpg_bytes; + unsigned int meta_pte_req_per_frame_ub; + unsigned int meta_pte_bytes_per_frame_ub; + const unsigned int log2_vmpg_bytes = dml_log2(mode_lib->soc.vmm_page_size_bytes); + const unsigned int dpte_buf_in_pte_reqs = mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma; + const unsigned int pde_proc_buffer_size_64k_reqs = + mode_lib->ip.pde_proc_buffer_size_64k_reqs; + + unsigned int log2_vmpg_height = 0; + unsigned int log2_vmpg_width = 0; + unsigned int log2_dpte_req_height_ptes = 0; + unsigned int log2_dpte_req_height = 0; + unsigned int log2_dpte_req_width = 0; + unsigned int log2_dpte_row_height_linear = 0; + unsigned int log2_dpte_row_height = 0; + unsigned int log2_dpte_group_width = 0; + unsigned int dpte_row_width_ub = 0; + unsigned int dpte_req_height = 0; + unsigned int dpte_req_width = 0; + unsigned int dpte_group_width = 0; + unsigned int log2_dpte_group_bytes = 0; + unsigned int log2_dpte_group_length = 0; + unsigned int pde_buf_entries; + bool yuv420 = (source_format == dm_420_8 || source_format == dm_420_10); + + Calculate256BBlockSizes((enum source_format_class)(source_format), + (enum dm_swizzle_mode)(tiling), + bytes_per_element_y, + bytes_per_element_c, + &blk256_height_y, + &blk256_height_c, + &blk256_width_y, + &blk256_width_c); + + if (!is_chroma) { + blk256_width = blk256_width_y; + blk256_height = blk256_height_y; + bytes_per_element = bytes_per_element_y; + } else { + blk256_width = blk256_width_c; + blk256_height = blk256_height_c; + bytes_per_element = bytes_per_element_c; + } + + log2_bytes_per_element = dml_log2(bytes_per_element); + + dml_print("DML_DLG: %s: surf_linear = %d\n", __func__, surf_linear); + dml_print("DML_DLG: %s: surf_vert = %d\n", __func__, surf_vert); + dml_print("DML_DLG: %s: blk256_width = %d\n", __func__, blk256_width); + dml_print("DML_DLG: %s: blk256_height = %d\n", __func__, blk256_height); + + log2_blk256_width = dml_log2((double) blk256_width); + log2_blk256_height = dml_log2((double) blk256_height); + blk_bytes = surf_linear ? + 256 : get_blk_size_bytes((enum source_macro_tile_size) macro_tile_size); + log2_blk_bytes = dml_log2((double) blk_bytes); + log2_blk_height = 0; + log2_blk_width = 0; + + // remember log rule + // "+" in log is multiply + // "-" in log is divide + // "/2" is like square root + // blk is vertical biased + if (tiling != dm_sw_linear) + log2_blk_height = log2_blk256_height + + dml_ceil((double) (log2_blk_bytes - 8) / 2.0, 1); + else + log2_blk_height = 0; // blk height of 1 + + log2_blk_width = log2_blk_bytes - log2_bytes_per_element - log2_blk_height; + + if (!surf_vert) { + rq_dlg_param->swath_width_ub = dml_round_to_multiple(vp_width - 1, blk256_width, 1) + + blk256_width; + rq_dlg_param->req_per_swath_ub = rq_dlg_param->swath_width_ub >> log2_blk256_width; + } else { + rq_dlg_param->swath_width_ub = dml_round_to_multiple(vp_height - 1, blk256_height, 1) + + blk256_height; + rq_dlg_param->req_per_swath_ub = rq_dlg_param->swath_width_ub >> log2_blk256_height; + } + + if (!surf_vert) + rq_misc_param->full_swath_bytes = rq_dlg_param->swath_width_ub * blk256_height + * bytes_per_element; + else + rq_misc_param->full_swath_bytes = rq_dlg_param->swath_width_ub * blk256_width + * bytes_per_element; + + rq_misc_param->blk256_height = blk256_height; + rq_misc_param->blk256_width = blk256_width; + + // ------- + // meta + // ------- + log2_meta_req_bytes = 6; // meta request is 64b and is 8x8byte meta element + + // each 64b meta request for dcn is 8x8 meta elements and + // a meta element covers one 256b block of the the data surface. + log2_meta_req_height = log2_blk256_height + 3; // meta req is 8x8 byte, each byte represent 1 blk256 + log2_meta_req_width = log2_meta_req_bytes + 8 - log2_bytes_per_element + - log2_meta_req_height; + meta_req_width = 1 << log2_meta_req_width; + meta_req_height = 1 << log2_meta_req_height; + log2_meta_row_height = 0; + meta_row_width_ub = 0; + + // the dimensions of a meta row are meta_row_width x meta_row_height in elements. + // calculate upper bound of the meta_row_width + if (!surf_vert) { + log2_meta_row_height = log2_meta_req_height; + meta_row_width_ub = dml_round_to_multiple(vp_width - 1, meta_req_width, 1) + + meta_req_width; + rq_dlg_param->meta_req_per_row_ub = meta_row_width_ub / meta_req_width; + } else { + log2_meta_row_height = log2_meta_req_width; + meta_row_width_ub = dml_round_to_multiple(vp_height - 1, meta_req_height, 1) + + meta_req_height; + rq_dlg_param->meta_req_per_row_ub = meta_row_width_ub / meta_req_height; + } + rq_dlg_param->meta_bytes_per_row_ub = rq_dlg_param->meta_req_per_row_ub * 64; + + rq_dlg_param->meta_row_height = 1 << log2_meta_row_height; + + log2_meta_chunk_bytes = dml_log2(rq_sizing_param->meta_chunk_bytes); + log2_meta_chunk_height = log2_meta_row_height; + + //full sized meta chunk width in unit of data elements + log2_meta_chunk_width = log2_meta_chunk_bytes + 8 - log2_bytes_per_element + - log2_meta_chunk_height; + log2_min_meta_chunk_bytes = dml_log2(rq_sizing_param->min_meta_chunk_bytes); + min_meta_chunk_width = 1 + << (log2_min_meta_chunk_bytes + 8 - log2_bytes_per_element + - log2_meta_chunk_height); + meta_chunk_width = 1 << log2_meta_chunk_width; + meta_chunk_per_row_int = (unsigned int) (meta_row_width_ub / meta_chunk_width); + meta_row_remainder = meta_row_width_ub % meta_chunk_width; + meta_chunk_threshold = 0; + meta_blk_bytes = 4096; + meta_blk_height = blk256_height * 64; + meta_blk_width = meta_blk_bytes * 256 / bytes_per_element / meta_blk_height; + meta_surface_bytes = meta_pitch + * (dml_round_to_multiple(vp_height - 1, meta_blk_height, 1) + meta_blk_height) + * bytes_per_element / 256; + vmpg_bytes = mode_lib->soc.vmm_page_size_bytes; + meta_pte_req_per_frame_ub = (dml_round_to_multiple(meta_surface_bytes - vmpg_bytes, + 8 * vmpg_bytes, + 1) + 8 * vmpg_bytes) / (8 * vmpg_bytes); + meta_pte_bytes_per_frame_ub = meta_pte_req_per_frame_ub * 64; //64B mpte request + rq_dlg_param->meta_pte_bytes_per_frame_ub = meta_pte_bytes_per_frame_ub; + + dml_print("DML_DLG: %s: meta_blk_height = %d\n", __func__, meta_blk_height); + dml_print("DML_DLG: %s: meta_blk_width = %d\n", __func__, meta_blk_width); + dml_print("DML_DLG: %s: meta_surface_bytes = %d\n", __func__, meta_surface_bytes); + dml_print("DML_DLG: %s: meta_pte_req_per_frame_ub = %d\n", + __func__, + meta_pte_req_per_frame_ub); + dml_print("DML_DLG: %s: meta_pte_bytes_per_frame_ub = %d\n", + __func__, + meta_pte_bytes_per_frame_ub); + + if (!surf_vert) + meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width; + else + meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height; + + if (meta_row_remainder <= meta_chunk_threshold) + rq_dlg_param->meta_chunks_per_row_ub = meta_chunk_per_row_int + 1; + else + rq_dlg_param->meta_chunks_per_row_ub = meta_chunk_per_row_int + 2; + + // ------ + // dpte + // ------ + if (surf_linear) { + log2_vmpg_height = 0; // one line high + } else { + log2_vmpg_height = (log2_vmpg_bytes - 8) / 2 + log2_blk256_height; + } + log2_vmpg_width = log2_vmpg_bytes - log2_bytes_per_element - log2_vmpg_height; + + // only 3 possible shapes for dpte request in dimensions of ptes: 8x1, 4x2, 2x4. + if (surf_linear) { //one 64B PTE request returns 8 PTEs + log2_dpte_req_height_ptes = 0; + log2_dpte_req_width = log2_vmpg_width + 3; + log2_dpte_req_height = 0; + } else if (log2_blk_bytes == 12) { //4KB tile means 4kB page size + //one 64B req gives 8x1 PTEs for 4KB tile + log2_dpte_req_height_ptes = 0; + log2_dpte_req_width = log2_blk_width + 3; + log2_dpte_req_height = log2_blk_height + 0; + } else if ((log2_blk_bytes >= 16) && (log2_vmpg_bytes == 12)) { // tile block >= 64KB + //two 64B reqs of 2x4 PTEs give 16 PTEs to cover 64KB + log2_dpte_req_height_ptes = 4; + log2_dpte_req_width = log2_blk256_width + 4; // log2_64KB_width + log2_dpte_req_height = log2_blk256_height + 4; // log2_64KB_height + } else { //64KB page size and must 64KB tile block + //one 64B req gives 8x1 PTEs for 64KB tile + log2_dpte_req_height_ptes = 0; + log2_dpte_req_width = log2_blk_width + 3; + log2_dpte_req_height = log2_blk_height + 0; + } + + // The dpte request dimensions in data elements is dpte_req_width x dpte_req_height + // log2_vmpg_width is how much 1 pte represent, now calculating how much a 64b pte req represent + // That depends on the pte shape (i.e. 8x1, 4x2, 2x4) + //log2_dpte_req_height = log2_vmpg_height + log2_dpte_req_height_ptes; + //log2_dpte_req_width = log2_vmpg_width + log2_dpte_req_width_ptes; + dpte_req_height = 1 << log2_dpte_req_height; + dpte_req_width = 1 << log2_dpte_req_width; + + // calculate pitch dpte row buffer can hold + // round the result down to a power of two. + pde_buf_entries = yuv420 ? (pde_proc_buffer_size_64k_reqs >> 1) : pde_proc_buffer_size_64k_reqs; + if (surf_linear) { + unsigned int dpte_row_height; + + log2_dpte_row_height_linear = dml_floor(dml_log2(dml_min(64 * 1024 * pde_buf_entries + / bytes_per_element, + dpte_buf_in_pte_reqs + * dpte_req_width) + / data_pitch), + 1); + + ASSERT(log2_dpte_row_height_linear >= 3); + + if (log2_dpte_row_height_linear > 7) + log2_dpte_row_height_linear = 7; + + log2_dpte_row_height = log2_dpte_row_height_linear; + // For linear, the dpte row is pitch dependent and the pte requests wrap at the pitch boundary. + // the dpte_row_width_ub is the upper bound of data_pitch*dpte_row_height in elements with this unique buffering. + dpte_row_height = 1 << log2_dpte_row_height; + dpte_row_width_ub = dml_round_to_multiple(data_pitch * dpte_row_height - 1, + dpte_req_width, + 1) + dpte_req_width; + rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_width; + } else { + // the upper bound of the dpte_row_width without dependency on viewport position follows. + // for tiled mode, row height is the same as req height and row store up to vp size upper bound + if (!surf_vert) { + log2_dpte_row_height = log2_dpte_req_height; + dpte_row_width_ub = dml_round_to_multiple(vp_width - 1, dpte_req_width, 1) + + dpte_req_width; + rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_width; + } else { + log2_dpte_row_height = + (log2_blk_width < log2_dpte_req_width) ? + log2_blk_width : log2_dpte_req_width; + dpte_row_width_ub = dml_round_to_multiple(vp_height - 1, dpte_req_height, 1) + + dpte_req_height; + rq_dlg_param->dpte_req_per_row_ub = dpte_row_width_ub / dpte_req_height; + } + } + if (log2_blk_bytes >= 16 && log2_vmpg_bytes == 12) // tile block >= 64KB + rq_dlg_param->dpte_bytes_per_row_ub = rq_dlg_param->dpte_req_per_row_ub * 128; //2*64B dpte request + else + rq_dlg_param->dpte_bytes_per_row_ub = rq_dlg_param->dpte_req_per_row_ub * 64; //64B dpte request + + rq_dlg_param->dpte_row_height = 1 << log2_dpte_row_height; + + // the dpte_group_bytes is reduced for the specific case of vertical + // access of a tile surface that has dpte request of 8x1 ptes. + if (!surf_linear & (log2_dpte_req_height_ptes == 0) & surf_vert) //reduced, in this case, will have page fault within a group + rq_sizing_param->dpte_group_bytes = 512; + else + //full size + rq_sizing_param->dpte_group_bytes = 2048; + + //since pte request size is 64byte, the number of data pte requests per full sized group is as follows. + log2_dpte_group_bytes = dml_log2(rq_sizing_param->dpte_group_bytes); + log2_dpte_group_length = log2_dpte_group_bytes - 6; //length in 64b requests + + // full sized data pte group width in elements + if (!surf_vert) + log2_dpte_group_width = log2_dpte_group_length + log2_dpte_req_width; + else + log2_dpte_group_width = log2_dpte_group_length + log2_dpte_req_height; + + //But if the tile block >=64KB and the page size is 4KB, then each dPTE request is 2*64B + if ((log2_blk_bytes >= 16) && (log2_vmpg_bytes == 12)) // tile block >= 64KB + log2_dpte_group_width = log2_dpte_group_width - 1; + + dpte_group_width = 1 << log2_dpte_group_width; + + // since dpte groups are only aligned to dpte_req_width and not dpte_group_width, + // the upper bound for the dpte groups per row is as follows. + rq_dlg_param->dpte_groups_per_row_ub = dml_ceil((double) dpte_row_width_ub / dpte_group_width, + 1); +} + +static void get_surf_rq_param(struct display_mode_lib *mode_lib, + display_data_rq_sizing_params_st *rq_sizing_param, + display_data_rq_dlg_params_st *rq_dlg_param, + display_data_rq_misc_params_st *rq_misc_param, + const display_pipe_source_params_st pipe_src_param, + bool is_chroma) +{ + bool mode_422 = 0; + unsigned int vp_width = 0; + unsigned int vp_height = 0; + unsigned int data_pitch = 0; + unsigned int meta_pitch = 0; + unsigned int ppe = mode_422 ? 2 : 1; + + // FIXME check if ppe apply for both luma and chroma in 422 case + if (is_chroma) { + vp_width = pipe_src_param.viewport_width_c / ppe; + vp_height = pipe_src_param.viewport_height_c; + data_pitch = pipe_src_param.data_pitch_c; + meta_pitch = pipe_src_param.meta_pitch_c; + } else { + vp_width = pipe_src_param.viewport_width / ppe; + vp_height = pipe_src_param.viewport_height; + data_pitch = pipe_src_param.data_pitch; + meta_pitch = pipe_src_param.meta_pitch; + } + + rq_sizing_param->chunk_bytes = 8192; + + if (rq_sizing_param->chunk_bytes == 64 * 1024) + rq_sizing_param->min_chunk_bytes = 0; + else + rq_sizing_param->min_chunk_bytes = 1024; + + rq_sizing_param->meta_chunk_bytes = 2048; + rq_sizing_param->min_meta_chunk_bytes = 256; + + rq_sizing_param->mpte_group_bytes = 2048; + + get_meta_and_pte_attr(mode_lib, + rq_dlg_param, + rq_misc_param, + rq_sizing_param, + vp_width, + vp_height, + data_pitch, + meta_pitch, + pipe_src_param.source_format, + pipe_src_param.sw_mode, + pipe_src_param.macro_tile_size, + pipe_src_param.source_scan, + is_chroma); +} + +static void dml20v2_rq_dlg_get_rq_params(struct display_mode_lib *mode_lib, + display_rq_params_st *rq_param, + const display_pipe_source_params_st pipe_src_param) +{ + // get param for luma surface + rq_param->yuv420 = pipe_src_param.source_format == dm_420_8 + || pipe_src_param.source_format == dm_420_10; + rq_param->yuv420_10bpc = pipe_src_param.source_format == dm_420_10; + + get_surf_rq_param(mode_lib, + &(rq_param->sizing.rq_l), + &(rq_param->dlg.rq_l), + &(rq_param->misc.rq_l), + pipe_src_param, + 0); + + if (is_dual_plane((enum source_format_class)(pipe_src_param.source_format))) { + // get param for chroma surface + get_surf_rq_param(mode_lib, + &(rq_param->sizing.rq_c), + &(rq_param->dlg.rq_c), + &(rq_param->misc.rq_c), + pipe_src_param, + 1); + } + + // calculate how to split the det buffer space between luma and chroma + handle_det_buf_split(mode_lib, rq_param, pipe_src_param); + print__rq_params_st(mode_lib, *rq_param); +} + +void dml20v2_rq_dlg_get_rq_reg(struct display_mode_lib *mode_lib, + display_rq_regs_st *rq_regs, + const display_pipe_params_st pipe_param) +{ + display_rq_params_st rq_param = {0}; + + memset(rq_regs, 0, sizeof(*rq_regs)); + dml20v2_rq_dlg_get_rq_params(mode_lib, &rq_param, pipe_param.src); + extract_rq_regs(mode_lib, rq_regs, rq_param); + + print__rq_regs_st(mode_lib, *rq_regs); +} + +// Note: currently taken in as is. +// Nice to decouple code from hw register implement and extract code that are repeated for luma and chroma. +static void dml20v2_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib, + const display_e2e_pipe_params_st *e2e_pipe_param, + const unsigned int num_pipes, + const unsigned int pipe_idx, + display_dlg_regs_st *disp_dlg_regs, + display_ttu_regs_st *disp_ttu_regs, + const display_rq_dlg_params_st rq_dlg_param, + const display_dlg_sys_params_st dlg_sys_param, + const bool cstate_en, + const bool pstate_en) +{ + const display_pipe_source_params_st *src = &e2e_pipe_param[pipe_idx].pipe.src; + const display_pipe_dest_params_st *dst = &e2e_pipe_param[pipe_idx].pipe.dest; + const display_output_params_st *dout = &e2e_pipe_param[pipe_idx].dout; + const display_clocks_and_cfg_st *clks = &e2e_pipe_param[pipe_idx].clks_cfg; + const scaler_ratio_depth_st *scl = &e2e_pipe_param[pipe_idx].pipe.scale_ratio_depth; + const scaler_taps_st *taps = &e2e_pipe_param[pipe_idx].pipe.scale_taps; + + // ------------------------- + // Section 1.15.2.1: OTG dependent Params + // ------------------------- + // Timing + unsigned int htotal = dst->htotal; +// unsigned int hblank_start = dst.hblank_start; // TODO: Remove + unsigned int hblank_end = dst->hblank_end; + unsigned int vblank_start = dst->vblank_start; + unsigned int vblank_end = dst->vblank_end; + unsigned int min_vblank = mode_lib->ip.min_vblank_lines; + + double dppclk_freq_in_mhz = clks->dppclk_mhz; + double dispclk_freq_in_mhz = clks->dispclk_mhz; + double refclk_freq_in_mhz = clks->refclk_mhz; + double pclk_freq_in_mhz = dst->pixel_rate_mhz; + bool interlaced = dst->interlaced; + + double ref_freq_to_pix_freq = refclk_freq_in_mhz / pclk_freq_in_mhz; + + double min_dcfclk_mhz; + double t_calc_us; + double min_ttu_vblank; + + double min_dst_y_ttu_vblank; + unsigned int dlg_vblank_start; + bool dual_plane; + bool mode_422; + unsigned int access_dir; + unsigned int vp_height_l; + unsigned int vp_width_l; + unsigned int vp_height_c; + unsigned int vp_width_c; + + // Scaling + unsigned int htaps_l; + unsigned int htaps_c; + double hratio_l; + double hratio_c; + double vratio_l; + double vratio_c; + bool scl_enable; + + double line_time_in_us; + // double vinit_l; + // double vinit_c; + // double vinit_bot_l; + // double vinit_bot_c; + + // unsigned int swath_height_l; + unsigned int swath_width_ub_l; + // unsigned int dpte_bytes_per_row_ub_l; + unsigned int dpte_groups_per_row_ub_l; + // unsigned int meta_pte_bytes_per_frame_ub_l; + // unsigned int meta_bytes_per_row_ub_l; + + // unsigned int swath_height_c; + unsigned int swath_width_ub_c; + // unsigned int dpte_bytes_per_row_ub_c; + unsigned int dpte_groups_per_row_ub_c; + + unsigned int meta_chunks_per_row_ub_l; + unsigned int meta_chunks_per_row_ub_c; + unsigned int vupdate_offset; + unsigned int vupdate_width; + unsigned int vready_offset; + + unsigned int dppclk_delay_subtotal; + unsigned int dispclk_delay_subtotal; + unsigned int pixel_rate_delay_subtotal; + + unsigned int vstartup_start; + unsigned int dst_x_after_scaler; + unsigned int dst_y_after_scaler; + double line_wait; + double dst_y_prefetch; + double dst_y_per_vm_vblank; + double dst_y_per_row_vblank; + double dst_y_per_vm_flip; + double dst_y_per_row_flip; + double min_dst_y_per_vm_vblank; + double min_dst_y_per_row_vblank; + double lsw; + double vratio_pre_l; + double vratio_pre_c; + unsigned int req_per_swath_ub_l; + unsigned int req_per_swath_ub_c; + unsigned int meta_row_height_l; + unsigned int meta_row_height_c; + unsigned int swath_width_pixels_ub_l; + unsigned int swath_width_pixels_ub_c; + unsigned int scaler_rec_in_width_l; + unsigned int scaler_rec_in_width_c; + unsigned int dpte_row_height_l; + unsigned int dpte_row_height_c; + double hscale_pixel_rate_l; + double hscale_pixel_rate_c; + double min_hratio_fact_l; + double min_hratio_fact_c; + double refcyc_per_line_delivery_pre_l; + double refcyc_per_line_delivery_pre_c; + double refcyc_per_line_delivery_l; + double refcyc_per_line_delivery_c; + + double refcyc_per_req_delivery_pre_l; + double refcyc_per_req_delivery_pre_c; + double refcyc_per_req_delivery_l; + double refcyc_per_req_delivery_c; + + unsigned int full_recout_width; + double xfc_transfer_delay; + double xfc_precharge_delay; + double xfc_remote_surface_flip_latency; + double xfc_dst_y_delta_drq_limit; + double xfc_prefetch_margin; + double refcyc_per_req_delivery_pre_cur0; + double refcyc_per_req_delivery_cur0; + double refcyc_per_req_delivery_pre_cur1; + double refcyc_per_req_delivery_cur1; + + memset(disp_dlg_regs, 0, sizeof(*disp_dlg_regs)); + memset(disp_ttu_regs, 0, sizeof(*disp_ttu_regs)); + + dml_print("DML_DLG: %s: cstate_en = %d\n", __func__, cstate_en); + dml_print("DML_DLG: %s: pstate_en = %d\n", __func__, pstate_en); + + dml_print("DML_DLG: %s: dppclk_freq_in_mhz = %3.2f\n", __func__, dppclk_freq_in_mhz); + dml_print("DML_DLG: %s: dispclk_freq_in_mhz = %3.2f\n", __func__, dispclk_freq_in_mhz); + dml_print("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, refclk_freq_in_mhz); + dml_print("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, pclk_freq_in_mhz); + dml_print("DML_DLG: %s: interlaced = %d\n", __func__, interlaced); + ASSERT(ref_freq_to_pix_freq < 4.0); + + disp_dlg_regs->ref_freq_to_pix_freq = + (unsigned int) (ref_freq_to_pix_freq * dml_pow(2, 19)); + disp_dlg_regs->refcyc_per_htotal = (unsigned int) (ref_freq_to_pix_freq * (double) htotal + * dml_pow(2, 8)); + disp_dlg_regs->dlg_vblank_end = interlaced ? (vblank_end / 2) : vblank_end; // 15 bits + disp_dlg_regs->refcyc_h_blank_end = (unsigned int) ((double) hblank_end + * (double) ref_freq_to_pix_freq); + ASSERT(disp_dlg_regs->refcyc_h_blank_end < (unsigned int) dml_pow(2, 13)); + + min_dcfclk_mhz = dlg_sys_param.deepsleep_dcfclk_mhz; + t_calc_us = get_tcalc(mode_lib, e2e_pipe_param, num_pipes); + min_ttu_vblank = get_min_ttu_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + + min_dst_y_ttu_vblank = min_ttu_vblank * pclk_freq_in_mhz / (double) htotal; + dlg_vblank_start = interlaced ? (vblank_start / 2) : vblank_start; + + disp_dlg_regs->min_dst_y_next_start = (unsigned int) (((double) dlg_vblank_start + + min_dst_y_ttu_vblank) * dml_pow(2, 2)); + ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int) dml_pow(2, 18)); + + dml_print("DML_DLG: %s: min_dcfclk_mhz = %3.2f\n", + __func__, + min_dcfclk_mhz); + dml_print("DML_DLG: %s: min_ttu_vblank = %3.2f\n", + __func__, + min_ttu_vblank); + dml_print("DML_DLG: %s: min_dst_y_ttu_vblank = %3.2f\n", + __func__, + min_dst_y_ttu_vblank); + dml_print("DML_DLG: %s: t_calc_us = %3.2f\n", + __func__, + t_calc_us); + dml_print("DML_DLG: %s: disp_dlg_regs->min_dst_y_next_start = 0x%0x\n", + __func__, + disp_dlg_regs->min_dst_y_next_start); + dml_print("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", + __func__, + ref_freq_to_pix_freq); + + // ------------------------- + // Section 1.15.2.2: Prefetch, Active and TTU + // ------------------------- + // Prefetch Calc + // Source +// dcc_en = src.dcc; + dual_plane = is_dual_plane((enum source_format_class)(src->source_format)); + mode_422 = 0; // FIXME + access_dir = (src->source_scan == dm_vert); // vp access direction: horizontal or vertical accessed +// bytes_per_element_l = get_bytes_per_element(source_format_class(src.source_format), 0); +// bytes_per_element_c = get_bytes_per_element(source_format_class(src.source_format), 1); + vp_height_l = src->viewport_height; + vp_width_l = src->viewport_width; + vp_height_c = src->viewport_height_c; + vp_width_c = src->viewport_width_c; + + // Scaling + htaps_l = taps->htaps; + htaps_c = taps->htaps_c; + hratio_l = scl->hscl_ratio; + hratio_c = scl->hscl_ratio_c; + vratio_l = scl->vscl_ratio; + vratio_c = scl->vscl_ratio_c; + scl_enable = scl->scl_enable; + + line_time_in_us = (htotal / pclk_freq_in_mhz); +// vinit_l = scl.vinit; +// vinit_c = scl.vinit_c; +// vinit_bot_l = scl.vinit_bot; +// vinit_bot_c = scl.vinit_bot_c; + +// unsigned int swath_height_l = rq_dlg_param.rq_l.swath_height; + swath_width_ub_l = rq_dlg_param.rq_l.swath_width_ub; +// unsigned int dpte_bytes_per_row_ub_l = rq_dlg_param.rq_l.dpte_bytes_per_row_ub; + dpte_groups_per_row_ub_l = rq_dlg_param.rq_l.dpte_groups_per_row_ub; +// unsigned int meta_pte_bytes_per_frame_ub_l = rq_dlg_param.rq_l.meta_pte_bytes_per_frame_ub; +// unsigned int meta_bytes_per_row_ub_l = rq_dlg_param.rq_l.meta_bytes_per_row_ub; + +// unsigned int swath_height_c = rq_dlg_param.rq_c.swath_height; + swath_width_ub_c = rq_dlg_param.rq_c.swath_width_ub; + // dpte_bytes_per_row_ub_c = rq_dlg_param.rq_c.dpte_bytes_per_row_ub; + dpte_groups_per_row_ub_c = rq_dlg_param.rq_c.dpte_groups_per_row_ub; + + meta_chunks_per_row_ub_l = rq_dlg_param.rq_l.meta_chunks_per_row_ub; + meta_chunks_per_row_ub_c = rq_dlg_param.rq_c.meta_chunks_per_row_ub; + vupdate_offset = dst->vupdate_offset; + vupdate_width = dst->vupdate_width; + vready_offset = dst->vready_offset; + + dppclk_delay_subtotal = mode_lib->ip.dppclk_delay_subtotal; + dispclk_delay_subtotal = mode_lib->ip.dispclk_delay_subtotal; + + if (scl_enable) + dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_scl; + else + dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_scl_lb_only; + + dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_cnvc_formatter + + src->num_cursors * mode_lib->ip.dppclk_delay_cnvc_cursor; + + if (dout->dsc_enable) { + double dsc_delay = get_dsc_delay(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + + dispclk_delay_subtotal += dsc_delay; + } + + pixel_rate_delay_subtotal = dppclk_delay_subtotal * pclk_freq_in_mhz / dppclk_freq_in_mhz + + dispclk_delay_subtotal * pclk_freq_in_mhz / dispclk_freq_in_mhz; + + vstartup_start = dst->vstartup_start; + if (interlaced) { + if (vstartup_start / 2.0 + - (double) (vready_offset + vupdate_width + vupdate_offset) / htotal + <= vblank_end / 2.0) + disp_dlg_regs->vready_after_vcount0 = 1; + else + disp_dlg_regs->vready_after_vcount0 = 0; + } else { + if (vstartup_start + - (double) (vready_offset + vupdate_width + vupdate_offset) / htotal + <= vblank_end) + disp_dlg_regs->vready_after_vcount0 = 1; + else + disp_dlg_regs->vready_after_vcount0 = 0; + } + + // TODO: Where is this coming from? + if (interlaced) + vstartup_start = vstartup_start / 2; + + // TODO: What if this min_vblank doesn't match the value in the dml_config_settings.cpp? + if (vstartup_start >= min_vblank) { + dml_print("WARNING: DML_DLG: %s: vblank_start=%d vblank_end=%d\n", + __func__, + vblank_start, + vblank_end); + dml_print("WARNING: DML_DLG: %s: vstartup_start=%d should be less than min_vblank=%d\n", + __func__, + vstartup_start, + min_vblank); + min_vblank = vstartup_start + 1; + dml_print("WARNING: DML_DLG: %s: vstartup_start=%d should be less than min_vblank=%d\n", + __func__, + vstartup_start, + min_vblank); + } + + dst_x_after_scaler = get_dst_x_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + dst_y_after_scaler = get_dst_y_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + + dml_print("DML_DLG: %s: htotal = %d\n", __func__, htotal); + dml_print("DML_DLG: %s: pixel_rate_delay_subtotal = %d\n", + __func__, + pixel_rate_delay_subtotal); + dml_print("DML_DLG: %s: dst_x_after_scaler = %d\n", + __func__, + dst_x_after_scaler); + dml_print("DML_DLG: %s: dst_y_after_scaler = %d\n", + __func__, + dst_y_after_scaler); + + // Lwait + line_wait = mode_lib->soc.urgent_latency_us; + if (cstate_en) + line_wait = dml_max(mode_lib->soc.sr_enter_plus_exit_time_us, line_wait); + if (pstate_en) + line_wait = dml_max(mode_lib->soc.dram_clock_change_latency_us + + mode_lib->soc.urgent_latency_us, + line_wait); + line_wait = line_wait / line_time_in_us; + + dst_y_prefetch = get_dst_y_prefetch(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + dml_print("DML_DLG: %s: dst_y_prefetch (after rnd) = %3.2f\n", __func__, dst_y_prefetch); + + dst_y_per_vm_vblank = get_dst_y_per_vm_vblank(mode_lib, + e2e_pipe_param, + num_pipes, + pipe_idx); + dst_y_per_row_vblank = get_dst_y_per_row_vblank(mode_lib, + e2e_pipe_param, + num_pipes, + pipe_idx); + dst_y_per_vm_flip = get_dst_y_per_vm_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + dst_y_per_row_flip = get_dst_y_per_row_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + + min_dst_y_per_vm_vblank = 8.0; + min_dst_y_per_row_vblank = 16.0; + + // magic! + if (htotal <= 75) { + min_vblank = 300; + min_dst_y_per_vm_vblank = 100.0; + min_dst_y_per_row_vblank = 100.0; + } + + dml_print("DML_DLG: %s: dst_y_per_vm_vblank = %3.2f\n", __func__, dst_y_per_vm_vblank); + dml_print("DML_DLG: %s: dst_y_per_row_vblank = %3.2f\n", __func__, dst_y_per_row_vblank); + + ASSERT(dst_y_per_vm_vblank < min_dst_y_per_vm_vblank); + ASSERT(dst_y_per_row_vblank < min_dst_y_per_row_vblank); + + ASSERT(dst_y_prefetch > (dst_y_per_vm_vblank + dst_y_per_row_vblank)); + lsw = dst_y_prefetch - (dst_y_per_vm_vblank + dst_y_per_row_vblank); + + dml_print("DML_DLG: %s: lsw = %3.2f\n", __func__, lsw); + + vratio_pre_l = get_vratio_prefetch_l(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + vratio_pre_c = get_vratio_prefetch_c(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + + dml_print("DML_DLG: %s: vratio_pre_l=%3.2f\n", __func__, vratio_pre_l); + dml_print("DML_DLG: %s: vratio_pre_c=%3.2f\n", __func__, vratio_pre_c); + + // Active + req_per_swath_ub_l = rq_dlg_param.rq_l.req_per_swath_ub; + req_per_swath_ub_c = rq_dlg_param.rq_c.req_per_swath_ub; + meta_row_height_l = rq_dlg_param.rq_l.meta_row_height; + meta_row_height_c = rq_dlg_param.rq_c.meta_row_height; + swath_width_pixels_ub_l = 0; + swath_width_pixels_ub_c = 0; + scaler_rec_in_width_l = 0; + scaler_rec_in_width_c = 0; + dpte_row_height_l = rq_dlg_param.rq_l.dpte_row_height; + dpte_row_height_c = rq_dlg_param.rq_c.dpte_row_height; + + if (mode_422) { + swath_width_pixels_ub_l = swath_width_ub_l * 2; // *2 for 2 pixel per element + swath_width_pixels_ub_c = swath_width_ub_c * 2; + } else { + swath_width_pixels_ub_l = swath_width_ub_l * 1; + swath_width_pixels_ub_c = swath_width_ub_c * 1; + } + + hscale_pixel_rate_l = 0.; + hscale_pixel_rate_c = 0.; + min_hratio_fact_l = 1.0; + min_hratio_fact_c = 1.0; + + if (htaps_l <= 1) + min_hratio_fact_l = 2.0; + else if (htaps_l <= 6) { + if ((hratio_l * 2.0) > 4.0) + min_hratio_fact_l = 4.0; + else + min_hratio_fact_l = hratio_l * 2.0; + } else { + if (hratio_l > 4.0) + min_hratio_fact_l = 4.0; + else + min_hratio_fact_l = hratio_l; + } + + hscale_pixel_rate_l = min_hratio_fact_l * dppclk_freq_in_mhz; + + if (htaps_c <= 1) + min_hratio_fact_c = 2.0; + else if (htaps_c <= 6) { + if ((hratio_c * 2.0) > 4.0) + min_hratio_fact_c = 4.0; + else + min_hratio_fact_c = hratio_c * 2.0; + } else { + if (hratio_c > 4.0) + min_hratio_fact_c = 4.0; + else + min_hratio_fact_c = hratio_c; + } + + hscale_pixel_rate_c = min_hratio_fact_c * dppclk_freq_in_mhz; + + refcyc_per_line_delivery_pre_l = 0.; + refcyc_per_line_delivery_pre_c = 0.; + refcyc_per_line_delivery_l = 0.; + refcyc_per_line_delivery_c = 0.; + + refcyc_per_req_delivery_pre_l = 0.; + refcyc_per_req_delivery_pre_c = 0.; + refcyc_per_req_delivery_l = 0.; + refcyc_per_req_delivery_c = 0.; + + full_recout_width = 0; + // In ODM + if (src->is_hsplit) { + // This "hack" is only allowed (and valid) for MPC combine. In ODM + // combine, you MUST specify the full_recout_width...according to Oswin + if (dst->full_recout_width == 0 && !dst->odm_combine) { + dml_print("DML_DLG: %s: Warning: full_recout_width not set in hsplit mode\n", + __func__); + full_recout_width = dst->recout_width * 2; // assume half split for dcn1 + } else + full_recout_width = dst->full_recout_width; + } else + full_recout_width = dst->recout_width; + + // As of DCN2, mpc_combine and odm_combine are mutually exclusive + refcyc_per_line_delivery_pre_l = get_refcyc_per_delivery(mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_pre_l, + hscale_pixel_rate_l, + swath_width_pixels_ub_l, + 1); // per line + + refcyc_per_line_delivery_l = get_refcyc_per_delivery(mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_l, + hscale_pixel_rate_l, + swath_width_pixels_ub_l, + 1); // per line + + dml_print("DML_DLG: %s: full_recout_width = %d\n", + __func__, + full_recout_width); + dml_print("DML_DLG: %s: hscale_pixel_rate_l = %3.2f\n", + __func__, + hscale_pixel_rate_l); + dml_print("DML_DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f\n", + __func__, + refcyc_per_line_delivery_pre_l); + dml_print("DML_DLG: %s: refcyc_per_line_delivery_l = %3.2f\n", + __func__, + refcyc_per_line_delivery_l); + + if (dual_plane) { + refcyc_per_line_delivery_pre_c = get_refcyc_per_delivery(mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_pre_c, + hscale_pixel_rate_c, + swath_width_pixels_ub_c, + 1); // per line + + refcyc_per_line_delivery_c = get_refcyc_per_delivery(mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_c, + hscale_pixel_rate_c, + swath_width_pixels_ub_c, + 1); // per line + + dml_print("DML_DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f\n", + __func__, + refcyc_per_line_delivery_pre_c); + dml_print("DML_DLG: %s: refcyc_per_line_delivery_c = %3.2f\n", + __func__, + refcyc_per_line_delivery_c); + } + + // TTU - Luma / Chroma + if (access_dir) { // vertical access + scaler_rec_in_width_l = vp_height_l; + scaler_rec_in_width_c = vp_height_c; + } else { + scaler_rec_in_width_l = vp_width_l; + scaler_rec_in_width_c = vp_width_c; + } + + refcyc_per_req_delivery_pre_l = get_refcyc_per_delivery(mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_pre_l, + hscale_pixel_rate_l, + scaler_rec_in_width_l, + req_per_swath_ub_l); // per req + refcyc_per_req_delivery_l = get_refcyc_per_delivery(mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_l, + hscale_pixel_rate_l, + scaler_rec_in_width_l, + req_per_swath_ub_l); // per req + + dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_l = %3.2f\n", + __func__, + refcyc_per_req_delivery_pre_l); + dml_print("DML_DLG: %s: refcyc_per_req_delivery_l = %3.2f\n", + __func__, + refcyc_per_req_delivery_l); + + ASSERT(refcyc_per_req_delivery_pre_l < dml_pow(2, 13)); + ASSERT(refcyc_per_req_delivery_l < dml_pow(2, 13)); + + if (dual_plane) { + refcyc_per_req_delivery_pre_c = get_refcyc_per_delivery(mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_pre_c, + hscale_pixel_rate_c, + scaler_rec_in_width_c, + req_per_swath_ub_c); // per req + refcyc_per_req_delivery_c = get_refcyc_per_delivery(mode_lib, + refclk_freq_in_mhz, + pclk_freq_in_mhz, + dst->odm_combine, + full_recout_width, + dst->hactive, + vratio_c, + hscale_pixel_rate_c, + scaler_rec_in_width_c, + req_per_swath_ub_c); // per req + + dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_c = %3.2f\n", + __func__, + refcyc_per_req_delivery_pre_c); + dml_print("DML_DLG: %s: refcyc_per_req_delivery_c = %3.2f\n", + __func__, + refcyc_per_req_delivery_c); + + ASSERT(refcyc_per_req_delivery_pre_c < dml_pow(2, 13)); + ASSERT(refcyc_per_req_delivery_c < dml_pow(2, 13)); + } + + // XFC + xfc_transfer_delay = get_xfc_transfer_delay(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + xfc_precharge_delay = get_xfc_precharge_delay(mode_lib, + e2e_pipe_param, + num_pipes, + pipe_idx); + xfc_remote_surface_flip_latency = get_xfc_remote_surface_flip_latency(mode_lib, + e2e_pipe_param, + num_pipes, + pipe_idx); + xfc_dst_y_delta_drq_limit = xfc_remote_surface_flip_latency; + xfc_prefetch_margin = get_xfc_prefetch_margin(mode_lib, + e2e_pipe_param, + num_pipes, + pipe_idx); + + // TTU - Cursor + refcyc_per_req_delivery_pre_cur0 = 0.0; + refcyc_per_req_delivery_cur0 = 0.0; + if (src->num_cursors > 0) { + calculate_ttu_cursor(mode_lib, + &refcyc_per_req_delivery_pre_cur0, + &refcyc_per_req_delivery_cur0, + refclk_freq_in_mhz, + ref_freq_to_pix_freq, + hscale_pixel_rate_l, + scl->hscl_ratio, + vratio_pre_l, + vratio_l, + src->cur0_src_width, + (enum cursor_bpp)(src->cur0_bpp)); + } + + refcyc_per_req_delivery_pre_cur1 = 0.0; + refcyc_per_req_delivery_cur1 = 0.0; + if (src->num_cursors > 1) { + calculate_ttu_cursor(mode_lib, + &refcyc_per_req_delivery_pre_cur1, + &refcyc_per_req_delivery_cur1, + refclk_freq_in_mhz, + ref_freq_to_pix_freq, + hscale_pixel_rate_l, + scl->hscl_ratio, + vratio_pre_l, + vratio_l, + src->cur1_src_width, + (enum cursor_bpp)(src->cur1_bpp)); + } + + // TTU - Misc + // all hard-coded + + // Assignment to register structures + disp_dlg_regs->dst_y_after_scaler = dst_y_after_scaler; // in terms of line + disp_dlg_regs->refcyc_x_after_scaler = dst_x_after_scaler * ref_freq_to_pix_freq; // in terms of refclk + ASSERT(disp_dlg_regs->refcyc_x_after_scaler < (unsigned int) dml_pow(2, 13)); + disp_dlg_regs->dst_y_prefetch = (unsigned int) (dst_y_prefetch * dml_pow(2, 2)); + disp_dlg_regs->dst_y_per_vm_vblank = (unsigned int) (dst_y_per_vm_vblank * dml_pow(2, 2)); + disp_dlg_regs->dst_y_per_row_vblank = (unsigned int) (dst_y_per_row_vblank * dml_pow(2, 2)); + disp_dlg_regs->dst_y_per_vm_flip = (unsigned int) (dst_y_per_vm_flip * dml_pow(2, 2)); + disp_dlg_regs->dst_y_per_row_flip = (unsigned int) (dst_y_per_row_flip * dml_pow(2, 2)); + + disp_dlg_regs->vratio_prefetch = (unsigned int) (vratio_pre_l * dml_pow(2, 19)); + disp_dlg_regs->vratio_prefetch_c = (unsigned int) (vratio_pre_c * dml_pow(2, 19)); + + disp_dlg_regs->refcyc_per_pte_group_vblank_l = + (unsigned int) (dst_y_per_row_vblank * (double) htotal + * ref_freq_to_pix_freq / (double) dpte_groups_per_row_ub_l); + ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int) dml_pow(2, 13)); + + if (dual_plane) { + disp_dlg_regs->refcyc_per_pte_group_vblank_c = (unsigned int) (dst_y_per_row_vblank + * (double) htotal * ref_freq_to_pix_freq + / (double) dpte_groups_per_row_ub_c); + ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_c + < (unsigned int) dml_pow(2, 13)); + } + + disp_dlg_regs->refcyc_per_meta_chunk_vblank_l = + (unsigned int) (dst_y_per_row_vblank * (double) htotal + * ref_freq_to_pix_freq / (double) meta_chunks_per_row_ub_l); + ASSERT(disp_dlg_regs->refcyc_per_meta_chunk_vblank_l < (unsigned int) dml_pow(2, 13)); + + disp_dlg_regs->refcyc_per_meta_chunk_vblank_c = + disp_dlg_regs->refcyc_per_meta_chunk_vblank_l; // dcc for 4:2:0 is not supported in dcn1.0. assigned to be the same as _l for now + + disp_dlg_regs->refcyc_per_pte_group_flip_l = (unsigned int) (dst_y_per_row_flip * htotal + * ref_freq_to_pix_freq) / dpte_groups_per_row_ub_l; + disp_dlg_regs->refcyc_per_meta_chunk_flip_l = (unsigned int) (dst_y_per_row_flip * htotal + * ref_freq_to_pix_freq) / meta_chunks_per_row_ub_l; + + if (dual_plane) { + disp_dlg_regs->refcyc_per_pte_group_flip_c = (unsigned int) (dst_y_per_row_flip + * htotal * ref_freq_to_pix_freq) / dpte_groups_per_row_ub_c; + disp_dlg_regs->refcyc_per_meta_chunk_flip_c = (unsigned int) (dst_y_per_row_flip + * htotal * ref_freq_to_pix_freq) / meta_chunks_per_row_ub_c; + } + + disp_dlg_regs->dst_y_per_pte_row_nom_l = (unsigned int) ((double) dpte_row_height_l + / (double) vratio_l * dml_pow(2, 2)); + ASSERT(disp_dlg_regs->dst_y_per_pte_row_nom_l < (unsigned int) dml_pow(2, 17)); + + if (dual_plane) { + disp_dlg_regs->dst_y_per_pte_row_nom_c = (unsigned int) ((double) dpte_row_height_c + / (double) vratio_c * dml_pow(2, 2)); + if (disp_dlg_regs->dst_y_per_pte_row_nom_c >= (unsigned int) dml_pow(2, 17)) { + dml_print("DML_DLG: %s: Warning dst_y_per_pte_row_nom_c %u larger than supported by register format U15.2 %u\n", + __func__, + disp_dlg_regs->dst_y_per_pte_row_nom_c, + (unsigned int) dml_pow(2, 17) - 1); + } + } + + disp_dlg_regs->dst_y_per_meta_row_nom_l = (unsigned int) ((double) meta_row_height_l + / (double) vratio_l * dml_pow(2, 2)); + ASSERT(disp_dlg_regs->dst_y_per_meta_row_nom_l < (unsigned int) dml_pow(2, 17)); + + disp_dlg_regs->dst_y_per_meta_row_nom_c = disp_dlg_regs->dst_y_per_meta_row_nom_l; // TODO: dcc for 4:2:0 is not supported in dcn1.0. assigned to be the same as _l for now + + disp_dlg_regs->refcyc_per_pte_group_nom_l = (unsigned int) ((double) dpte_row_height_l + / (double) vratio_l * (double) htotal * ref_freq_to_pix_freq + / (double) dpte_groups_per_row_ub_l); + if (disp_dlg_regs->refcyc_per_pte_group_nom_l >= (unsigned int) dml_pow(2, 23)) + disp_dlg_regs->refcyc_per_pte_group_nom_l = dml_pow(2, 23) - 1; + disp_dlg_regs->refcyc_per_meta_chunk_nom_l = (unsigned int) ((double) meta_row_height_l + / (double) vratio_l * (double) htotal * ref_freq_to_pix_freq + / (double) meta_chunks_per_row_ub_l); + if (disp_dlg_regs->refcyc_per_meta_chunk_nom_l >= (unsigned int) dml_pow(2, 23)) + disp_dlg_regs->refcyc_per_meta_chunk_nom_l = dml_pow(2, 23) - 1; + + if (dual_plane) { + disp_dlg_regs->refcyc_per_pte_group_nom_c = + (unsigned int) ((double) dpte_row_height_c / (double) vratio_c + * (double) htotal * ref_freq_to_pix_freq + / (double) dpte_groups_per_row_ub_c); + if (disp_dlg_regs->refcyc_per_pte_group_nom_c >= (unsigned int) dml_pow(2, 23)) + disp_dlg_regs->refcyc_per_pte_group_nom_c = dml_pow(2, 23) - 1; + + // TODO: Is this the right calculation? Does htotal need to be halved? + disp_dlg_regs->refcyc_per_meta_chunk_nom_c = + (unsigned int) ((double) meta_row_height_c / (double) vratio_c + * (double) htotal * ref_freq_to_pix_freq + / (double) meta_chunks_per_row_ub_c); + if (disp_dlg_regs->refcyc_per_meta_chunk_nom_c >= (unsigned int) dml_pow(2, 23)) + disp_dlg_regs->refcyc_per_meta_chunk_nom_c = dml_pow(2, 23) - 1; + } + + disp_dlg_regs->refcyc_per_line_delivery_pre_l = (unsigned int) dml_floor(refcyc_per_line_delivery_pre_l, + 1); + disp_dlg_regs->refcyc_per_line_delivery_l = (unsigned int) dml_floor(refcyc_per_line_delivery_l, + 1); + ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_l < (unsigned int) dml_pow(2, 13)); + ASSERT(disp_dlg_regs->refcyc_per_line_delivery_l < (unsigned int) dml_pow(2, 13)); + + disp_dlg_regs->refcyc_per_line_delivery_pre_c = (unsigned int) dml_floor(refcyc_per_line_delivery_pre_c, + 1); + disp_dlg_regs->refcyc_per_line_delivery_c = (unsigned int) dml_floor(refcyc_per_line_delivery_c, + 1); + ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_c < (unsigned int) dml_pow(2, 13)); + ASSERT(disp_dlg_regs->refcyc_per_line_delivery_c < (unsigned int) dml_pow(2, 13)); + + disp_dlg_regs->chunk_hdl_adjust_cur0 = 3; + disp_dlg_regs->dst_y_offset_cur0 = 0; + disp_dlg_regs->chunk_hdl_adjust_cur1 = 3; + disp_dlg_regs->dst_y_offset_cur1 = 0; + + disp_dlg_regs->xfc_reg_transfer_delay = xfc_transfer_delay; + disp_dlg_regs->xfc_reg_precharge_delay = xfc_precharge_delay; + disp_dlg_regs->xfc_reg_remote_surface_flip_latency = xfc_remote_surface_flip_latency; + disp_dlg_regs->xfc_reg_prefetch_margin = dml_ceil(xfc_prefetch_margin * refclk_freq_in_mhz, + 1); + + // slave has to have this value also set to off + if (src->xfc_enable && !src->xfc_slave) + disp_dlg_regs->dst_y_delta_drq_limit = dml_ceil(xfc_dst_y_delta_drq_limit, 1); + else + disp_dlg_regs->dst_y_delta_drq_limit = 0x7fff; // off + + disp_ttu_regs->refcyc_per_req_delivery_pre_l = (unsigned int) (refcyc_per_req_delivery_pre_l + * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_l = (unsigned int) (refcyc_per_req_delivery_l + * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_pre_c = (unsigned int) (refcyc_per_req_delivery_pre_c + * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_c = (unsigned int) (refcyc_per_req_delivery_c + * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_pre_cur0 = + (unsigned int) (refcyc_per_req_delivery_pre_cur0 * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_cur0 = (unsigned int) (refcyc_per_req_delivery_cur0 + * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_pre_cur1 = + (unsigned int) (refcyc_per_req_delivery_pre_cur1 * dml_pow(2, 10)); + disp_ttu_regs->refcyc_per_req_delivery_cur1 = (unsigned int) (refcyc_per_req_delivery_cur1 + * dml_pow(2, 10)); + disp_ttu_regs->qos_level_low_wm = 0; + ASSERT(disp_ttu_regs->qos_level_low_wm < dml_pow(2, 14)); + disp_ttu_regs->qos_level_high_wm = (unsigned int) (4.0 * (double) htotal + * ref_freq_to_pix_freq); + /*ASSERT(disp_ttu_regs->qos_level_high_wm < dml_pow(2, 14));*/ + + disp_ttu_regs->qos_level_flip = 14; + disp_ttu_regs->qos_level_fixed_l = 8; + disp_ttu_regs->qos_level_fixed_c = 8; + disp_ttu_regs->qos_level_fixed_cur0 = 8; + disp_ttu_regs->qos_ramp_disable_l = 0; + disp_ttu_regs->qos_ramp_disable_c = 0; + disp_ttu_regs->qos_ramp_disable_cur0 = 0; + + disp_ttu_regs->min_ttu_vblank = min_ttu_vblank * refclk_freq_in_mhz; + ASSERT(disp_ttu_regs->min_ttu_vblank < dml_pow(2, 24)); + + print__ttu_regs_st(mode_lib, *disp_ttu_regs); + print__dlg_regs_st(mode_lib, *disp_dlg_regs); +} + +void dml20v2_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib, + display_dlg_regs_st *dlg_regs, + display_ttu_regs_st *ttu_regs, + display_e2e_pipe_params_st *e2e_pipe_param, + const unsigned int num_pipes, + const unsigned int pipe_idx, + const bool cstate_en, + const bool pstate_en, + const bool vm_en, + const bool ignore_viewport_pos, + const bool immediate_flip_support) +{ + display_rq_params_st rq_param = {0}; + display_dlg_sys_params_st dlg_sys_param = {0}; + + // Get watermark and Tex. + dlg_sys_param.t_urg_wm_us = get_wm_urgent(mode_lib, e2e_pipe_param, num_pipes); + dlg_sys_param.deepsleep_dcfclk_mhz = get_clk_dcf_deepsleep(mode_lib, + e2e_pipe_param, + num_pipes); + dlg_sys_param.t_extra_us = get_urgent_extra_latency(mode_lib, e2e_pipe_param, num_pipes); + dlg_sys_param.mem_trip_us = get_wm_memory_trip(mode_lib, e2e_pipe_param, num_pipes); + dlg_sys_param.t_mclk_wm_us = get_wm_dram_clock_change(mode_lib, e2e_pipe_param, num_pipes); + dlg_sys_param.t_sr_wm_us = get_wm_stutter_enter_exit(mode_lib, e2e_pipe_param, num_pipes); + dlg_sys_param.total_flip_bw = get_total_immediate_flip_bw(mode_lib, + e2e_pipe_param, + num_pipes); + dlg_sys_param.total_flip_bytes = get_total_immediate_flip_bytes(mode_lib, + e2e_pipe_param, + num_pipes); + dlg_sys_param.t_srx_delay_us = mode_lib->ip.dcfclk_cstate_latency + / dlg_sys_param.deepsleep_dcfclk_mhz; // TODO: Deprecated + + print__dlg_sys_params_st(mode_lib, dlg_sys_param); + + // system parameter calculation done + + dml_print("DML_DLG: Calculation for pipe[%d] start\n\n", pipe_idx); + dml20v2_rq_dlg_get_rq_params(mode_lib, &rq_param, e2e_pipe_param[pipe_idx].pipe.src); + dml20v2_rq_dlg_get_dlg_params(mode_lib, + e2e_pipe_param, + num_pipes, + pipe_idx, + dlg_regs, + ttu_regs, + rq_param.dlg, + dlg_sys_param, + cstate_en, + pstate_en); + dml_print("DML_DLG: Calculation for pipe[%d] end\n", pipe_idx); +} + +static void calculate_ttu_cursor(struct display_mode_lib *mode_lib, + double *refcyc_per_req_delivery_pre_cur, + double *refcyc_per_req_delivery_cur, + double refclk_freq_in_mhz, + double ref_freq_to_pix_freq, + double hscale_pixel_rate_l, + double hscl_ratio, + double vratio_pre_l, + double vratio_l, + unsigned int cur_width, + enum cursor_bpp cur_bpp) +{ + unsigned int cur_src_width = cur_width; + unsigned int cur_req_size = 0; + unsigned int cur_req_width = 0; + double cur_width_ub = 0.0; + double cur_req_per_width = 0.0; + double hactive_cur = 0.0; + + ASSERT(cur_src_width <= 256); + + *refcyc_per_req_delivery_pre_cur = 0.0; + *refcyc_per_req_delivery_cur = 0.0; + if (cur_src_width > 0) { + unsigned int cur_bit_per_pixel = 0; + + if (cur_bpp == dm_cur_2bit) { + cur_req_size = 64; // byte + cur_bit_per_pixel = 2; + } else { // 32bit + cur_bit_per_pixel = 32; + if (cur_src_width >= 1 && cur_src_width <= 16) + cur_req_size = 64; + else if (cur_src_width >= 17 && cur_src_width <= 31) + cur_req_size = 128; + else + cur_req_size = 256; + } + + cur_req_width = (double) cur_req_size / ((double) cur_bit_per_pixel / 8.0); + cur_width_ub = dml_ceil((double) cur_src_width / (double) cur_req_width, 1) + * (double) cur_req_width; + cur_req_per_width = cur_width_ub / (double) cur_req_width; + hactive_cur = (double) cur_src_width / hscl_ratio; // FIXME: oswin to think about what to do for cursor + + if (vratio_pre_l <= 1.0) { + *refcyc_per_req_delivery_pre_cur = hactive_cur * ref_freq_to_pix_freq + / (double) cur_req_per_width; + } else { + *refcyc_per_req_delivery_pre_cur = (double) refclk_freq_in_mhz + * (double) cur_src_width / hscale_pixel_rate_l + / (double) cur_req_per_width; + } + + ASSERT(*refcyc_per_req_delivery_pre_cur < dml_pow(2, 13)); + + if (vratio_l <= 1.0) { + *refcyc_per_req_delivery_cur = hactive_cur * ref_freq_to_pix_freq + / (double) cur_req_per_width; + } else { + *refcyc_per_req_delivery_cur = (double) refclk_freq_in_mhz + * (double) cur_src_width / hscale_pixel_rate_l + / (double) cur_req_per_width; + } + + dml_print("DML_DLG: %s: cur_req_width = %d\n", + __func__, + cur_req_width); + dml_print("DML_DLG: %s: cur_width_ub = %3.2f\n", + __func__, + cur_width_ub); + dml_print("DML_DLG: %s: cur_req_per_width = %3.2f\n", + __func__, + cur_req_per_width); + dml_print("DML_DLG: %s: hactive_cur = %3.2f\n", + __func__, + hactive_cur); + dml_print("DML_DLG: %s: refcyc_per_req_delivery_pre_cur = %3.2f\n", + __func__, + *refcyc_per_req_delivery_pre_cur); + dml_print("DML_DLG: %s: refcyc_per_req_delivery_cur = %3.2f\n", + __func__, + *refcyc_per_req_delivery_cur); + + ASSERT(*refcyc_per_req_delivery_cur < dml_pow(2, 13)); + } +} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.h b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.h new file mode 100644 index 000000000000..0378406bf7e7 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.h @@ -0,0 +1,74 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DML20V2_DISPLAY_RQ_DLG_CALC_H__ +#define __DML20V2_DISPLAY_RQ_DLG_CALC_H__ + +#include "../dml_common_defs.h" +#include "../display_rq_dlg_helpers.h" + +struct display_mode_lib; + + +// Function: dml_rq_dlg_get_rq_reg +// Main entry point for test to get the register values out of this DML class. +// This function calls <get_rq_param> and <extract_rq_regs> fucntions to calculate +// and then populate the rq_regs struct +// Input: +// pipe_src_param - pipe source configuration (e.g. vp, pitch, etc.) +// Output: +// rq_regs - struct that holds all the RQ registers field value. +// See also: <display_rq_regs_st> +void dml20v2_rq_dlg_get_rq_reg( + struct display_mode_lib *mode_lib, + display_rq_regs_st *rq_regs, + const display_pipe_params_st pipe_param); + + +// Function: dml_rq_dlg_get_dlg_reg +// Calculate and return DLG and TTU register struct given the system setting +// Output: +// dlg_regs - output DLG register struct +// ttu_regs - output DLG TTU register struct +// Input: +// e2e_pipe_param - "compacted" array of e2e pipe param struct +// num_pipes - num of active "pipe" or "route" +// pipe_idx - index that identifies the e2e_pipe_param that corresponding to this dlg +// cstate - 0: when calculate min_ttu_vblank it is assumed cstate is not required. 1: Normal mode, cstate is considered. +// Added for legacy or unrealistic timing tests. +void dml20v2_rq_dlg_get_dlg_reg( + struct display_mode_lib *mode_lib, + display_dlg_regs_st *dlg_regs, + display_ttu_regs_st *ttu_regs, + display_e2e_pipe_params_st *e2e_pipe_param, + const unsigned int num_pipes, + const unsigned int pipe_idx, + const bool cstate_en, + const bool pstate_en, + const bool vm_en, + const bool ignore_viewport_pos, + const bool immediate_flip_support); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c index 91810c7d5cf5..96dfcd8c36bc 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c @@ -28,6 +28,8 @@ #if defined(CONFIG_DRM_AMD_DC_DCN2_0) #include "dcn20/display_mode_vba_20.h" #include "dcn20/display_rq_dlg_calc_20.h" +#include "dcn20/display_mode_vba_20v2.h" +#include "dcn20/display_rq_dlg_calc_20v2.h" #endif #if defined(CONFIG_DRM_AMD_DC_DCN2_0) @@ -37,6 +39,13 @@ const struct dml_funcs dml20_funcs = { .rq_dlg_get_dlg_reg = dml20_rq_dlg_get_dlg_reg, .rq_dlg_get_rq_reg = dml20_rq_dlg_get_rq_reg }; + +const struct dml_funcs dml20v2_funcs = { + .validate = dml20v2_ModeSupportAndSystemConfigurationFull, + .recalculate = dml20v2_recalculate, + .rq_dlg_get_dlg_reg = dml20v2_rq_dlg_get_dlg_reg, + .rq_dlg_get_rq_reg = dml20v2_rq_dlg_get_rq_reg +}; #endif void dml_init_instance(struct display_mode_lib *lib, @@ -52,6 +61,9 @@ void dml_init_instance(struct display_mode_lib *lib, case DML_PROJECT_NAVI10: lib->funcs = dml20_funcs; break; + case DML_PROJECT_NAVI10v2: + lib->funcs = dml20v2_funcs; + break; #endif default: break; diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h index 5bf13d67f289..870716e3c132 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h @@ -36,6 +36,7 @@ enum dml_project { DML_PROJECT_RAVEN1, #ifdef CONFIG_DRM_AMD_DC_DCN2_0 DML_PROJECT_NAVI10, + DML_PROJECT_NAVI10v2, #endif }; diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h index 5678472546ab..ab34fd26702f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h @@ -100,6 +100,7 @@ struct _vcs_dpi_soc_bounding_box_st { unsigned int vmm_page_size_bytes; unsigned int hostvm_min_page_size_bytes; double dram_clock_change_latency_us; + double dummy_pstate_latency_us; double writeback_dram_clock_change_latency_us; unsigned int return_bus_width_bytes; unsigned int voltage_override; diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c index 4d2a1262d9db..88e63f16f7fc 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c @@ -568,6 +568,7 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib) if (src->is_hsplit) { for (k = j + 1; k < mode_lib->vba.cache_num_pipes; ++k) { display_pipe_source_params_st *src_k = &pipes[k].pipe.src; + display_pipe_dest_params_st *dst_k = &pipes[k].pipe.dest; if (src_k->is_hsplit && !visited[k] && src->hsplit_grp == src_k->hsplit_grp) { @@ -575,12 +576,15 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib) mode_lib->vba.NumberOfActivePlanes; mode_lib->vba.DPPPerPlane[mode_lib->vba.NumberOfActivePlanes]++; if (mode_lib->vba.SourceScan[mode_lib->vba.NumberOfActivePlanes] - == dm_horz) + == dm_horz) { mode_lib->vba.ViewportWidth[mode_lib->vba.NumberOfActivePlanes] += src_k->viewport_width; - else + mode_lib->vba.ScalerRecoutWidth[mode_lib->vba.NumberOfActivePlanes] += + dst_k->recout_width; + } else { mode_lib->vba.ViewportHeight[mode_lib->vba.NumberOfActivePlanes] += src_k->viewport_height; + } visited[k] = true; } -- 2.22.0 _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx