Re: [PATCH v9] drm/i915: Update memory bandwidth formulae

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Replying to the right patch this time.
>From what the bspec says, the changes look good.
Minor feedback below inline.

With that change,
Reviewed-by: Anusha Srivatsa <anusha.srivatsa@xxxxxxxxx>


> -----Original Message-----
> From: Intel-gfx <intel-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of
> Radhakrishna Sripada
> Sent: Friday, October 15, 2021 2:01 PM
> To: intel-gfx@xxxxxxxxxxxxxxxxxxxxx
> Subject:  [PATCH v9] drm/i915: Update memory bandwidth
> formulae
> 
> The formulae has been updated to include more variables. Make sure the
> code carries the same.
> 
> Bspec: 64631, 54023
> 
> v2: Make GEN11 follow the default route and fix calculation of
>     maxdebw(RK)
> v3: Fix div by zero on default case
>     Correct indent for fallthrough(Jani)
> v4: Fix div by zero on gen11.
> v5: Fix 0 max_numchannels case
> v6:
>     - Split gen11/gen12 algorithms
>     - Fix RKL deburst value
>     - Fix difference b/ween ICL and TGL algorithms
>     - Protect deinterleave from being 0
>     - Warn when numchannels exceeds max_numchannels
>     - Fix scaling of clk_max from different units
>     - s/deinterleave/channelwidth/ in calculating peakbw
>     - Fix off by one for num_planes TGL+
>     - Fix SAGV check
> v7: Fix div by zero error on gen11
> v8: Even though the algorithm for gen11 says that we need to return
>     derated bw for a qgv point whose planes are less than no of active
>     planes, we return 0 for deratedbw when only one plane is allowed.
>     We modify the algorithm to accommodate the case where no of active
>     planes are same as the min no of planes supported by a qgv point.
> v9: Fix dclk scaling for dg1
> 
> Cc: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx>
> Suggested-by: Matt Roper <matthew.d.roper@xxxxxxxxx>
> Signed-off-by: Radhakrishna Sripada <radhakrishna.sripada@xxxxxxxxx>
> ---
>  drivers/gpu/drm/i915/display/intel_bw.c | 211 ++++++++++++++++++++----
>  1 file changed, 179 insertions(+), 32 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_bw.c
> b/drivers/gpu/drm/i915/display/intel_bw.c
> index 8d9d888e9316..15c006194c85 100644
> --- a/drivers/gpu/drm/i915/display/intel_bw.c
> +++ b/drivers/gpu/drm/i915/display/intel_bw.c
> @@ -27,6 +27,9 @@ struct intel_qgv_info {
>  	u8 num_points;
>  	u8 num_psf_points;
>  	u8 t_bl;
> +	u8 max_numchannels;
> +	u8 channel_width;
> +	u8 deinterleave;
>  };
> 
>  static int dg1_mchbar_read_qgv_point_info(struct drm_i915_private
> *dev_priv, @@ -42,7 +45,7 @@ static int
> dg1_mchbar_read_qgv_point_info(struct drm_i915_private *dev_priv,
>  		dclk_reference = 6; /* 6 * 16.666 MHz = 100 MHz */
>  	else
>  		dclk_reference = 8; /* 8 * 16.666 MHz = 133 MHz */
> -	sp->dclk = dclk_ratio * dclk_reference;
> +	sp->dclk = DIV_ROUND_UP((16667 * dclk_ratio * dclk_reference) +
> 500,
> +1000);
> 
>  	val = intel_uncore_read(&dev_priv->uncore,
> SKL_MC_BIOS_DATA_0_0_0_MCHBAR_PCU);
>  	if (val & DG1_GEAR_TYPE)
> @@ -69,6 +72,7 @@ static int icl_pcode_read_qgv_point_info(struct
> drm_i915_private *dev_priv,
>  					 int point)
>  {
>  	u32 val = 0, val2 = 0;
> +	u16 dclk;
>  	int ret;
> 
>  	ret = sandybridge_pcode_read(dev_priv, @@ -78,7 +82,8 @@ static
> int icl_pcode_read_qgv_point_info(struct drm_i915_private *dev_priv,
>  	if (ret)
>  		return ret;
> 
> -	sp->dclk = val & 0xffff;
> +	dclk = val & 0xffff;
> +	sp->dclk = DIV_ROUND_UP((16667 * dclk) + (DISPLAY_VER(dev_priv)
> > 11 ?
> +500 : 0), 1000);
>  	sp->t_rp = (val & 0xff0000) >> 16;
>  	sp->t_rcd = (val & 0xff000000) >> 24;
> 
> @@ -133,7 +138,8 @@ int icl_pcode_restrict_qgv_points(struct
> drm_i915_private *dev_priv,  }
> 
>  static int icl_get_qgv_points(struct drm_i915_private *dev_priv,
> -			      struct intel_qgv_info *qi)
> +			      struct intel_qgv_info *qi,
> +			      bool is_y_tile)
>  {
>  	const struct dram_info *dram_info = &dev_priv->dram_info;
>  	int i, ret;
> @@ -144,17 +150,41 @@ static int icl_get_qgv_points(struct
> drm_i915_private *dev_priv,
>  	if (DISPLAY_VER(dev_priv) == 12)
>  		switch (dram_info->type) {
>  		case INTEL_DRAM_DDR4:
> -			qi->t_bl = 4;
> +			qi->t_bl = is_y_tile ? 8 : 4;
> +			qi->max_numchannels = 2;
> +			qi->channel_width = 64;
> +			qi->deinterleave = is_y_tile ? 1 : 2;
>  			break;
>  		case INTEL_DRAM_DDR5:
> -			qi->t_bl = 8;
> +			qi->t_bl = is_y_tile ? 16 : 8;
> +			qi->max_numchannels = 4;
> +			qi->channel_width = 32;
> +			qi->deinterleave = is_y_tile ? 1 : 2;
> +			break;
> +		case INTEL_DRAM_LPDDR4:
> +			if (IS_ROCKETLAKE(dev_priv)) {
> +				qi->t_bl = 8;
> +				qi->max_numchannels = 4;
> +				qi->channel_width = 32;
> +				qi->deinterleave = 2;
> +				break;
> +			}
> +			fallthrough;
> +		case INTEL_DRAM_LPDDR5:
> +			qi->t_bl = 16;
> +			qi->max_numchannels = 8;
> +			qi->channel_width = 16;
> +			qi->deinterleave = is_y_tile ? 2 : 4;
>  			break;
>  		default:
>  			qi->t_bl = 16;
> +			qi->max_numchannels = 1;
>  			break;
>  		}
> -	else if (DISPLAY_VER(dev_priv) == 11)
> +	else if (DISPLAY_VER(dev_priv) == 11) {
>  		qi->t_bl = dev_priv->dram_info.type == INTEL_DRAM_DDR4 ?
> 4 : 8;
> +		qi->max_numchannels = 1;
> +	}
> 
>  	if (drm_WARN_ON(&dev_priv->drm,
>  			qi->num_points > ARRAY_SIZE(qi->points))) @@ -
> 193,12 +223,6 @@ static int icl_get_qgv_points(struct drm_i915_private
> *dev_priv,
>  	return 0;
>  }
> 
> -static int icl_calc_bw(int dclk, int num, int den) -{
> -	/* multiples of 16.666MHz (100/6) */
> -	return DIV_ROUND_CLOSEST(num * dclk * 100, den * 6);
> -}
> -
>  static int adl_calc_psf_bw(int clk)
>  {
>  	/*
> @@ -240,7 +264,7 @@ static const struct intel_sa_info tgl_sa_info = {  };
> 
>  static const struct intel_sa_info rkl_sa_info = {
> -	.deburst = 16,
> +	.deburst = 8,
>  	.deprogbwlimit = 20, /* GB/s */
>  	.displayrtids = 128,
>  	.derating = 10,
> @@ -265,35 +289,130 @@ static int icl_get_bw_info(struct drm_i915_private
> *dev_priv, const struct intel
>  	struct intel_qgv_info qi = {};
>  	bool is_y_tile = true; /* assume y tile may be used */
>  	int num_channels = max_t(u8, 1, dev_priv-
> >dram_info.num_channels);
> -	int deinterleave;
> -	int ipqdepth, ipqdepthpch;
> +	int ipqdepth, ipqdepthpch = 16;
>  	int dclk_max;
>  	int maxdebw;
> +	int num_groups = ARRAY_SIZE(dev_priv->max_bw);
>  	int i, ret;
> 
> -	ret = icl_get_qgv_points(dev_priv, &qi);
> +	ret = icl_get_qgv_points(dev_priv, &qi, is_y_tile);
>  	if (ret) {
>  		drm_dbg_kms(&dev_priv->drm,
>  			    "Failed to get memory subsystem information,
> ignoring bandwidth limits");
>  		return ret;
>  	}
> 
> -	deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);
>  	dclk_max = icl_sagv_max_dclk(&qi);
> +	maxdebw = min(sa->deprogbwlimit * 1000, dclk_max * 16 * 6 / 10);
> +	ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels);
> +	qi.deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);
> +
> +	for (i = 0; i < num_groups; i++) {
> +		struct intel_bw_info *bi = &dev_priv->max_bw[i];
> +		int clpchgroup;
> +		int j;
> +
> +		clpchgroup = (sa->deburst * qi.deinterleave / num_channels)
> << i;
> +		bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1;
> +
> +		bi->num_qgv_points = qi.num_points;
> +		bi->num_psf_gv_points = qi.num_psf_points;
> +
> +		for (j = 0; j < qi.num_points; j++) {
The j here can be more descriptive to make it easy to understand. s/j/sagv probably?

Thanks,
Anusha
> +			const struct intel_qgv_point *sp = &qi.points[j];
> +			int ct, bw;
> +
> +			/*
> +			 * Max row cycle time
> +			 *
> +			 * FIXME what is the logic behind the
> +			 * assumed burst length?
> +			 */
> +			ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd +
> +				   (clpchgroup - 1) * qi.t_bl + sp->t_rdpre);
> +			bw = DIV_ROUND_UP(sp->dclk * clpchgroup * 32 *
> num_channels, ct);
> 
> -	ipqdepthpch = 16;
> +			bi->deratedbw[j] = min(maxdebw,
> +					       bw * (100 - sa->derating) / 100);
> +
> +			drm_dbg_kms(&dev_priv->drm,
> +				    "BW%d / QGV %d: num_planes=%d
> deratedbw=%u\n",
> +				    i, j, bi->num_planes, bi->deratedbw[j]);
> +		}
> +	}
> +	/*
> +	 * In case if SAGV is disabled in BIOS, we always get 1
> +	 * SAGV point, but we can't send PCode commands to restrict it
> +	 * as it will fail and pointless anyway.
> +	 */
> +	if (qi.num_points == 1)
> +		dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
> +	else
> +		dev_priv->sagv_status = I915_SAGV_ENABLED;
> +
> +	return 0;
> +}
> +
> +static int tgl_get_bw_info(struct drm_i915_private *dev_priv, const
> +struct intel_sa_info *sa) {
> +	struct intel_qgv_info qi = {};
> +	const struct dram_info *dram_info = &dev_priv->dram_info;
> +	bool is_y_tile = true; /* assume y tile may be used */
> +	int num_channels = max_t(u8, 1, dev_priv-
> >dram_info.num_channels);
> +	int ipqdepth, ipqdepthpch = 16;
> +	int dclk_max;
> +	int maxdebw, peakbw;
> +	int clperchgroup;
> +	int num_groups = ARRAY_SIZE(dev_priv->max_bw);
> +	int i, ret;
> +
> +	ret = icl_get_qgv_points(dev_priv, &qi, is_y_tile);
> +	if (ret) {
> +		drm_dbg_kms(&dev_priv->drm,
> +			    "Failed to get memory subsystem information,
> ignoring bandwidth limits");
> +		return ret;
> +	}
> +
> +	if (dram_info->type == INTEL_DRAM_LPDDR4 || dram_info->type ==
> INTEL_DRAM_LPDDR5)
> +		num_channels *= 2;
> +
> +	qi.deinterleave = qi.deinterleave ? : DIV_ROUND_UP(num_channels,
> +is_y_tile ? 4 : 2);
> +
> +	if (num_channels < qi.max_numchannels && DISPLAY_VER(dev_priv)
> >= 12)
> +		qi.deinterleave = max(DIV_ROUND_UP(qi.deinterleave, 2), 1);
> +
> +	if (DISPLAY_VER(dev_priv) > 11 && num_channels >
> qi.max_numchannels)
> +		drm_warn(&dev_priv->drm, "Number of channels exceeds
> max number of channels.");
> +	if (qi.max_numchannels != 0)
> +		num_channels = min_t(u8, num_channels,
> qi.max_numchannels);
> +
> +	dclk_max = icl_sagv_max_dclk(&qi);
> +
> +	peakbw = num_channels * DIV_ROUND_UP(qi.channel_width, 8) *
> dclk_max;
> +	maxdebw = min(sa->deprogbwlimit * 1000, peakbw * 6 / 10); /* 60%
> */
> 
> -	maxdebw = min(sa->deprogbwlimit * 1000,
> -		      icl_calc_bw(dclk_max, 16, 1) * 6 / 10); /* 60% */
>  	ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels);
> +	/*
> +	 * clperchgroup = 4kpagespermempage * clperchperblock,
> +	 * clperchperblock = 8 / num_channels * interleave
> +	 */
> +	clperchgroup = 4 * DIV_ROUND_UP(8, num_channels) *
> qi.deinterleave;
> 
> -	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
> +	for (i = 0; i < num_groups; i++) {
>  		struct intel_bw_info *bi = &dev_priv->max_bw[i];
> +		struct intel_bw_info *bi_next;
>  		int clpchgroup;
>  		int j;
> 
> -		clpchgroup = (sa->deburst * deinterleave / num_channels) <<
> i;
> -		bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1;
> +		if (i < num_groups - 1)
> +			bi_next = &dev_priv->max_bw[i + 1];
> +
> +		clpchgroup = (sa->deburst * qi.deinterleave / num_channels)
> << i;
> +
> +		if (i < num_groups - 1 && clpchgroup < clperchgroup)
> +			bi_next->num_planes = (ipqdepth - clpchgroup) /
> clpchgroup + 1;
> +		else
> +			bi_next->num_planes = 0;
> 
>  		bi->num_qgv_points = qi.num_points;
>  		bi->num_psf_gv_points = qi.num_psf_points; @@ -310,7
> +429,7 @@ static int icl_get_bw_info(struct drm_i915_private *dev_priv,
> const struct intel
>  			 */
>  			ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd +
>  				   (clpchgroup - 1) * qi.t_bl + sp->t_rdpre);
> -			bw = icl_calc_bw(sp->dclk, clpchgroup * 32 *
> num_channels, ct);
> +			bw = DIV_ROUND_UP(sp->dclk * clpchgroup * 32 *
> num_channels, ct);
> 
>  			bi->deratedbw[j] = min(maxdebw,
>  					       bw * (100 - sa->derating) / 100);
> @@ -329,9 +448,6 @@ static int icl_get_bw_info(struct drm_i915_private
> *dev_priv, const struct intel
>  				    "BW%d / PSF GV %d: num_planes=%d
> bw=%u\n",
>  				    i, j, bi->num_planes, bi->psf_bw[j]);
>  		}
> -
> -		if (bi->num_planes == 1)
> -			break;
>  	}
> 
>  	/*
> @@ -395,6 +511,34 @@ static unsigned int icl_max_bw(struct
> drm_i915_private *dev_priv,
>  	return 0;
>  }
> 
> +static unsigned int tgl_max_bw(struct drm_i915_private *dev_priv,
> +			       int num_planes, int qgv_point) {
> +	int i;
> +
> +	/*
> +	 * Let's return max bw for 0 planes
> +	 */
> +	num_planes = max(1, num_planes);
> +
> +	for (i = ARRAY_SIZE(dev_priv->max_bw) - 1; i >= 0; i--) {
> +		const struct intel_bw_info *bi =
> +			&dev_priv->max_bw[i];
> +
> +		/*
> +		 * Pcode will not expose all QGV points when
> +		 * SAGV is forced to off/min/med/max.
> +		 */
> +		if (qgv_point >= bi->num_qgv_points)
> +			return UINT_MAX;
> +
> +		if (num_planes <= bi->num_planes)
> +			return bi->deratedbw[qgv_point];
> +	}
> +
> +	return dev_priv->max_bw[0].deratedbw[qgv_point];
> +}
> +
>  static unsigned int adl_psf_bw(struct drm_i915_private *dev_priv,
>  			       int psf_gv_point)
>  {
> @@ -412,13 +556,13 @@ void intel_bw_init_hw(struct drm_i915_private
> *dev_priv)
>  	if (IS_DG2(dev_priv))
>  		dg2_get_bw_info(dev_priv);
>  	else if (IS_ALDERLAKE_P(dev_priv))
> -		icl_get_bw_info(dev_priv, &adlp_sa_info);
> +		tgl_get_bw_info(dev_priv, &adlp_sa_info);
>  	else if (IS_ALDERLAKE_S(dev_priv))
> -		icl_get_bw_info(dev_priv, &adls_sa_info);
> +		tgl_get_bw_info(dev_priv, &adls_sa_info);
>  	else if (IS_ROCKETLAKE(dev_priv))
> -		icl_get_bw_info(dev_priv, &rkl_sa_info);
> +		tgl_get_bw_info(dev_priv, &rkl_sa_info);
>  	else if (DISPLAY_VER(dev_priv) == 12)
> -		icl_get_bw_info(dev_priv, &tgl_sa_info);
> +		tgl_get_bw_info(dev_priv, &tgl_sa_info);
>  	else if (DISPLAY_VER(dev_priv) == 11)
>  		icl_get_bw_info(dev_priv, &icl_sa_info);  } @@ -746,7
> +890,10 @@ int intel_bw_atomic_check(struct intel_atomic_state *state)
>  	for (i = 0; i < num_qgv_points; i++) {
>  		unsigned int max_data_rate;
> 
> -		max_data_rate = icl_max_bw(dev_priv, num_active_planes,
> i);
> +		if (DISPLAY_VER(dev_priv) > 11)
> +			max_data_rate = tgl_max_bw(dev_priv,
> num_active_planes, i);
> +		else
> +			max_data_rate = icl_max_bw(dev_priv,
> num_active_planes, i);
>  		/*
>  		 * We need to know which qgv point gives us
>  		 * maximum bandwidth in order to disable SAGV
> --
> 2.20.1





[Index of Archives]     [AMD Graphics]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux