> -----Original Message----- > From: Srivatsa, Anusha <anusha.srivatsa@xxxxxxxxx> > Sent: Thursday, October 28, 2021 2:04 PM > To: Sripada, Radhakrishna <radhakrishna.sripada@xxxxxxxxx>; intel- > gfx@xxxxxxxxxxxxxxxxxxxxx > Subject: RE: [PATCH v9] drm/i915: Update memory bandwidth > formulae > > Replying to the right patch this time. > From what the bspec says, the changes look good. > Minor feedback below inline. > > With that change, > Reviewed-by: Anusha Srivatsa <anusha.srivatsa@xxxxxxxxx> > > > > -----Original Message----- > > From: Intel-gfx <intel-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of > > Radhakrishna Sripada > > Sent: Friday, October 15, 2021 2:01 PM > > To: intel-gfx@xxxxxxxxxxxxxxxxxxxxx > > Subject: [PATCH v9] drm/i915: Update memory bandwidth > > formulae > > > > The formulae has been updated to include more variables. Make sure the > > code carries the same. > > > > Bspec: 64631, 54023 > > > > v2: Make GEN11 follow the default route and fix calculation of > > maxdebw(RK) > > v3: Fix div by zero on default case > > Correct indent for fallthrough(Jani) > > v4: Fix div by zero on gen11. > > v5: Fix 0 max_numchannels case > > v6: > > - Split gen11/gen12 algorithms > > - Fix RKL deburst value > > - Fix difference b/ween ICL and TGL algorithms > > - Protect deinterleave from being 0 > > - Warn when numchannels exceeds max_numchannels > > - Fix scaling of clk_max from different units > > - s/deinterleave/channelwidth/ in calculating peakbw > > - Fix off by one for num_planes TGL+ > > - Fix SAGV check > > v7: Fix div by zero error on gen11 > > v8: Even though the algorithm for gen11 says that we need to return > > derated bw for a qgv point whose planes are less than no of active > > planes, we return 0 for deratedbw when only one plane is allowed. > > We modify the algorithm to accommodate the case where no of active > > planes are same as the min no of planes supported by a qgv point. > > v9: Fix dclk scaling for dg1 > > > > Cc: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx> > > Suggested-by: Matt Roper <matthew.d.roper@xxxxxxxxx> > > Signed-off-by: Radhakrishna Sripada <radhakrishna.sripada@xxxxxxxxx> > > --- > > drivers/gpu/drm/i915/display/intel_bw.c | 211 ++++++++++++++++++++---- > > 1 file changed, 179 insertions(+), 32 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/display/intel_bw.c > > b/drivers/gpu/drm/i915/display/intel_bw.c > > index 8d9d888e9316..15c006194c85 100644 > > --- a/drivers/gpu/drm/i915/display/intel_bw.c > > +++ b/drivers/gpu/drm/i915/display/intel_bw.c > > @@ -27,6 +27,9 @@ struct intel_qgv_info { > > u8 num_points; > > u8 num_psf_points; > > u8 t_bl; > > + u8 max_numchannels; > > + u8 channel_width; > > + u8 deinterleave; > > }; > > > > static int dg1_mchbar_read_qgv_point_info(struct drm_i915_private > > *dev_priv, @@ -42,7 +45,7 @@ static int > > dg1_mchbar_read_qgv_point_info(struct drm_i915_private *dev_priv, > > dclk_reference = 6; /* 6 * 16.666 MHz = 100 MHz */ > > else > > dclk_reference = 8; /* 8 * 16.666 MHz = 133 MHz */ > > - sp->dclk = dclk_ratio * dclk_reference; > > + sp->dclk = DIV_ROUND_UP((16667 * dclk_ratio * dclk_reference) + > > 500, > > +1000); > > > > val = intel_uncore_read(&dev_priv->uncore, > > SKL_MC_BIOS_DATA_0_0_0_MCHBAR_PCU); > > if (val & DG1_GEAR_TYPE) > > @@ -69,6 +72,7 @@ static int icl_pcode_read_qgv_point_info(struct > > drm_i915_private *dev_priv, > > int point) > > { > > u32 val = 0, val2 = 0; > > + u16 dclk; > > int ret; > > > > ret = sandybridge_pcode_read(dev_priv, @@ -78,7 +82,8 @@ static > > int icl_pcode_read_qgv_point_info(struct drm_i915_private *dev_priv, > > if (ret) > > return ret; > > > > - sp->dclk = val & 0xffff; > > + dclk = val & 0xffff; > > + sp->dclk = DIV_ROUND_UP((16667 * dclk) + (DISPLAY_VER(dev_priv) > > > 11 ? > > +500 : 0), 1000); > > sp->t_rp = (val & 0xff0000) >> 16; > > sp->t_rcd = (val & 0xff000000) >> 24; > > > > @@ -133,7 +138,8 @@ int icl_pcode_restrict_qgv_points(struct > > drm_i915_private *dev_priv, } > > > > static int icl_get_qgv_points(struct drm_i915_private *dev_priv, > > - struct intel_qgv_info *qi) > > + struct intel_qgv_info *qi, > > + bool is_y_tile) > > { > > const struct dram_info *dram_info = &dev_priv->dram_info; > > int i, ret; > > @@ -144,17 +150,41 @@ static int icl_get_qgv_points(struct > > drm_i915_private *dev_priv, > > if (DISPLAY_VER(dev_priv) == 12) > > switch (dram_info->type) { > > case INTEL_DRAM_DDR4: > > - qi->t_bl = 4; > > + qi->t_bl = is_y_tile ? 8 : 4; > > + qi->max_numchannels = 2; > > + qi->channel_width = 64; > > + qi->deinterleave = is_y_tile ? 1 : 2; > > break; > > case INTEL_DRAM_DDR5: > > - qi->t_bl = 8; > > + qi->t_bl = is_y_tile ? 16 : 8; > > + qi->max_numchannels = 4; > > + qi->channel_width = 32; > > + qi->deinterleave = is_y_tile ? 1 : 2; > > + break; > > + case INTEL_DRAM_LPDDR4: > > + if (IS_ROCKETLAKE(dev_priv)) { > > + qi->t_bl = 8; > > + qi->max_numchannels = 4; > > + qi->channel_width = 32; > > + qi->deinterleave = 2; > > + break; > > + } > > + fallthrough; > > + case INTEL_DRAM_LPDDR5: > > + qi->t_bl = 16; > > + qi->max_numchannels = 8; > > + qi->channel_width = 16; > > + qi->deinterleave = is_y_tile ? 2 : 4; > > break; > > default: > > qi->t_bl = 16; > > + qi->max_numchannels = 1; > > break; > > } > > - else if (DISPLAY_VER(dev_priv) == 11) > > + else if (DISPLAY_VER(dev_priv) == 11) { > > qi->t_bl = dev_priv->dram_info.type == INTEL_DRAM_DDR4 ? > > 4 : 8; > > + qi->max_numchannels = 1; > > + } > > > > if (drm_WARN_ON(&dev_priv->drm, > > qi->num_points > ARRAY_SIZE(qi->points))) @@ - > > 193,12 +223,6 @@ static int icl_get_qgv_points(struct drm_i915_private > > *dev_priv, > > return 0; > > } > > > > -static int icl_calc_bw(int dclk, int num, int den) -{ > > - /* multiples of 16.666MHz (100/6) */ > > - return DIV_ROUND_CLOSEST(num * dclk * 100, den * 6); > > -} > > - > > static int adl_calc_psf_bw(int clk) > > { > > /* > > @@ -240,7 +264,7 @@ static const struct intel_sa_info tgl_sa_info = { }; > > > > static const struct intel_sa_info rkl_sa_info = { > > - .deburst = 16, > > + .deburst = 8, > > .deprogbwlimit = 20, /* GB/s */ > > .displayrtids = 128, > > .derating = 10, > > @@ -265,35 +289,130 @@ static int icl_get_bw_info(struct drm_i915_private > > *dev_priv, const struct intel > > struct intel_qgv_info qi = {}; > > bool is_y_tile = true; /* assume y tile may be used */ > > int num_channels = max_t(u8, 1, dev_priv- > > >dram_info.num_channels); > > - int deinterleave; > > - int ipqdepth, ipqdepthpch; > > + int ipqdepth, ipqdepthpch = 16; > > int dclk_max; > > int maxdebw; > > + int num_groups = ARRAY_SIZE(dev_priv->max_bw); > > int i, ret; > > > > - ret = icl_get_qgv_points(dev_priv, &qi); > > + ret = icl_get_qgv_points(dev_priv, &qi, is_y_tile); > > if (ret) { > > drm_dbg_kms(&dev_priv->drm, > > "Failed to get memory subsystem information, > > ignoring bandwidth limits"); > > return ret; > > } > > > > - deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2); > > dclk_max = icl_sagv_max_dclk(&qi); > > + maxdebw = min(sa->deprogbwlimit * 1000, dclk_max * 16 * 6 / 10); > > + ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels); > > + qi.deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2); > > + > > + for (i = 0; i < num_groups; i++) { > > + struct intel_bw_info *bi = &dev_priv->max_bw[i]; > > + int clpchgroup; > > + int j; > > + > > + clpchgroup = (sa->deburst * qi.deinterleave / num_channels) > > << i; > > + bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1; > > + > > + bi->num_qgv_points = qi.num_points; > > + bi->num_psf_gv_points = qi.num_psf_points; > > + > > + for (j = 0; j < qi.num_points; j++) { > The j here can be more descriptive to make it easy to understand. s/j/sagv > probably? I am not sure if this case warrants a departure from standard convention of naming iterators. This iterator j is used in the current version of the code and I am skeptical about changing it. Thanks for the review, Radhakrishna(RK) Sripada > > Thanks, > Anusha > > + const struct intel_qgv_point *sp = &qi.points[j]; > > + int ct, bw; > > + > > + /* > > + * Max row cycle time > > + * > > + * FIXME what is the logic behind the > > + * assumed burst length? > > + */ > > + ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd + > > + (clpchgroup - 1) * qi.t_bl + sp->t_rdpre); > > + bw = DIV_ROUND_UP(sp->dclk * clpchgroup * 32 * > > num_channels, ct); > > > > - ipqdepthpch = 16; > > + bi->deratedbw[j] = min(maxdebw, > > + bw * (100 - sa->derating) / 100); > > + > > + drm_dbg_kms(&dev_priv->drm, > > + "BW%d / QGV %d: num_planes=%d > > deratedbw=%u\n", > > + i, j, bi->num_planes, bi->deratedbw[j]); > > + } > > + } > > + /* > > + * In case if SAGV is disabled in BIOS, we always get 1 > > + * SAGV point, but we can't send PCode commands to restrict it > > + * as it will fail and pointless anyway. > > + */ > > + if (qi.num_points == 1) > > + dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED; > > + else > > + dev_priv->sagv_status = I915_SAGV_ENABLED; > > + > > + return 0; > > +} > > + > > +static int tgl_get_bw_info(struct drm_i915_private *dev_priv, const > > +struct intel_sa_info *sa) { > > + struct intel_qgv_info qi = {}; > > + const struct dram_info *dram_info = &dev_priv->dram_info; > > + bool is_y_tile = true; /* assume y tile may be used */ > > + int num_channels = max_t(u8, 1, dev_priv- > > >dram_info.num_channels); > > + int ipqdepth, ipqdepthpch = 16; > > + int dclk_max; > > + int maxdebw, peakbw; > > + int clperchgroup; > > + int num_groups = ARRAY_SIZE(dev_priv->max_bw); > > + int i, ret; > > + > > + ret = icl_get_qgv_points(dev_priv, &qi, is_y_tile); > > + if (ret) { > > + drm_dbg_kms(&dev_priv->drm, > > + "Failed to get memory subsystem information, > > ignoring bandwidth limits"); > > + return ret; > > + } > > + > > + if (dram_info->type == INTEL_DRAM_LPDDR4 || dram_info->type == > > INTEL_DRAM_LPDDR5) > > + num_channels *= 2; > > + > > + qi.deinterleave = qi.deinterleave ? : DIV_ROUND_UP(num_channels, > > +is_y_tile ? 4 : 2); > > + > > + if (num_channels < qi.max_numchannels && DISPLAY_VER(dev_priv) > > >= 12) > > + qi.deinterleave = max(DIV_ROUND_UP(qi.deinterleave, 2), 1); > > + > > + if (DISPLAY_VER(dev_priv) > 11 && num_channels > > > qi.max_numchannels) > > + drm_warn(&dev_priv->drm, "Number of channels exceeds > > max number of channels."); > > + if (qi.max_numchannels != 0) > > + num_channels = min_t(u8, num_channels, > > qi.max_numchannels); > > + > > + dclk_max = icl_sagv_max_dclk(&qi); > > + > > + peakbw = num_channels * DIV_ROUND_UP(qi.channel_width, 8) * > > dclk_max; > > + maxdebw = min(sa->deprogbwlimit * 1000, peakbw * 6 / 10); /* 60% > > */ > > > > - maxdebw = min(sa->deprogbwlimit * 1000, > > - icl_calc_bw(dclk_max, 16, 1) * 6 / 10); /* 60% */ > > ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels); > > + /* > > + * clperchgroup = 4kpagespermempage * clperchperblock, > > + * clperchperblock = 8 / num_channels * interleave > > + */ > > + clperchgroup = 4 * DIV_ROUND_UP(8, num_channels) * > > qi.deinterleave; > > > > - for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) { > > + for (i = 0; i < num_groups; i++) { > > struct intel_bw_info *bi = &dev_priv->max_bw[i]; > > + struct intel_bw_info *bi_next; > > int clpchgroup; > > int j; > > > > - clpchgroup = (sa->deburst * deinterleave / num_channels) << > > i; > > - bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1; > > + if (i < num_groups - 1) > > + bi_next = &dev_priv->max_bw[i + 1]; > > + > > + clpchgroup = (sa->deburst * qi.deinterleave / num_channels) > > << i; > > + > > + if (i < num_groups - 1 && clpchgroup < clperchgroup) > > + bi_next->num_planes = (ipqdepth - clpchgroup) / > > clpchgroup + 1; > > + else > > + bi_next->num_planes = 0; > > > > bi->num_qgv_points = qi.num_points; > > bi->num_psf_gv_points = qi.num_psf_points; @@ -310,7 > > +429,7 @@ static int icl_get_bw_info(struct drm_i915_private *dev_priv, > > const struct intel > > */ > > ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd + > > (clpchgroup - 1) * qi.t_bl + sp->t_rdpre); > > - bw = icl_calc_bw(sp->dclk, clpchgroup * 32 * > > num_channels, ct); > > + bw = DIV_ROUND_UP(sp->dclk * clpchgroup * 32 * > > num_channels, ct); > > > > bi->deratedbw[j] = min(maxdebw, > > bw * (100 - sa->derating) / 100); > > @@ -329,9 +448,6 @@ static int icl_get_bw_info(struct drm_i915_private > > *dev_priv, const struct intel > > "BW%d / PSF GV %d: num_planes=%d > > bw=%u\n", > > i, j, bi->num_planes, bi->psf_bw[j]); > > } > > - > > - if (bi->num_planes == 1) > > - break; > > } > > > > /* > > @@ -395,6 +511,34 @@ static unsigned int icl_max_bw(struct > > drm_i915_private *dev_priv, > > return 0; > > } > > > > +static unsigned int tgl_max_bw(struct drm_i915_private *dev_priv, > > + int num_planes, int qgv_point) { > > + int i; > > + > > + /* > > + * Let's return max bw for 0 planes > > + */ > > + num_planes = max(1, num_planes); > > + > > + for (i = ARRAY_SIZE(dev_priv->max_bw) - 1; i >= 0; i--) { > > + const struct intel_bw_info *bi = > > + &dev_priv->max_bw[i]; > > + > > + /* > > + * Pcode will not expose all QGV points when > > + * SAGV is forced to off/min/med/max. > > + */ > > + if (qgv_point >= bi->num_qgv_points) > > + return UINT_MAX; > > + > > + if (num_planes <= bi->num_planes) > > + return bi->deratedbw[qgv_point]; > > + } > > + > > + return dev_priv->max_bw[0].deratedbw[qgv_point]; > > +} > > + > > static unsigned int adl_psf_bw(struct drm_i915_private *dev_priv, > > int psf_gv_point) > > { > > @@ -412,13 +556,13 @@ void intel_bw_init_hw(struct drm_i915_private > > *dev_priv) > > if (IS_DG2(dev_priv)) > > dg2_get_bw_info(dev_priv); > > else if (IS_ALDERLAKE_P(dev_priv)) > > - icl_get_bw_info(dev_priv, &adlp_sa_info); > > + tgl_get_bw_info(dev_priv, &adlp_sa_info); > > else if (IS_ALDERLAKE_S(dev_priv)) > > - icl_get_bw_info(dev_priv, &adls_sa_info); > > + tgl_get_bw_info(dev_priv, &adls_sa_info); > > else if (IS_ROCKETLAKE(dev_priv)) > > - icl_get_bw_info(dev_priv, &rkl_sa_info); > > + tgl_get_bw_info(dev_priv, &rkl_sa_info); > > else if (DISPLAY_VER(dev_priv) == 12) > > - icl_get_bw_info(dev_priv, &tgl_sa_info); > > + tgl_get_bw_info(dev_priv, &tgl_sa_info); > > else if (DISPLAY_VER(dev_priv) == 11) > > icl_get_bw_info(dev_priv, &icl_sa_info); } @@ -746,7 > > +890,10 @@ int intel_bw_atomic_check(struct intel_atomic_state *state) > > for (i = 0; i < num_qgv_points; i++) { > > unsigned int max_data_rate; > > > > - max_data_rate = icl_max_bw(dev_priv, num_active_planes, > > i); > > + if (DISPLAY_VER(dev_priv) > 11) > > + max_data_rate = tgl_max_bw(dev_priv, > > num_active_planes, i); > > + else > > + max_data_rate = icl_max_bw(dev_priv, > > num_active_planes, i); > > /* > > * We need to know which qgv point gives us > > * maximum bandwidth in order to disable SAGV > > -- > > 2.20.1