Replying to the right patch this time. >From what the bspec says, the changes look good. Minor feedback below inline. With that change, Reviewed-by: Anusha Srivatsa <anusha.srivatsa@xxxxxxxxx> > -----Original Message----- > From: Intel-gfx <intel-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of > Radhakrishna Sripada > Sent: Friday, October 15, 2021 2:01 PM > To: intel-gfx@xxxxxxxxxxxxxxxxxxxxx > Subject: [PATCH v9] drm/i915: Update memory bandwidth > formulae > > The formulae has been updated to include more variables. Make sure the > code carries the same. > > Bspec: 64631, 54023 > > v2: Make GEN11 follow the default route and fix calculation of > maxdebw(RK) > v3: Fix div by zero on default case > Correct indent for fallthrough(Jani) > v4: Fix div by zero on gen11. > v5: Fix 0 max_numchannels case > v6: > - Split gen11/gen12 algorithms > - Fix RKL deburst value > - Fix difference b/ween ICL and TGL algorithms > - Protect deinterleave from being 0 > - Warn when numchannels exceeds max_numchannels > - Fix scaling of clk_max from different units > - s/deinterleave/channelwidth/ in calculating peakbw > - Fix off by one for num_planes TGL+ > - Fix SAGV check > v7: Fix div by zero error on gen11 > v8: Even though the algorithm for gen11 says that we need to return > derated bw for a qgv point whose planes are less than no of active > planes, we return 0 for deratedbw when only one plane is allowed. > We modify the algorithm to accommodate the case where no of active > planes are same as the min no of planes supported by a qgv point. > v9: Fix dclk scaling for dg1 > > Cc: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx> > Suggested-by: Matt Roper <matthew.d.roper@xxxxxxxxx> > Signed-off-by: Radhakrishna Sripada <radhakrishna.sripada@xxxxxxxxx> > --- > drivers/gpu/drm/i915/display/intel_bw.c | 211 ++++++++++++++++++++---- > 1 file changed, 179 insertions(+), 32 deletions(-) > > diff --git a/drivers/gpu/drm/i915/display/intel_bw.c > b/drivers/gpu/drm/i915/display/intel_bw.c > index 8d9d888e9316..15c006194c85 100644 > --- a/drivers/gpu/drm/i915/display/intel_bw.c > +++ b/drivers/gpu/drm/i915/display/intel_bw.c > @@ -27,6 +27,9 @@ struct intel_qgv_info { > u8 num_points; > u8 num_psf_points; > u8 t_bl; > + u8 max_numchannels; > + u8 channel_width; > + u8 deinterleave; > }; > > static int dg1_mchbar_read_qgv_point_info(struct drm_i915_private > *dev_priv, @@ -42,7 +45,7 @@ static int > dg1_mchbar_read_qgv_point_info(struct drm_i915_private *dev_priv, > dclk_reference = 6; /* 6 * 16.666 MHz = 100 MHz */ > else > dclk_reference = 8; /* 8 * 16.666 MHz = 133 MHz */ > - sp->dclk = dclk_ratio * dclk_reference; > + sp->dclk = DIV_ROUND_UP((16667 * dclk_ratio * dclk_reference) + > 500, > +1000); > > val = intel_uncore_read(&dev_priv->uncore, > SKL_MC_BIOS_DATA_0_0_0_MCHBAR_PCU); > if (val & DG1_GEAR_TYPE) > @@ -69,6 +72,7 @@ static int icl_pcode_read_qgv_point_info(struct > drm_i915_private *dev_priv, > int point) > { > u32 val = 0, val2 = 0; > + u16 dclk; > int ret; > > ret = sandybridge_pcode_read(dev_priv, @@ -78,7 +82,8 @@ static > int icl_pcode_read_qgv_point_info(struct drm_i915_private *dev_priv, > if (ret) > return ret; > > - sp->dclk = val & 0xffff; > + dclk = val & 0xffff; > + sp->dclk = DIV_ROUND_UP((16667 * dclk) + (DISPLAY_VER(dev_priv) > > 11 ? > +500 : 0), 1000); > sp->t_rp = (val & 0xff0000) >> 16; > sp->t_rcd = (val & 0xff000000) >> 24; > > @@ -133,7 +138,8 @@ int icl_pcode_restrict_qgv_points(struct > drm_i915_private *dev_priv, } > > static int icl_get_qgv_points(struct drm_i915_private *dev_priv, > - struct intel_qgv_info *qi) > + struct intel_qgv_info *qi, > + bool is_y_tile) > { > const struct dram_info *dram_info = &dev_priv->dram_info; > int i, ret; > @@ -144,17 +150,41 @@ static int icl_get_qgv_points(struct > drm_i915_private *dev_priv, > if (DISPLAY_VER(dev_priv) == 12) > switch (dram_info->type) { > case INTEL_DRAM_DDR4: > - qi->t_bl = 4; > + qi->t_bl = is_y_tile ? 8 : 4; > + qi->max_numchannels = 2; > + qi->channel_width = 64; > + qi->deinterleave = is_y_tile ? 1 : 2; > break; > case INTEL_DRAM_DDR5: > - qi->t_bl = 8; > + qi->t_bl = is_y_tile ? 16 : 8; > + qi->max_numchannels = 4; > + qi->channel_width = 32; > + qi->deinterleave = is_y_tile ? 1 : 2; > + break; > + case INTEL_DRAM_LPDDR4: > + if (IS_ROCKETLAKE(dev_priv)) { > + qi->t_bl = 8; > + qi->max_numchannels = 4; > + qi->channel_width = 32; > + qi->deinterleave = 2; > + break; > + } > + fallthrough; > + case INTEL_DRAM_LPDDR5: > + qi->t_bl = 16; > + qi->max_numchannels = 8; > + qi->channel_width = 16; > + qi->deinterleave = is_y_tile ? 2 : 4; > break; > default: > qi->t_bl = 16; > + qi->max_numchannels = 1; > break; > } > - else if (DISPLAY_VER(dev_priv) == 11) > + else if (DISPLAY_VER(dev_priv) == 11) { > qi->t_bl = dev_priv->dram_info.type == INTEL_DRAM_DDR4 ? > 4 : 8; > + qi->max_numchannels = 1; > + } > > if (drm_WARN_ON(&dev_priv->drm, > qi->num_points > ARRAY_SIZE(qi->points))) @@ - > 193,12 +223,6 @@ static int icl_get_qgv_points(struct drm_i915_private > *dev_priv, > return 0; > } > > -static int icl_calc_bw(int dclk, int num, int den) -{ > - /* multiples of 16.666MHz (100/6) */ > - return DIV_ROUND_CLOSEST(num * dclk * 100, den * 6); > -} > - > static int adl_calc_psf_bw(int clk) > { > /* > @@ -240,7 +264,7 @@ static const struct intel_sa_info tgl_sa_info = { }; > > static const struct intel_sa_info rkl_sa_info = { > - .deburst = 16, > + .deburst = 8, > .deprogbwlimit = 20, /* GB/s */ > .displayrtids = 128, > .derating = 10, > @@ -265,35 +289,130 @@ static int icl_get_bw_info(struct drm_i915_private > *dev_priv, const struct intel > struct intel_qgv_info qi = {}; > bool is_y_tile = true; /* assume y tile may be used */ > int num_channels = max_t(u8, 1, dev_priv- > >dram_info.num_channels); > - int deinterleave; > - int ipqdepth, ipqdepthpch; > + int ipqdepth, ipqdepthpch = 16; > int dclk_max; > int maxdebw; > + int num_groups = ARRAY_SIZE(dev_priv->max_bw); > int i, ret; > > - ret = icl_get_qgv_points(dev_priv, &qi); > + ret = icl_get_qgv_points(dev_priv, &qi, is_y_tile); > if (ret) { > drm_dbg_kms(&dev_priv->drm, > "Failed to get memory subsystem information, > ignoring bandwidth limits"); > return ret; > } > > - deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2); > dclk_max = icl_sagv_max_dclk(&qi); > + maxdebw = min(sa->deprogbwlimit * 1000, dclk_max * 16 * 6 / 10); > + ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels); > + qi.deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2); > + > + for (i = 0; i < num_groups; i++) { > + struct intel_bw_info *bi = &dev_priv->max_bw[i]; > + int clpchgroup; > + int j; > + > + clpchgroup = (sa->deburst * qi.deinterleave / num_channels) > << i; > + bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1; > + > + bi->num_qgv_points = qi.num_points; > + bi->num_psf_gv_points = qi.num_psf_points; > + > + for (j = 0; j < qi.num_points; j++) { The j here can be more descriptive to make it easy to understand. s/j/sagv probably? Thanks, Anusha > + const struct intel_qgv_point *sp = &qi.points[j]; > + int ct, bw; > + > + /* > + * Max row cycle time > + * > + * FIXME what is the logic behind the > + * assumed burst length? > + */ > + ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd + > + (clpchgroup - 1) * qi.t_bl + sp->t_rdpre); > + bw = DIV_ROUND_UP(sp->dclk * clpchgroup * 32 * > num_channels, ct); > > - ipqdepthpch = 16; > + bi->deratedbw[j] = min(maxdebw, > + bw * (100 - sa->derating) / 100); > + > + drm_dbg_kms(&dev_priv->drm, > + "BW%d / QGV %d: num_planes=%d > deratedbw=%u\n", > + i, j, bi->num_planes, bi->deratedbw[j]); > + } > + } > + /* > + * In case if SAGV is disabled in BIOS, we always get 1 > + * SAGV point, but we can't send PCode commands to restrict it > + * as it will fail and pointless anyway. > + */ > + if (qi.num_points == 1) > + dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED; > + else > + dev_priv->sagv_status = I915_SAGV_ENABLED; > + > + return 0; > +} > + > +static int tgl_get_bw_info(struct drm_i915_private *dev_priv, const > +struct intel_sa_info *sa) { > + struct intel_qgv_info qi = {}; > + const struct dram_info *dram_info = &dev_priv->dram_info; > + bool is_y_tile = true; /* assume y tile may be used */ > + int num_channels = max_t(u8, 1, dev_priv- > >dram_info.num_channels); > + int ipqdepth, ipqdepthpch = 16; > + int dclk_max; > + int maxdebw, peakbw; > + int clperchgroup; > + int num_groups = ARRAY_SIZE(dev_priv->max_bw); > + int i, ret; > + > + ret = icl_get_qgv_points(dev_priv, &qi, is_y_tile); > + if (ret) { > + drm_dbg_kms(&dev_priv->drm, > + "Failed to get memory subsystem information, > ignoring bandwidth limits"); > + return ret; > + } > + > + if (dram_info->type == INTEL_DRAM_LPDDR4 || dram_info->type == > INTEL_DRAM_LPDDR5) > + num_channels *= 2; > + > + qi.deinterleave = qi.deinterleave ? : DIV_ROUND_UP(num_channels, > +is_y_tile ? 4 : 2); > + > + if (num_channels < qi.max_numchannels && DISPLAY_VER(dev_priv) > >= 12) > + qi.deinterleave = max(DIV_ROUND_UP(qi.deinterleave, 2), 1); > + > + if (DISPLAY_VER(dev_priv) > 11 && num_channels > > qi.max_numchannels) > + drm_warn(&dev_priv->drm, "Number of channels exceeds > max number of channels."); > + if (qi.max_numchannels != 0) > + num_channels = min_t(u8, num_channels, > qi.max_numchannels); > + > + dclk_max = icl_sagv_max_dclk(&qi); > + > + peakbw = num_channels * DIV_ROUND_UP(qi.channel_width, 8) * > dclk_max; > + maxdebw = min(sa->deprogbwlimit * 1000, peakbw * 6 / 10); /* 60% > */ > > - maxdebw = min(sa->deprogbwlimit * 1000, > - icl_calc_bw(dclk_max, 16, 1) * 6 / 10); /* 60% */ > ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels); > + /* > + * clperchgroup = 4kpagespermempage * clperchperblock, > + * clperchperblock = 8 / num_channels * interleave > + */ > + clperchgroup = 4 * DIV_ROUND_UP(8, num_channels) * > qi.deinterleave; > > - for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) { > + for (i = 0; i < num_groups; i++) { > struct intel_bw_info *bi = &dev_priv->max_bw[i]; > + struct intel_bw_info *bi_next; > int clpchgroup; > int j; > > - clpchgroup = (sa->deburst * deinterleave / num_channels) << > i; > - bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1; > + if (i < num_groups - 1) > + bi_next = &dev_priv->max_bw[i + 1]; > + > + clpchgroup = (sa->deburst * qi.deinterleave / num_channels) > << i; > + > + if (i < num_groups - 1 && clpchgroup < clperchgroup) > + bi_next->num_planes = (ipqdepth - clpchgroup) / > clpchgroup + 1; > + else > + bi_next->num_planes = 0; > > bi->num_qgv_points = qi.num_points; > bi->num_psf_gv_points = qi.num_psf_points; @@ -310,7 > +429,7 @@ static int icl_get_bw_info(struct drm_i915_private *dev_priv, > const struct intel > */ > ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd + > (clpchgroup - 1) * qi.t_bl + sp->t_rdpre); > - bw = icl_calc_bw(sp->dclk, clpchgroup * 32 * > num_channels, ct); > + bw = DIV_ROUND_UP(sp->dclk * clpchgroup * 32 * > num_channels, ct); > > bi->deratedbw[j] = min(maxdebw, > bw * (100 - sa->derating) / 100); > @@ -329,9 +448,6 @@ static int icl_get_bw_info(struct drm_i915_private > *dev_priv, const struct intel > "BW%d / PSF GV %d: num_planes=%d > bw=%u\n", > i, j, bi->num_planes, bi->psf_bw[j]); > } > - > - if (bi->num_planes == 1) > - break; > } > > /* > @@ -395,6 +511,34 @@ static unsigned int icl_max_bw(struct > drm_i915_private *dev_priv, > return 0; > } > > +static unsigned int tgl_max_bw(struct drm_i915_private *dev_priv, > + int num_planes, int qgv_point) { > + int i; > + > + /* > + * Let's return max bw for 0 planes > + */ > + num_planes = max(1, num_planes); > + > + for (i = ARRAY_SIZE(dev_priv->max_bw) - 1; i >= 0; i--) { > + const struct intel_bw_info *bi = > + &dev_priv->max_bw[i]; > + > + /* > + * Pcode will not expose all QGV points when > + * SAGV is forced to off/min/med/max. > + */ > + if (qgv_point >= bi->num_qgv_points) > + return UINT_MAX; > + > + if (num_planes <= bi->num_planes) > + return bi->deratedbw[qgv_point]; > + } > + > + return dev_priv->max_bw[0].deratedbw[qgv_point]; > +} > + > static unsigned int adl_psf_bw(struct drm_i915_private *dev_priv, > int psf_gv_point) > { > @@ -412,13 +556,13 @@ void intel_bw_init_hw(struct drm_i915_private > *dev_priv) > if (IS_DG2(dev_priv)) > dg2_get_bw_info(dev_priv); > else if (IS_ALDERLAKE_P(dev_priv)) > - icl_get_bw_info(dev_priv, &adlp_sa_info); > + tgl_get_bw_info(dev_priv, &adlp_sa_info); > else if (IS_ALDERLAKE_S(dev_priv)) > - icl_get_bw_info(dev_priv, &adls_sa_info); > + tgl_get_bw_info(dev_priv, &adls_sa_info); > else if (IS_ROCKETLAKE(dev_priv)) > - icl_get_bw_info(dev_priv, &rkl_sa_info); > + tgl_get_bw_info(dev_priv, &rkl_sa_info); > else if (DISPLAY_VER(dev_priv) == 12) > - icl_get_bw_info(dev_priv, &tgl_sa_info); > + tgl_get_bw_info(dev_priv, &tgl_sa_info); > else if (DISPLAY_VER(dev_priv) == 11) > icl_get_bw_info(dev_priv, &icl_sa_info); } @@ -746,7 > +890,10 @@ int intel_bw_atomic_check(struct intel_atomic_state *state) > for (i = 0; i < num_qgv_points; i++) { > unsigned int max_data_rate; > > - max_data_rate = icl_max_bw(dev_priv, num_active_planes, > i); > + if (DISPLAY_VER(dev_priv) > 11) > + max_data_rate = tgl_max_bw(dev_priv, > num_active_planes, i); > + else > + max_data_rate = icl_max_bw(dev_priv, > num_active_planes, i); > /* > * We need to know which qgv point gives us > * maximum bandwidth in order to disable SAGV > -- > 2.20.1