Re: [PATCH v2 8/9] drm/i915: Fix DBUF bandwidth vs. cdclk handling

On Thu, Mar 10, 2022 at 10:22:56AM +0200, Lisovskiy, Stanislav wrote:
> On Thu, Mar 03, 2022 at 09:12:06PM +0200, Ville Syrjala wrote:
> > From: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx>
> > 
> > Make the dbuf bandwidth min cdclk calculations match the spec
> > more closely. Supposedly the arbiter can only guarantee an equal
> > share of the total bandwidth of the slice to each active plane
> > on that slice. 
> 
> So are we assuming this, or do we know it for sure? Maybe we should
> ask Arthur (or maybe you did already), because it could be that it
> works somehow differently yet again.

It's what the spec formula says atm. Also IIRC when I asked Art
about this he said it's like that to account for the worst case
behaviour of the arbiter.
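
In other words, what the code below computes per slice is roughly
this (just a sketch; the /64 divider assumes the usual 64 bytes of
DBUF throughput per cdclk clock, as in the current code):

	max_bw = highest data rate of any active plane on the slice
	required_bw = max_bw * number of active planes on the slice
	min_cdclk >= DIV_ROUND_UP(required_bw, 64)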

> 
> > So we take the max bandwidth of any of the planes
> > on each slice and multiply that by the number of active planes
> > on the slice to get a worst case estimate on how much bandwidth
> > we require.
> > 
> > Signed-off-by: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx>
> > ---
> >  drivers/gpu/drm/i915/display/intel_bw.c    | 159 +++++++++++++++------
> >  drivers/gpu/drm/i915/display/intel_bw.h    |  10 +-
> >  drivers/gpu/drm/i915/display/intel_cdclk.c |  67 ++++-----
> >  drivers/gpu/drm/i915/display/intel_cdclk.h |   2 +
> >  4 files changed, 148 insertions(+), 90 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c
> > index 56eebccd16e2..ed86f3e3c66c 100644
> > --- a/drivers/gpu/drm/i915/display/intel_bw.c
> > +++ b/drivers/gpu/drm/i915/display/intel_bw.c
> > @@ -692,12 +692,34 @@ static bool intel_bw_state_changed(struct drm_i915_private *i915,
> >  		enum dbuf_slice slice;
> >  
> >  		for_each_dbuf_slice(i915, slice) {
> > -			if (old_crtc_bw->used_bw[slice] != new_crtc_bw->used_bw[slice])
> > +			if (old_crtc_bw->max_bw[slice] != new_crtc_bw->max_bw[slice] ||
> > +			    old_crtc_bw->active_planes[slice] != new_crtc_bw->active_planes[slice])
> >  				return true;
> >  		}
> >  	}
> >  
> > -	return old_bw_state->min_cdclk != new_bw_state->min_cdclk;
> > +	return false;
> > +}
> > +
> > +static void skl_plane_calc_dbuf_bw(struct intel_bw_state *bw_state,
> > +				   struct intel_crtc *crtc,
> > +				   enum plane_id plane_id,
> > +				   const struct skl_ddb_entry *ddb,
> > +				   unsigned int data_rate)
> > +{
> > +	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
> > +	struct intel_dbuf_bw *crtc_bw = &bw_state->dbuf_bw[crtc->pipe];
> > +	unsigned int dbuf_mask = skl_ddb_dbuf_slice_mask(i915, ddb);
> > +	enum dbuf_slice slice;
> > +
> > +	/*
> > +	 * The arbiter can only really guarantee an
> > +	 * equal share of the total bw to each plane.
> > +	 */
> > +	for_each_dbuf_slice_in_mask(i915, slice, dbuf_mask) {
> > +		crtc_bw->max_bw[slice] = max(crtc_bw->max_bw[slice], data_rate);
> 
> So crtc_bw->max_bw[slice] stores the data rate of the plane that
> consumes the most, I guess? Wondering if we should name it somewhat
> differently, like max_plane_bw or smth, because crtc_bw->max_bw[slice]
> sounds more like the total bw consumed by the slice, not by a single
> plane.

Naming is hard.

> 
> > +		crtc_bw->active_planes[slice] |= BIT(plane_id);
> > +	}
> >  }
> >  
> >  static void skl_crtc_calc_dbuf_bw(struct intel_bw_state *bw_state,
> > @@ -708,46 +730,77 @@ static void skl_crtc_calc_dbuf_bw(struct intel_bw_state *bw_state,
> >  	struct intel_dbuf_bw *crtc_bw = &bw_state->dbuf_bw[crtc->pipe];
> >  	enum plane_id plane_id;
> >  
> > -	memset(&crtc_bw->used_bw, 0, sizeof(crtc_bw->used_bw));
> > +	memset(crtc_bw, 0, sizeof(*crtc_bw));
> >  
> >  	if (!crtc_state->hw.active)
> >  		return;
> >  
> >  	for_each_plane_id_on_crtc(crtc, plane_id) {
> > -		const struct skl_ddb_entry *ddb =
> > -			&crtc_state->wm.skl.plane_ddb[plane_id];
> > -		unsigned int data_rate = crtc_state->data_rate[plane_id];
> > -		unsigned int dbuf_mask = skl_ddb_dbuf_slice_mask(i915, ddb);
> > -		enum dbuf_slice slice;
> > -
> > -		for_each_dbuf_slice_in_mask(i915, slice, dbuf_mask)
> > -			crtc_bw->used_bw[slice] += data_rate;
> > +		/*
> > +		 * We assume cursors are small enough
> > +		 * to not cause bandwidth problems.
> > +		 */
> > +		if (plane_id == PLANE_CURSOR)
> > +			continue;
> > +
> > +		skl_plane_calc_dbuf_bw(bw_state, crtc, plane_id,
> > +				       &crtc_state->wm.skl.plane_ddb[plane_id],
> > +				       crtc_state->data_rate[plane_id]);
> > +
> > +		if (DISPLAY_VER(i915) < 11)
> > +			skl_plane_calc_dbuf_bw(bw_state, crtc, plane_id,
> > +					       &crtc_state->wm.skl.plane_ddb_y[plane_id],
> > +					       crtc_state->data_rate[plane_id]);
> >  	}
> > +}
> > +
> > +/* "Maximum Data Buffer Bandwidth" */
> > +static int
> > +intel_bw_dbuf_min_cdclk(struct drm_i915_private *i915,
> > +			const struct intel_bw_state *bw_state)
> > +{
> > +	unsigned int total_max_bw = 0;
> > +	enum dbuf_slice slice;
> >  
> > -	if (DISPLAY_VER(i915) >= 11)
> > -		return;
> > +	for_each_dbuf_slice(i915, slice) {
> > +		int num_active_planes = 0;
> > +		unsigned int max_bw = 0;
> > +		enum pipe pipe;
> >  
> > -	for_each_plane_id_on_crtc(crtc, plane_id) {
> > -		const struct skl_ddb_entry *ddb =
> > -			&crtc_state->wm.skl.plane_ddb_y[plane_id];
> > -		unsigned int data_rate = crtc_state->data_rate_y[plane_id];
> > -		unsigned int dbuf_mask = skl_ddb_dbuf_slice_mask(i915, ddb);
> > -		enum dbuf_slice slice;
> > +		/*
> > +		 * The arbiter can only really guarantee an
> > +		 * equal share of the total bw to each plane.
> > +		 */
> > +		for_each_pipe(i915, pipe) {
> > +			const struct intel_dbuf_bw *crtc_bw = &bw_state->dbuf_bw[pipe];
> >  
> > -		for_each_dbuf_slice_in_mask(i915, slice, dbuf_mask)
> > -			crtc_bw->used_bw[slice] += data_rate;
> > +			max_bw = max(crtc_bw->max_bw[slice], max_bw);
> > +			num_active_planes += hweight8(crtc_bw->active_planes[slice]);
> > +		}
> > +		max_bw *= num_active_planes;
> 
> So we are now putting a much stricter limitation here. Say there are
> three planes: plane 1 consumes 3000, plane 2 consumes 1000, and
> plane 3 consumes 1000.
> 
> All together they consume 5000, so we used to derive the CDCLK from
> that (i.e. 5000/64). This is basically what the formula in BSpec also
> says, i.e. it just instructs us to add up all the planes and check
> that we don't exceed CDCLK * 64 in total.
> 
> However, now we are going to take plane 1, since it consumes the most,
> and multiply its 3000 by 3, i.e. get 9000 instead of 5000, which would
> result in a way higher CDCLK.
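> 
> Roughly, keeping the /64 divider from the current code, for this
> example that means:
> 
> 	sum based: DIV_ROUND_UP(5000, 64) = 79
> 	max based: DIV_ROUND_UP(3000 * 3, 64) = 141
> 
> i.e. almost twice the minimum CDCLK.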
> 
> I think you got this assumption from this place in BSpec:
> 
> DBUF maximum plane bandwidth MB/s = DBUF maximum pipe bandwidth / number of enabled planes
> 
> In fact it sounds just about right, but it would probably still be
> nice to get that clarified by the HW team. I remember we even
> discussed this a long time back, in the office days.
 
Yeah, that bspec formula takes the total available bandwidth of the
DBUF slice, divides it by the number of active planes using the slice,
and checks each plane against that. So it should be exactly what I do
here, just in reverse.
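
Written out (just restating the same check algebraically):

	for each plane: plane_bw <= slice_bw / num_planes
  <=>	max(plane_bw) <= slice_bw / num_planes
  <=>	max(plane_bw) * num_planes <= slice_bw

and the last form is what the patch computes.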
 
-- 
Ville Syrjälä
Intel


