Re: [PATCH] drm/i915: Fix FIFO underruns caused by missing cumulative bpp W/A

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Apr 07, 2022 at 02:10:52PM +0300, Ville Syrjälä wrote:
> On Thu, Apr 07, 2022 at 11:42:35AM +0300, Stanislav Lisovskiy wrote:
> > We had some FIFO underruns appearing on platforms like ADL,
> > which could be fixed though by increasing CDCLK, however we were
> > lacking explanation for that - we were not calculating CDCLK,
> > also based on cumulative bpp W/A formula, mentioned in BSpec 64631.
> 
> We already have that in intel_bw_crtc_min_cdclk().

It actually is not quite what BSpec is talking about it adds
data_rate per plane, instead of bpp, I think it confuses 
those 2 from BSpec:

"
Plane required bandwidth MB/s = pixel rate MHz * source pixel format in bytes 
* plane down scale amount * pipe down scale amount
Display required memory bandwidth MB/s += Plane required bandwidth
Pipe cumulative bytes per pixel += plane source pixel format in bytes
"

then we have to different formulas used to estimate whats the CDCLK
should be, one is "DBUF maximum data buffer bandwidth MB/s = CDCLK frequency MHz * 64 Bytes"

another is pipe CDCLK = cumulative bytes per pixel * (pixel rate MHz * 
plane down scale amount * pipe down scale amount)) * 51.2)

So as I checked for some tests in kms_plane_multiple the estimation
from the latter was sometimes higher than the one we currently have and
it happened exactly when we did have FIFO underruns, otherwise CDCLK
stayed pretty much the same, which also shows that there is something
obvisously wrong with current calculations.

That patch fixes it and we don't have underruns at after it is applied.


Stan

> 
> > 
> > With that patch no FIFO underruns appear anymore.
> > 
> > Signed-off-by: Stanislav Lisovskiy <stanislav.lisovskiy@xxxxxxxxx>
> > ---
> >  drivers/gpu/drm/i915/display/intel_bw.c | 71 ++++++++++++++++++++++---
> >  drivers/gpu/drm/i915/display/intel_bw.h |  2 +
> >  2 files changed, 67 insertions(+), 6 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c
> > index 37bd7b17f3d0..3a2aeeffee7c 100644
> > --- a/drivers/gpu/drm/i915/display/intel_bw.c
> > +++ b/drivers/gpu/drm/i915/display/intel_bw.c
> > @@ -743,20 +743,51 @@ static void skl_plane_calc_dbuf_bw(struct intel_bw_state *bw_state,
> >  	}
> >  }
> >  
> > -static void skl_crtc_calc_dbuf_bw(struct intel_bw_state *bw_state,
> > +static int intel_plane_bw_bpp(const struct drm_format_info *info)
> > +{
> > +	/*
> > +	 * For the purposes of memory bandwidth calculations,
> > +	 * planar formats are treated as if both planes had the
> > +	 * same bpp (with no reduction for vertical
> > +	 * subsampling). I.e we take as usual the worst case
> > +	 * scenario.
> > +	 */
> > +	if (drm_format_info_is_yuv_semiplanar(info))
> > +		return 2 * max(info->cpp[0], info->cpp[1]);
> > +
> > +	return info->cpp[0];
> > +}
> > +
> > +static void skl_crtc_calc_dbuf_bw(struct intel_atomic_state *state,
> > +				  struct intel_bw_state *bw_state,
> >  				  const struct intel_crtc_state *crtc_state)
> >  {
> >  	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
> >  	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
> >  	struct intel_dbuf_bw *crtc_bw = &bw_state->dbuf_bw[crtc->pipe];
> > -	enum plane_id plane_id;
> > +	struct intel_plane *plane;
> >  
> >  	memset(crtc_bw, 0, sizeof(*crtc_bw));
> >  
> >  	if (!crtc_state->hw.active)
> >  		return;
> >  
> > -	for_each_plane_id_on_crtc(crtc, plane_id) {
> > +	for_each_intel_plane_on_crtc(&i915->drm, crtc, plane) {
> > +		const struct drm_framebuffer *fb;
> > +		enum plane_id plane_id = plane->id;
> > +		unsigned int plane_bpp = 0;
> > +		struct intel_plane_state *plane_state =
> > +			intel_atomic_get_new_plane_state(state, plane);
> > +
> > +		if (plane_state) {
> > +			fb = plane_state->hw.fb;
> > +
> > +			if (plane_state->uapi.visible && fb)
> > +				plane_bpp = intel_plane_bw_bpp(fb->format);
> > +		}
> > +
> > +		crtc_bw->pipe_cumulative_bpp += plane_bpp;
> > +
> >  		/*
> >  		 * We assume cursors are small enough
> >  		 * to not cause bandwidth problems.
> > @@ -773,6 +804,10 @@ static void skl_crtc_calc_dbuf_bw(struct intel_bw_state *bw_state,
> >  					       &crtc_state->wm.skl.plane_ddb_y[plane_id],
> >  					       crtc_state->data_rate[plane_id]);
> >  	}
> > +
> > +	crtc_bw->bpp_cdclk = DIV_ROUND_UP_ULL(mul_u32_u32(crtc_state->pixel_rate,
> > +					      crtc_bw->pipe_cumulative_bpp * 512),
> > +					      10) / 1000;
> >  }
> >  
> >  /* "Maximum Data Buffer Bandwidth" */
> > @@ -782,11 +817,13 @@ intel_bw_dbuf_min_cdclk(struct drm_i915_private *i915,
> >  {
> >  	unsigned int total_max_bw = 0;
> >  	enum dbuf_slice slice;
> > +	enum pipe pipe;
> > +	unsigned int bpp_cdclk = 0;
> > +	unsigned int dbuf_bw_cdclk;
> >  
> >  	for_each_dbuf_slice(i915, slice) {
> >  		int num_active_planes = 0;
> >  		unsigned int max_bw = 0;
> > -		enum pipe pipe;
> >  
> >  		/*
> >  		 * The arbiter can only really guarantee an
> > @@ -803,7 +840,29 @@ intel_bw_dbuf_min_cdclk(struct drm_i915_private *i915,
> >  		total_max_bw = max(total_max_bw, max_bw);
> >  	}
> >  
> > -	return DIV_ROUND_UP(total_max_bw, 64);
> > +	for_each_pipe(i915, pipe) {
> > +		const struct intel_dbuf_bw *crtc_bw = &bw_state->dbuf_bw[pipe];
> > +		/*
> > +		 * From BSpec 64631:
> > +		 * Pipe cumulative bytes should be less or equal to
> > +		 * CDCLK / (pixel_rate * scaling_factors * 51.2) thus
> > +		 * CDCLK = pipe_cumulative_bpp * pixel_rate * scaling_factors * 51.2.
> > +		 * Considering that intel_plane_pixel_rate already returns adjusted pixel rate,
> > +		 * no scaling factors needed here.
> > +		 */
> > +		bpp_cdclk = max_t(unsigned int, crtc_bw->bpp_cdclk,
> > +						bpp_cdclk);
> > +	}
> > +
> > +	dbuf_bw_cdclk = DIV_ROUND_UP(total_max_bw, 64);
> > +
> > +	/*
> > +	 * So now we have two CDCLK estimations:
> > +	 * one is based on required DBuf BW and another is
> > +	 * based on pipe cumulative bpp W/A(BSpec 64631)
> > +	 * Traditionally take the more demanding into use(worst case)
> > +	 */
> > +	return max_t(unsigned int, dbuf_bw_cdclk, bpp_cdclk);
> >  }
> >  
> >  int intel_bw_min_cdclk(struct drm_i915_private *i915,
> > @@ -842,7 +901,7 @@ int intel_bw_calc_min_cdclk(struct intel_atomic_state *state,
> >  
> >  		old_bw_state = intel_atomic_get_old_bw_state(state);
> >  
> > -		skl_crtc_calc_dbuf_bw(new_bw_state, crtc_state);
> > +		skl_crtc_calc_dbuf_bw(state, new_bw_state, crtc_state);
> >  
> >  		new_bw_state->min_cdclk[crtc->pipe] =
> >  			intel_bw_crtc_min_cdclk(crtc_state);
> > diff --git a/drivers/gpu/drm/i915/display/intel_bw.h b/drivers/gpu/drm/i915/display/intel_bw.h
> > index cb7ee3a24a58..9e3a6ad03b19 100644
> > --- a/drivers/gpu/drm/i915/display/intel_bw.h
> > +++ b/drivers/gpu/drm/i915/display/intel_bw.h
> > @@ -19,6 +19,8 @@ struct intel_crtc_state;
> >  struct intel_dbuf_bw {
> >  	unsigned int max_bw[I915_MAX_DBUF_SLICES];
> >  	u8 active_planes[I915_MAX_DBUF_SLICES];
> > +	unsigned int pipe_cumulative_bpp;
> > +	unsigned int bpp_cdclk;
> >  };
> >  
> >  struct intel_bw_state {
> > -- 
> > 2.24.1.485.gad05a3d8e5
> 
> -- 
> Ville Syrjälä
> Intel



[Index of Archives]     [AMD Graphics]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux