Re: [PATCH 5/8] drm/i915/fbc: Rework cfb stride/size calculations

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




> -----Original Message-----
> From: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx>
> Sent: Tuesday, September 21, 2021 8:27 PM
> To: Shankar, Uma <uma.shankar@xxxxxxxxx>
> Cc: intel-gfx@xxxxxxxxxxxxxxxxxxxxx
> Subject: Re:  [PATCH 5/8] drm/i915/fbc: Rework cfb stride/size
> calculations
> 
> On Mon, Sep 06, 2021 at 05:23:42AM +0000, Shankar, Uma wrote:
> >
> >
> > > -----Original Message-----
> > > From: Intel-gfx <intel-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf
> > > Of Ville Syrjala
> > > Sent: Saturday, July 3, 2021 2:16 AM
> > > To: intel-gfx@xxxxxxxxxxxxxxxxxxxxx
> > > Subject:  [PATCH 5/8] drm/i915/fbc: Rework cfb
> > > stride/size calculations
> > >
> > > From: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx>
> > >
> > > The code to calculate the cfb stride/size is a bit of mess.
> > > The cfb size is getting calculated based purely on the plane stride and plane
> height.
> > > That doesn't account for extra alignment we want for the cfb stride.
> > > The gen9 override stride OTOH is just calculated based on the plane
> > > width, and it does try to make things more aligned but any extra
> > > alignment added there is not considered in the cfb size calculations.
> > > So not at all convinced this is working as intended. Additionally
> > > the compression limit handling is split between the cfb allocation
> > > code and g4x_dpfc_ctl_limit() (for the 16bpp case), which is just confusing.
> > >
> > > Let's streamline the whole thing:
> > > - Start with the plane stride, convert that into cfb stride (cfb is
> > >   always 4 bytes per pixel). All the calculations will assume 1:1
> > >   compression limit since that will give us the max values, and we
> > >   don't yet know how much stolen memory we will be able to allocate
> > > - Align the cfb stride to 512 bytes on modern platforms. This guarantees
> > >   the 4 line segment will be 512 byte aligned regardles of the final
> > >   compression limit we choose later. The 512 byte alignment for the
> > >   segment is required by at least some of the platforms, and just doing
> > >   it always seems like the easiest option
> > > - Figure out if we need to use the override stride or not. For X-tiled
> > >   it's never needed since the plane stride is already 512 byte aligned,
> > >   for Y-tiled it will be needed if the plane stride is not a multiple
> > >   of 512 bytes, and for linear it's apparently always needed because the
> > >   hardware miscalculates the cfb stride as PLANE_STRIDE*512 instead of
> > >   the PLANE_STRIDE*64 that it use with linear.
> > > - The cfb size will be calculated based on the aligned cfb stride to
> > >   guarantee we actually reserved enough stolen memory and the FBC hw
> > >   won't end up scribbling over whatever else is allocated in stolen
> > > - The compression limit handling we just do fully in the cfb allocation
> > >   code to make things less confusing
> > >
> > > Signed-off-by: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx>
> > > ---
> > >  drivers/gpu/drm/i915/display/intel_fbc.c | 141 ++++++++++++++---------
> > >  drivers/gpu/drm/i915/i915_drv.h          |   4 +-
> > >  2 files changed, 90 insertions(+), 55 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c
> > > b/drivers/gpu/drm/i915/display/intel_fbc.c
> > > index f5cbbc53837c..2baf58af016c 100644
> > > --- a/drivers/gpu/drm/i915/display/intel_fbc.c
> > > +++ b/drivers/gpu/drm/i915/display/intel_fbc.c
> > > @@ -62,19 +62,54 @@ static void
> > > intel_fbc_get_plane_source_size(const struct intel_fbc_state_cache *
> > >  		*height = cache->plane.src_h;
> > >  }
> > >
> > > -static int intel_fbc_calculate_cfb_size(struct drm_i915_private *dev_priv,
> > > -					const struct intel_fbc_state_cache *cache)
> > > +/* plane stride in pixels */
> > > +static unsigned int intel_fbc_plane_stride(const struct
> > > +intel_plane_state *plane_state)
> > >  {
> > > -	int lines;
> > > +	const struct drm_framebuffer *fb = plane_state->hw.fb;
> > > +	unsigned int stride;
> > > +
> > > +	stride = plane_state->view.color_plane[0].stride;
> > > +	if (!drm_rotation_90_or_270(plane_state->hw.rotation))
> > > +		stride /= fb->format->cpp[0];
> > > +
> > > +	return stride;
> > > +}
> > > +
> > > +/* plane stride based cfb stride in bytes, assuming 1:1 compression
> > > +limit */ static unsigned int _intel_fbc_cfb_stride(const struct
> > > +intel_fbc_state_cache *cache) {
> > > +	/* FBC always 4 bytes per pixel internally */
> > > +	return cache->fb.stride * 4;
> > > +}
> > > +
> > > +/* properly aligned cfb stride in bytes, assuming 1:1 compression
> > > +limit */ static unsigned int intel_fbc_cfb_stride(struct drm_i915_private *i915,
> > > +					 const struct intel_fbc_state_cache *cache)
> > > {
> > > +	unsigned int stride = _intel_fbc_cfb_stride(cache);
> > > +
> > > +	/*
> > > +	 * At least some of the platforms require each 4 line segment to
> > > +	 * be 512 byte aligned. Aligning each line to 512 bytes guarantees
> > > +	 * that regardless of the compression limit we choose later.
> > > +	 */
> > > +	if (DISPLAY_VER(i915) == 9)
> > > +		return ALIGN(stride, 512);
> > > +	else
> > > +		return stride;
> > > +}
> > > +
> > > +static unsigned int intel_fbc_cfb_size(struct drm_i915_private *dev_priv,
> > > +				       const struct intel_fbc_state_cache *cache) {
> > > +	int lines = cache->plane.src_h;
> > >
> > > -	intel_fbc_get_plane_source_size(cache, NULL, &lines);
> > >  	if (DISPLAY_VER(dev_priv) == 7)
> > >  		lines = min(lines, 2048);
> > >  	else if (DISPLAY_VER(dev_priv) >= 8)
> > >  		lines = min(lines, 2560);
> > >
> > > -	/* Hardware needs the full buffer stride, not just the active area. */
> > > -	return lines * cache->fb.stride;
> > > +	return lines * intel_fbc_cfb_stride(dev_priv, cache);
> > >  }
> > >
> > >  static void i8xx_fbc_deactivate(struct drm_i915_private *dev_priv)
> > > @@ -150,15
> > > +185,9 @@ static bool i8xx_fbc_is_active(struct drm_i915_private
> > > +*dev_priv)
> > >
> > >  static u32 g4x_dpfc_ctl_limit(struct drm_i915_private *i915)  {
> > > -	const struct intel_fbc_reg_params *params = &i915->fbc.params;
> > > -	int limit = i915->fbc.limit;
> > > -
> > > -	if (params->fb.format->cpp[0] == 2)
> > > -		limit <<= 1;
> > > -
> > > -	switch (limit) {
> > > +	switch (i915->fbc.limit) {
> > >  	default:
> > > -		MISSING_CASE(limit);
> > > +		MISSING_CASE(i915->fbc.limit);
> > >  		fallthrough;
> > >  	case 1:
> > >  		return DPFC_CTL_LIMIT_1X;
> > > @@ -301,7 +330,8 @@ static bool ilk_fbc_is_active(struct
> > > drm_i915_private
> > > *dev_priv)
> > >
> > >  static void gen7_fbc_activate(struct drm_i915_private *dev_priv)  {
> > > -	struct intel_fbc_reg_params *params = &dev_priv->fbc.params;
> > > +	struct intel_fbc *fbc = &dev_priv->fbc;
> > > +	const struct intel_fbc_reg_params *params = &fbc->params;
> > >  	u32 dpfc_ctl;
> > >
> > >  	/* Display WA #0529: skl, kbl, bxt. */ @@ -310,7 +340,7 @@ static
> > > void gen7_fbc_activate(struct drm_i915_private *dev_priv)
> > >
> > >  		if (params->override_cfb_stride)
> > >  			val |= CHICKEN_FBC_STRIDE_OVERRIDE |
> > > -				CHICKEN_FBC_STRIDE(params-
> > > >override_cfb_stride);
> > > +				CHICKEN_FBC_STRIDE(params-
> > > >override_cfb_stride / fbc->limit);
> > >
> > >  		intel_de_rmw(dev_priv, CHICKEN_MISC_4,
> > >  			     CHICKEN_FBC_STRIDE_OVERRIDE | @@ -443,7 +473,12
> @@ static
> > > u64 intel_fbc_stolen_end(struct drm_i915_private
> > > *dev_priv)
> > >  	return min(end, intel_fbc_cfb_base_max(dev_priv));
> > >  }
> > >
> > > -static int intel_fbc_max_limit(struct drm_i915_private *dev_priv,
> > > int fb_cpp)
> > > +static int intel_fbc_min_limit(int fb_cpp) {
> > > +	return fb_cpp == 2 ? 2 : 1;
> > > +}
> > > +
> > > +static int intel_fbc_max_limit(struct drm_i915_private *dev_priv)
> > >  {
> > >  	/*
> > >  	 * FIXME: FBC1 can have arbitrary cfb stride, @@ -457,7 +492,7 @@
> > > static int intel_fbc_max_limit(struct drm_i915_private *dev_priv, int fb_cpp)
> > >  		return 1;
> > >
> > >  	/* FBC2 can only do 1:1, 1:2, 1:4 */
> > > -	return fb_cpp == 2 ? 2 : 4;
> > > +	return 4;
> > >  }
> > >
> > >  static int find_compression_limit(struct drm_i915_private
> > > *dev_priv, @@ -466,7
> > > +501,9 @@ static int find_compression_limit(struct drm_i915_private
> > > +*dev_priv,  {
> > >  	struct intel_fbc *fbc = &dev_priv->fbc;
> > >  	u64 end = intel_fbc_stolen_end(dev_priv);
> > > -	int ret, limit = 1;
> > > +	int ret, limit = intel_fbc_min_limit(fb_cpp);
> > > +
> > > +	size /= limit;
> > >
> > >  	/* Try to over-allocate to reduce reallocations and fragmentation. */
> > >  	ret = i915_gem_stolen_insert_node_in_range(dev_priv, &fbc-
> > > >compressed_fb, @@ -474,7 +511,7 @@ static int
> > > >find_compression_limit(struct
> > > drm_i915_private *dev_priv,
> > >  	if (ret == 0)
> > >  		return limit;
> > >
> > > -	for (; limit <= intel_fbc_max_limit(dev_priv, fb_cpp); limit <<= 1) {
> > > +	for (; limit <= intel_fbc_max_limit(dev_priv); limit <<= 1) {
> > >  		ret = i915_gem_stolen_insert_node_in_range(dev_priv, &fbc-
> > > >compressed_fb,
> > >  							   size >>= 1, 4096, 0, end);
> > >  		if (ret == 0)
> > > @@ -505,10 +542,9 @@ static int intel_fbc_alloc_cfb(struct
> > > drm_i915_private *dev_priv,
> > >  	ret = find_compression_limit(dev_priv, size, fb_cpp);
> > >  	if (!ret)
> > >  		goto err_llb;
> > > -	else if (ret > 1) {
> > > +	else if (ret > intel_fbc_min_limit(fb_cpp))
> > >  		drm_info_once(&dev_priv->drm,
> > >  			      "Reducing the compressed framebuffer size. This may
> lead
> > > to less power savings than a non-reduced-size. Try to increase
> > > stolen memory size if available in BIOS.\n");
> > > -	}
> > >
> > >  	fbc->limit = ret;
> > >
> > > @@ -719,11 +755,7 @@ static void intel_fbc_update_state_cache(struct
> > > intel_crtc *crtc,
> > >
> > >  	cache->fb.format = fb->format;
> > >  	cache->fb.modifier = fb->modifier;
> > > -
> > > -	/* FIXME is this correct? */
> > > -	cache->fb.stride = plane_state->view.color_plane[0].stride;
> > > -	if (drm_rotation_90_or_270(plane_state->hw.rotation))
> > > -		cache->fb.stride *= fb->format->cpp[0];
> > > +	cache->fb.stride = intel_fbc_plane_stride(plane_state);
> > >
> > >  	/* FBC1 compression interval: arbitrary choice of 1 second */
> > >  	cache->interval =
> > > drm_mode_vrefresh(&crtc_state->hw.adjusted_mode);
> > > @@ -746,27 +778,29 @@ static bool intel_fbc_cfb_size_changed(struct
> > > drm_i915_private *dev_priv)  {
> > >  	struct intel_fbc *fbc = &dev_priv->fbc;
> > >
> > > -	return intel_fbc_calculate_cfb_size(dev_priv, &fbc->state_cache) >
> > > +	return intel_fbc_cfb_size(dev_priv, &fbc->state_cache) >
> > >  		fbc->compressed_fb.size * fbc->limit;  }
> > >
> > > -static u16 intel_fbc_override_cfb_stride(struct drm_i915_private
> > > *dev_priv)
> > > +static u16 intel_fbc_override_cfb_stride(struct drm_i915_private *dev_priv,
> > > +					 const struct intel_fbc_state_cache *cache)
> > >  {
> > > -	struct intel_fbc *fbc = &dev_priv->fbc;
> > > -	struct intel_fbc_state_cache *cache = &fbc->state_cache;
> > > +	unsigned int stride = _intel_fbc_cfb_stride(cache);
> > > +	unsigned int stride_aligned = intel_fbc_cfb_stride(dev_priv,
> > > +cache);
> > >
> > > -	if ((DISPLAY_VER(dev_priv) == 9) &&
> > > -	    cache->fb.modifier != I915_FORMAT_MOD_X_TILED)
> > > -		return DIV_ROUND_UP(cache->plane.src_w, 32 * fbc->limit) * 8;
> > > -	else
> > > -		return 0;
> > > -}
> > > +	/*
> > > +	 * Override stride in 64 byte units per 4 line segment.
> > > +	 *
> > > +	 * Gen9 hw miscalculates cfb stride for linear as
> > > +	 * PLANE_STRIDE*512 instead of PLANE_STRIDE*64, so
> > > +	 * we always need to use the override there.
> > > +	 */
> > > +	if (stride != stride_aligned ||
> > > +	    (DISPLAY_VER(dev_priv) == 9 &&
> > > +	     cache->fb.modifier == DRM_FORMAT_MOD_LINEAR))
> > > +		return stride_aligned * 4 / 64;
> >
> > As per bspec WA: 0529
> > "Corruption in some cases when FBC is enabled and the plane surface
> > format is in linear, tile Y legacy, or tile Yf
> > WA: Display register 4208Ch bit 13 must be set to 1b and bits 12:0
> > must be programmed with the compressed buffer stride value. The compressed
> buffer stride must be calculated using the following equation:
> >
> > Compressed buffer stride = ceiling [(at least plane width in pixels) / (32 *
> compression limit factor)] * 8"
> >
> > We need to use override stride even for TileY/Yf as well along with linear. Does the
> 512 alignment takes care of that.
> 
> TileY is actually fine without the w/a if the plane stride is suitably aligned. It only
> goes bad when the stride is misaligned.
> 

Ok, so we should be covered here due to the alignment.

> Not quite sure about Yf since I've not tested it, but we don't even allow FBC with Yf
> at the moment so doesn't really matter.

Yeah ok, make sense.

> > And also whether the calculation for linear aligns with bspec WA. Just wanted to
> highlight, so that we don't miss.
> 
> The bspec calculations are written in a bit convoluted way. I'll respin these patches a
> bit to write the calcualtions out in a way that actually makes it clear what we're
> doing...

Ok sure Ville. Will check and ack that.

Regards,
Uma Shankar

> > Will go with your discretion.
> >
> > >
> > > -static bool intel_fbc_override_cfb_stride_changed(struct
> > > drm_i915_private
> > > *dev_priv) -{
> > > -	struct intel_fbc *fbc = &dev_priv->fbc;
> > > -
> > > -	return fbc->params.override_cfb_stride !=
> > > intel_fbc_override_cfb_stride(dev_priv);
> > > +	return 0;
> > >  }
> > >
> > >  static bool intel_fbc_can_enable(struct drm_i915_private *dev_priv)
> > > @@ -861,7
> > > +895,8 @@ static bool intel_fbc_can_activate(struct intel_crtc
> > > +*crtc)
> > >  		return false;
> > >  	}
> > >
> > > -	if (!stride_is_valid(dev_priv, cache->fb.modifier, cache->fb.stride)) {
> > > +	if (!stride_is_valid(dev_priv, cache->fb.modifier,
> > > +			     cache->fb.stride * cache->fb.format->cpp[0])) {
> > >  		fbc->no_fbc_reason = "framebuffer stride not supported";
> > >  		return false;
> > >  	}
> > > @@ -949,9 +984,9 @@ static void intel_fbc_get_reg_params(struct intel_crtc
> *crtc,
> > >  	params->fb.modifier = cache->fb.modifier;
> > >  	params->fb.stride = cache->fb.stride;
> > >
> > > -	params->cfb_size = intel_fbc_calculate_cfb_size(dev_priv, cache);
> > > -
> > > -	params->override_cfb_stride = cache->override_cfb_stride;
> > > +	params->cfb_stride = intel_fbc_cfb_stride(dev_priv, cache);
> > > +	params->cfb_size = intel_fbc_cfb_size(dev_priv, cache);
> > > +	params->override_cfb_stride =
> > > +intel_fbc_override_cfb_stride(dev_priv,
> > > +cache);
> > >
> > >  	params->plane_visible = cache->plane.visible;  } @@ -982,10
> > > +1017,13 @@ static bool intel_fbc_can_flip_nuke(const struct intel_crtc_state
> *crtc_state)
> > >  	if (params->fb.stride != cache->fb.stride)
> > >  		return false;
> > >
> > > -	if (params->cfb_size != intel_fbc_calculate_cfb_size(dev_priv, cache))
> > > +	if (params->cfb_stride != intel_fbc_cfb_stride(dev_priv, cache))
> > >  		return false;
> > >
> > > -	if (params->override_cfb_stride != cache->override_cfb_stride)
> > > +	if (params->cfb_size != intel_fbc_cfb_size(dev_priv, cache))
> > > +		return false;
> > > +
> > > +	if (params->override_cfb_stride !=
> > > +intel_fbc_override_cfb_stride(dev_priv, cache))
> > >  		return false;
> > >
> > >  	return true;
> > > @@ -1266,8 +1304,7 @@ static void intel_fbc_enable(struct
> > > intel_atomic_state *state,
> > >
> > >  	if (fbc->crtc) {
> > >  		if (fbc->crtc != crtc ||
> > > -		    (!intel_fbc_cfb_size_changed(dev_priv) &&
> > > -		     !intel_fbc_override_cfb_stride_changed(dev_priv)))
> > > +		    !intel_fbc_cfb_size_changed(dev_priv))
> > >  			goto out;
> > >
> > >  		__intel_fbc_disable(dev_priv);
> > > @@ -1282,15 +1319,13 @@ static void intel_fbc_enable(struct
> > > intel_atomic_state *state,
> > >  		goto out;
> > >
> > >  	if (intel_fbc_alloc_cfb(dev_priv,
> > > -				intel_fbc_calculate_cfb_size(dev_priv, cache),
> > > +				intel_fbc_cfb_size(dev_priv, cache),
> > >  				plane_state->hw.fb->format->cpp[0])) {
> > >  		cache->plane.visible = false;
> > >  		fbc->no_fbc_reason = "not enough stolen memory";
> > >  		goto out;
> > >  	}
> > >
> > > -	cache->override_cfb_stride = intel_fbc_override_cfb_stride(dev_priv);
> > > -
> > >  	drm_dbg_kms(&dev_priv->drm, "Enabling FBC on pipe %c\n",
> > >  		    pipe_name(crtc->pipe));
> > >  	fbc->no_fbc_reason = "FBC enabled but not active yet\n"; diff
> > > --git a/drivers/gpu/drm/i915/i915_drv.h
> > > b/drivers/gpu/drm/i915/i915_drv.h index
> > > 91a2d2425fd3..d124306c0a08 100644
> > > --- a/drivers/gpu/drm/i915/i915_drv.h
> > > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > > @@ -401,7 +401,6 @@ struct intel_fbc {
> > >  		} fb;
> > >
> > >  		unsigned int fence_y_offset;
> > > -		u16 override_cfb_stride;
> > >  		u16 interval;
> > >  		s8 fence_id;
> > >  		bool psr2_active;
> > > @@ -426,7 +425,8 @@ struct intel_fbc {
> > >  			u64 modifier;
> > >  		} fb;
> > >
> > > -		int cfb_size;
> > > +		unsigned int cfb_stride;
> > > +		unsigned int cfb_size;
> > >  		unsigned int fence_y_offset;
> > >  		u16 override_cfb_stride;
> > >  		u16 interval;
> > > --
> > > 2.31.1
> > >
> > > _______________________________________________
> > > Intel-gfx mailing list
> > > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
> > > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
> 
> --
> Ville Syrjälä
> Intel




[Index of Archives]     [AMD Graphics]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux