> -----Original Message----- > From: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx> > Sent: Tuesday, September 21, 2021 8:27 PM > To: Shankar, Uma <uma.shankar@xxxxxxxxx> > Cc: intel-gfx@xxxxxxxxxxxxxxxxxxxxx > Subject: Re: [PATCH 5/8] drm/i915/fbc: Rework cfb stride/size > calculations > > On Mon, Sep 06, 2021 at 05:23:42AM +0000, Shankar, Uma wrote: > > > > > > > -----Original Message----- > > > From: Intel-gfx <intel-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf > > > Of Ville Syrjala > > > Sent: Saturday, July 3, 2021 2:16 AM > > > To: intel-gfx@xxxxxxxxxxxxxxxxxxxxx > > > Subject: [PATCH 5/8] drm/i915/fbc: Rework cfb > > > stride/size calculations > > > > > > From: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx> > > > > > > The code to calculate the cfb stride/size is a bit of mess. > > > The cfb size is getting calculated based purely on the plane stride and plane > height. > > > That doesn't account for extra alignment we want for the cfb stride. > > > The gen9 override stride OTOH is just calculated based on the plane > > > width, and it does try to make things more aligned but any extra > > > alignment added there is not considered in the cfb size calculations. > > > So not at all convinced this is working as intended. Additionally > > > the compression limit handling is split between the cfb allocation > > > code and g4x_dpfc_ctl_limit() (for the 16bpp case), which is just confusing. > > > > > > Let's streamline the whole thing: > > > - Start with the plane stride, convert that into cfb stride (cfb is > > > always 4 bytes per pixel). All the calculations will assume 1:1 > > > compression limit since that will give us the max values, and we > > > don't yet know how much stolen memory we will be able to allocate > > > - Align the cfb stride to 512 bytes on modern platforms. This guarantees > > > the 4 line segment will be 512 byte aligned regardles of the final > > > compression limit we choose later. The 512 byte alignment for the > > > segment is required by at least some of the platforms, and just doing > > > it always seems like the easiest option > > > - Figure out if we need to use the override stride or not. For X-tiled > > > it's never needed since the plane stride is already 512 byte aligned, > > > for Y-tiled it will be needed if the plane stride is not a multiple > > > of 512 bytes, and for linear it's apparently always needed because the > > > hardware miscalculates the cfb stride as PLANE_STRIDE*512 instead of > > > the PLANE_STRIDE*64 that it use with linear. > > > - The cfb size will be calculated based on the aligned cfb stride to > > > guarantee we actually reserved enough stolen memory and the FBC hw > > > won't end up scribbling over whatever else is allocated in stolen > > > - The compression limit handling we just do fully in the cfb allocation > > > code to make things less confusing > > > > > > Signed-off-by: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx> > > > --- > > > drivers/gpu/drm/i915/display/intel_fbc.c | 141 ++++++++++++++--------- > > > drivers/gpu/drm/i915/i915_drv.h | 4 +- > > > 2 files changed, 90 insertions(+), 55 deletions(-) > > > > > > diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c > > > b/drivers/gpu/drm/i915/display/intel_fbc.c > > > index f5cbbc53837c..2baf58af016c 100644 > > > --- a/drivers/gpu/drm/i915/display/intel_fbc.c > > > +++ b/drivers/gpu/drm/i915/display/intel_fbc.c > > > @@ -62,19 +62,54 @@ static void > > > intel_fbc_get_plane_source_size(const struct intel_fbc_state_cache * > > > *height = cache->plane.src_h; > > > } > > > > > > -static int intel_fbc_calculate_cfb_size(struct drm_i915_private *dev_priv, > > > - const struct intel_fbc_state_cache *cache) > > > +/* plane stride in pixels */ > > > +static unsigned int intel_fbc_plane_stride(const struct > > > +intel_plane_state *plane_state) > > > { > > > - int lines; > > > + const struct drm_framebuffer *fb = plane_state->hw.fb; > > > + unsigned int stride; > > > + > > > + stride = plane_state->view.color_plane[0].stride; > > > + if (!drm_rotation_90_or_270(plane_state->hw.rotation)) > > > + stride /= fb->format->cpp[0]; > > > + > > > + return stride; > > > +} > > > + > > > +/* plane stride based cfb stride in bytes, assuming 1:1 compression > > > +limit */ static unsigned int _intel_fbc_cfb_stride(const struct > > > +intel_fbc_state_cache *cache) { > > > + /* FBC always 4 bytes per pixel internally */ > > > + return cache->fb.stride * 4; > > > +} > > > + > > > +/* properly aligned cfb stride in bytes, assuming 1:1 compression > > > +limit */ static unsigned int intel_fbc_cfb_stride(struct drm_i915_private *i915, > > > + const struct intel_fbc_state_cache *cache) > > > { > > > + unsigned int stride = _intel_fbc_cfb_stride(cache); > > > + > > > + /* > > > + * At least some of the platforms require each 4 line segment to > > > + * be 512 byte aligned. Aligning each line to 512 bytes guarantees > > > + * that regardless of the compression limit we choose later. > > > + */ > > > + if (DISPLAY_VER(i915) == 9) > > > + return ALIGN(stride, 512); > > > + else > > > + return stride; > > > +} > > > + > > > +static unsigned int intel_fbc_cfb_size(struct drm_i915_private *dev_priv, > > > + const struct intel_fbc_state_cache *cache) { > > > + int lines = cache->plane.src_h; > > > > > > - intel_fbc_get_plane_source_size(cache, NULL, &lines); > > > if (DISPLAY_VER(dev_priv) == 7) > > > lines = min(lines, 2048); > > > else if (DISPLAY_VER(dev_priv) >= 8) > > > lines = min(lines, 2560); > > > > > > - /* Hardware needs the full buffer stride, not just the active area. */ > > > - return lines * cache->fb.stride; > > > + return lines * intel_fbc_cfb_stride(dev_priv, cache); > > > } > > > > > > static void i8xx_fbc_deactivate(struct drm_i915_private *dev_priv) > > > @@ -150,15 > > > +185,9 @@ static bool i8xx_fbc_is_active(struct drm_i915_private > > > +*dev_priv) > > > > > > static u32 g4x_dpfc_ctl_limit(struct drm_i915_private *i915) { > > > - const struct intel_fbc_reg_params *params = &i915->fbc.params; > > > - int limit = i915->fbc.limit; > > > - > > > - if (params->fb.format->cpp[0] == 2) > > > - limit <<= 1; > > > - > > > - switch (limit) { > > > + switch (i915->fbc.limit) { > > > default: > > > - MISSING_CASE(limit); > > > + MISSING_CASE(i915->fbc.limit); > > > fallthrough; > > > case 1: > > > return DPFC_CTL_LIMIT_1X; > > > @@ -301,7 +330,8 @@ static bool ilk_fbc_is_active(struct > > > drm_i915_private > > > *dev_priv) > > > > > > static void gen7_fbc_activate(struct drm_i915_private *dev_priv) { > > > - struct intel_fbc_reg_params *params = &dev_priv->fbc.params; > > > + struct intel_fbc *fbc = &dev_priv->fbc; > > > + const struct intel_fbc_reg_params *params = &fbc->params; > > > u32 dpfc_ctl; > > > > > > /* Display WA #0529: skl, kbl, bxt. */ @@ -310,7 +340,7 @@ static > > > void gen7_fbc_activate(struct drm_i915_private *dev_priv) > > > > > > if (params->override_cfb_stride) > > > val |= CHICKEN_FBC_STRIDE_OVERRIDE | > > > - CHICKEN_FBC_STRIDE(params- > > > >override_cfb_stride); > > > + CHICKEN_FBC_STRIDE(params- > > > >override_cfb_stride / fbc->limit); > > > > > > intel_de_rmw(dev_priv, CHICKEN_MISC_4, > > > CHICKEN_FBC_STRIDE_OVERRIDE | @@ -443,7 +473,12 > @@ static > > > u64 intel_fbc_stolen_end(struct drm_i915_private > > > *dev_priv) > > > return min(end, intel_fbc_cfb_base_max(dev_priv)); > > > } > > > > > > -static int intel_fbc_max_limit(struct drm_i915_private *dev_priv, > > > int fb_cpp) > > > +static int intel_fbc_min_limit(int fb_cpp) { > > > + return fb_cpp == 2 ? 2 : 1; > > > +} > > > + > > > +static int intel_fbc_max_limit(struct drm_i915_private *dev_priv) > > > { > > > /* > > > * FIXME: FBC1 can have arbitrary cfb stride, @@ -457,7 +492,7 @@ > > > static int intel_fbc_max_limit(struct drm_i915_private *dev_priv, int fb_cpp) > > > return 1; > > > > > > /* FBC2 can only do 1:1, 1:2, 1:4 */ > > > - return fb_cpp == 2 ? 2 : 4; > > > + return 4; > > > } > > > > > > static int find_compression_limit(struct drm_i915_private > > > *dev_priv, @@ -466,7 > > > +501,9 @@ static int find_compression_limit(struct drm_i915_private > > > +*dev_priv, { > > > struct intel_fbc *fbc = &dev_priv->fbc; > > > u64 end = intel_fbc_stolen_end(dev_priv); > > > - int ret, limit = 1; > > > + int ret, limit = intel_fbc_min_limit(fb_cpp); > > > + > > > + size /= limit; > > > > > > /* Try to over-allocate to reduce reallocations and fragmentation. */ > > > ret = i915_gem_stolen_insert_node_in_range(dev_priv, &fbc- > > > >compressed_fb, @@ -474,7 +511,7 @@ static int > > > >find_compression_limit(struct > > > drm_i915_private *dev_priv, > > > if (ret == 0) > > > return limit; > > > > > > - for (; limit <= intel_fbc_max_limit(dev_priv, fb_cpp); limit <<= 1) { > > > + for (; limit <= intel_fbc_max_limit(dev_priv); limit <<= 1) { > > > ret = i915_gem_stolen_insert_node_in_range(dev_priv, &fbc- > > > >compressed_fb, > > > size >>= 1, 4096, 0, end); > > > if (ret == 0) > > > @@ -505,10 +542,9 @@ static int intel_fbc_alloc_cfb(struct > > > drm_i915_private *dev_priv, > > > ret = find_compression_limit(dev_priv, size, fb_cpp); > > > if (!ret) > > > goto err_llb; > > > - else if (ret > 1) { > > > + else if (ret > intel_fbc_min_limit(fb_cpp)) > > > drm_info_once(&dev_priv->drm, > > > "Reducing the compressed framebuffer size. This may > lead > > > to less power savings than a non-reduced-size. Try to increase > > > stolen memory size if available in BIOS.\n"); > > > - } > > > > > > fbc->limit = ret; > > > > > > @@ -719,11 +755,7 @@ static void intel_fbc_update_state_cache(struct > > > intel_crtc *crtc, > > > > > > cache->fb.format = fb->format; > > > cache->fb.modifier = fb->modifier; > > > - > > > - /* FIXME is this correct? */ > > > - cache->fb.stride = plane_state->view.color_plane[0].stride; > > > - if (drm_rotation_90_or_270(plane_state->hw.rotation)) > > > - cache->fb.stride *= fb->format->cpp[0]; > > > + cache->fb.stride = intel_fbc_plane_stride(plane_state); > > > > > > /* FBC1 compression interval: arbitrary choice of 1 second */ > > > cache->interval = > > > drm_mode_vrefresh(&crtc_state->hw.adjusted_mode); > > > @@ -746,27 +778,29 @@ static bool intel_fbc_cfb_size_changed(struct > > > drm_i915_private *dev_priv) { > > > struct intel_fbc *fbc = &dev_priv->fbc; > > > > > > - return intel_fbc_calculate_cfb_size(dev_priv, &fbc->state_cache) > > > > + return intel_fbc_cfb_size(dev_priv, &fbc->state_cache) > > > > fbc->compressed_fb.size * fbc->limit; } > > > > > > -static u16 intel_fbc_override_cfb_stride(struct drm_i915_private > > > *dev_priv) > > > +static u16 intel_fbc_override_cfb_stride(struct drm_i915_private *dev_priv, > > > + const struct intel_fbc_state_cache *cache) > > > { > > > - struct intel_fbc *fbc = &dev_priv->fbc; > > > - struct intel_fbc_state_cache *cache = &fbc->state_cache; > > > + unsigned int stride = _intel_fbc_cfb_stride(cache); > > > + unsigned int stride_aligned = intel_fbc_cfb_stride(dev_priv, > > > +cache); > > > > > > - if ((DISPLAY_VER(dev_priv) == 9) && > > > - cache->fb.modifier != I915_FORMAT_MOD_X_TILED) > > > - return DIV_ROUND_UP(cache->plane.src_w, 32 * fbc->limit) * 8; > > > - else > > > - return 0; > > > -} > > > + /* > > > + * Override stride in 64 byte units per 4 line segment. > > > + * > > > + * Gen9 hw miscalculates cfb stride for linear as > > > + * PLANE_STRIDE*512 instead of PLANE_STRIDE*64, so > > > + * we always need to use the override there. > > > + */ > > > + if (stride != stride_aligned || > > > + (DISPLAY_VER(dev_priv) == 9 && > > > + cache->fb.modifier == DRM_FORMAT_MOD_LINEAR)) > > > + return stride_aligned * 4 / 64; > > > > As per bspec WA: 0529 > > "Corruption in some cases when FBC is enabled and the plane surface > > format is in linear, tile Y legacy, or tile Yf > > WA: Display register 4208Ch bit 13 must be set to 1b and bits 12:0 > > must be programmed with the compressed buffer stride value. The compressed > buffer stride must be calculated using the following equation: > > > > Compressed buffer stride = ceiling [(at least plane width in pixels) / (32 * > compression limit factor)] * 8" > > > > We need to use override stride even for TileY/Yf as well along with linear. Does the > 512 alignment takes care of that. > > TileY is actually fine without the w/a if the plane stride is suitably aligned. It only > goes bad when the stride is misaligned. > Ok, so we should be covered here due to the alignment. > Not quite sure about Yf since I've not tested it, but we don't even allow FBC with Yf > at the moment so doesn't really matter. Yeah ok, make sense. > > And also whether the calculation for linear aligns with bspec WA. Just wanted to > highlight, so that we don't miss. > > The bspec calculations are written in a bit convoluted way. I'll respin these patches a > bit to write the calcualtions out in a way that actually makes it clear what we're > doing... Ok sure Ville. Will check and ack that. Regards, Uma Shankar > > Will go with your discretion. > > > > > > > > -static bool intel_fbc_override_cfb_stride_changed(struct > > > drm_i915_private > > > *dev_priv) -{ > > > - struct intel_fbc *fbc = &dev_priv->fbc; > > > - > > > - return fbc->params.override_cfb_stride != > > > intel_fbc_override_cfb_stride(dev_priv); > > > + return 0; > > > } > > > > > > static bool intel_fbc_can_enable(struct drm_i915_private *dev_priv) > > > @@ -861,7 > > > +895,8 @@ static bool intel_fbc_can_activate(struct intel_crtc > > > +*crtc) > > > return false; > > > } > > > > > > - if (!stride_is_valid(dev_priv, cache->fb.modifier, cache->fb.stride)) { > > > + if (!stride_is_valid(dev_priv, cache->fb.modifier, > > > + cache->fb.stride * cache->fb.format->cpp[0])) { > > > fbc->no_fbc_reason = "framebuffer stride not supported"; > > > return false; > > > } > > > @@ -949,9 +984,9 @@ static void intel_fbc_get_reg_params(struct intel_crtc > *crtc, > > > params->fb.modifier = cache->fb.modifier; > > > params->fb.stride = cache->fb.stride; > > > > > > - params->cfb_size = intel_fbc_calculate_cfb_size(dev_priv, cache); > > > - > > > - params->override_cfb_stride = cache->override_cfb_stride; > > > + params->cfb_stride = intel_fbc_cfb_stride(dev_priv, cache); > > > + params->cfb_size = intel_fbc_cfb_size(dev_priv, cache); > > > + params->override_cfb_stride = > > > +intel_fbc_override_cfb_stride(dev_priv, > > > +cache); > > > > > > params->plane_visible = cache->plane.visible; } @@ -982,10 > > > +1017,13 @@ static bool intel_fbc_can_flip_nuke(const struct intel_crtc_state > *crtc_state) > > > if (params->fb.stride != cache->fb.stride) > > > return false; > > > > > > - if (params->cfb_size != intel_fbc_calculate_cfb_size(dev_priv, cache)) > > > + if (params->cfb_stride != intel_fbc_cfb_stride(dev_priv, cache)) > > > return false; > > > > > > - if (params->override_cfb_stride != cache->override_cfb_stride) > > > + if (params->cfb_size != intel_fbc_cfb_size(dev_priv, cache)) > > > + return false; > > > + > > > + if (params->override_cfb_stride != > > > +intel_fbc_override_cfb_stride(dev_priv, cache)) > > > return false; > > > > > > return true; > > > @@ -1266,8 +1304,7 @@ static void intel_fbc_enable(struct > > > intel_atomic_state *state, > > > > > > if (fbc->crtc) { > > > if (fbc->crtc != crtc || > > > - (!intel_fbc_cfb_size_changed(dev_priv) && > > > - !intel_fbc_override_cfb_stride_changed(dev_priv))) > > > + !intel_fbc_cfb_size_changed(dev_priv)) > > > goto out; > > > > > > __intel_fbc_disable(dev_priv); > > > @@ -1282,15 +1319,13 @@ static void intel_fbc_enable(struct > > > intel_atomic_state *state, > > > goto out; > > > > > > if (intel_fbc_alloc_cfb(dev_priv, > > > - intel_fbc_calculate_cfb_size(dev_priv, cache), > > > + intel_fbc_cfb_size(dev_priv, cache), > > > plane_state->hw.fb->format->cpp[0])) { > > > cache->plane.visible = false; > > > fbc->no_fbc_reason = "not enough stolen memory"; > > > goto out; > > > } > > > > > > - cache->override_cfb_stride = intel_fbc_override_cfb_stride(dev_priv); > > > - > > > drm_dbg_kms(&dev_priv->drm, "Enabling FBC on pipe %c\n", > > > pipe_name(crtc->pipe)); > > > fbc->no_fbc_reason = "FBC enabled but not active yet\n"; diff > > > --git a/drivers/gpu/drm/i915/i915_drv.h > > > b/drivers/gpu/drm/i915/i915_drv.h index > > > 91a2d2425fd3..d124306c0a08 100644 > > > --- a/drivers/gpu/drm/i915/i915_drv.h > > > +++ b/drivers/gpu/drm/i915/i915_drv.h > > > @@ -401,7 +401,6 @@ struct intel_fbc { > > > } fb; > > > > > > unsigned int fence_y_offset; > > > - u16 override_cfb_stride; > > > u16 interval; > > > s8 fence_id; > > > bool psr2_active; > > > @@ -426,7 +425,8 @@ struct intel_fbc { > > > u64 modifier; > > > } fb; > > > > > > - int cfb_size; > > > + unsigned int cfb_stride; > > > + unsigned int cfb_size; > > > unsigned int fence_y_offset; > > > u16 override_cfb_stride; > > > u16 interval; > > > -- > > > 2.31.1 > > > > > > _______________________________________________ > > > Intel-gfx mailing list > > > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > > > https://lists.freedesktop.org/mailman/listinfo/intel-gfx > > -- > Ville Syrjälä > Intel