> -----Original Message----- > From: Intel-gfx <intel-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of Vinod > Govindapillai > Sent: Thursday, November 21, 2024 4:57 PM > To: intel-gfx@xxxxxxxxxxxxxxxxxxxxx; intel-xe@xxxxxxxxxxxxxxxxxxxxx > Cc: Govindapillai, Vinod <vinod.govindapillai@xxxxxxxxx>; Nikula, Jani > <jani.nikula@xxxxxxxxx>; Syrjala, Ville <ville.syrjala@xxxxxxxxx>; Saarinen, Jani > <jani.saarinen@xxxxxxxxx> > Subject: [PATCH v2 3/3] drm/i915/xe3: Use hw support for min/interim ddb > allocations for async flip > > From: Stanislav Lisovskiy <stanislav.lisovskiy@xxxxxxxxx> > > Xe3 is capable of switching automatically to min ddb allocation (not using any > extra blocks) or interim SAGV-adjusted allocation in case async flip is used. > Introduce the minimum and interim ddb allocation configuration for that purpose. > Also i915 is replaced with intel_display within the patch's context. > > v2: update min/interim ddb declarations and handling (Ville) > update to register definitions styling > consolidation of minimal wm0 check with min DDB check Change looks good to me. 
Reviewed-by: Uma Shankar <uma.shankar@xxxxxxxxx> > Bspec: 69880, 72053 > Signed-off-by: Stanislav Lisovskiy <stanislav.lisovskiy@xxxxxxxxx> > Signed-off-by: Vinod Govindapillai <vinod.govindapillai@xxxxxxxxx> > --- > .../drm/i915/display/intel_display_types.h | 8 ++ > .../drm/i915/display/skl_universal_plane.c | 26 ++++++ > .../i915/display/skl_universal_plane_regs.h | 15 ++++ > drivers/gpu/drm/i915/display/skl_watermark.c | 87 +++++++++++++++---- > 4 files changed, 118 insertions(+), 18 deletions(-) > > diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h > b/drivers/gpu/drm/i915/display/intel_display_types.h > index 339e4b0f7698..278b4c21f9d9 100644 > --- a/drivers/gpu/drm/i915/display/intel_display_types.h > +++ b/drivers/gpu/drm/i915/display/intel_display_types.h > @@ -769,6 +769,7 @@ struct skl_wm_level { > u8 lines; > bool enable; > bool ignore_lines; > + bool auto_min_alloc_wm_enable; > bool can_sagv; > }; > > @@ -863,6 +864,13 @@ struct intel_crtc_wm_state { > struct skl_ddb_entry plane_ddb[I915_MAX_PLANES]; > /* pre-icl: for planar Y */ > struct skl_ddb_entry plane_ddb_y[I915_MAX_PLANES]; > + > + /* > + * xe3: Minimum amount of display blocks and minimum > + * sagv allocation required for async flip > + */ > + u16 plane_min_ddb[I915_MAX_PLANES]; > + u16 plane_interim_ddb[I915_MAX_PLANES]; > } skl; > > struct { > diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c > b/drivers/gpu/drm/i915/display/skl_universal_plane.c > index 4c7bcf6806ff..ff9764cac1e7 100644 > --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c > +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c > @@ -717,6 +717,22 @@ static u32 skl_plane_ddb_reg_val(const struct > skl_ddb_entry *entry) > PLANE_BUF_START(entry->start); > } > > +static u32 xe3_plane_min_ddb_reg_val(const u16 *min_ddb, > + const u16 *interim_ddb) > +{ > + u32 val = 0; > + > + if (*min_ddb) > + val |= PLANE_MIN_DBUF_BLOCKS(*min_ddb); > + > + if (*interim_ddb) > + val |= 
PLANE_INTERIM_DBUF_BLOCKS(*interim_ddb); > + > + val |= val ? PLANE_AUTO_MIN_DBUF_EN : 0; > + > + return val; > +} > + > static u32 skl_plane_wm_reg_val(const struct skl_wm_level *level) { > u32 val = 0; > @@ -725,6 +741,9 @@ static u32 skl_plane_wm_reg_val(const struct > skl_wm_level *level) > val |= PLANE_WM_EN; > if (level->ignore_lines) > val |= PLANE_WM_IGNORE_LINES; > + if (level->auto_min_alloc_wm_enable) > + val |= PLANE_WM_AUTO_MIN_ALLOC_EN; > + > val |= REG_FIELD_PREP(PLANE_WM_BLOCKS_MASK, level->blocks); > val |= REG_FIELD_PREP(PLANE_WM_LINES_MASK, level->lines); > > @@ -743,6 +762,9 @@ static void skl_write_plane_wm(struct intel_dsb *dsb, > &crtc_state->wm.skl.plane_ddb[plane_id]; > const struct skl_ddb_entry *ddb_y = > &crtc_state->wm.skl.plane_ddb_y[plane_id]; > + const u16 *min_ddb = &crtc_state->wm.skl.plane_min_ddb[plane_id]; > + const u16 *interim_ddb = > + &crtc_state->wm.skl.plane_interim_ddb[plane_id]; > int level; > > for (level = 0; level < display->wm.num_levels; level++) @@ -767,6 > +789,10 @@ static void skl_write_plane_wm(struct intel_dsb *dsb, > if (DISPLAY_VER(display) < 11) > intel_de_write_dsb(display, dsb, PLANE_NV12_BUF_CFG(pipe, > plane_id), > skl_plane_ddb_reg_val(ddb_y)); > + > + if (DISPLAY_VER(display) >= 30) > + intel_de_write_dsb(display, dsb, PLANE_MIN_BUF_CFG(pipe, > plane_id), > + xe3_plane_min_ddb_reg_val(min_ddb, > interim_ddb)); > } > > static void > diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane_regs.h > b/drivers/gpu/drm/i915/display/skl_universal_plane_regs.h > index ff31a00d511e..ca9fdfbbe57c 100644 > --- a/drivers/gpu/drm/i915/display/skl_universal_plane_regs.h > +++ b/drivers/gpu/drm/i915/display/skl_universal_plane_regs.h > @@ -322,6 +322,7 @@ > _PLANE_WM_2_A_0, > _PLANE_WM_2_B_0) > #define PLANE_WM_EN REG_BIT(31) > #define PLANE_WM_IGNORE_LINES REG_BIT(30) > +#define PLANE_WM_AUTO_MIN_ALLOC_EN REG_BIT(29) > #define PLANE_WM_LINES_MASK REG_GENMASK(26, 14) > #define PLANE_WM_BLOCKS_MASK 
REG_GENMASK(11, 0) > > @@ -373,12 +374,26 @@ > #define PLANE_BUF_CFG(pipe, plane) _MMIO_SKL_PLANE((pipe), (plane), \ > > _PLANE_BUF_CFG_1_A, _PLANE_BUF_CFG_1_B, \ > > _PLANE_BUF_CFG_2_A, _PLANE_BUF_CFG_2_B) > + > /* skl+: 10 bits, icl+ 11 bits, adlp+ 12 bits */ > #define PLANE_BUF_END_MASK REG_GENMASK(27, 16) > #define PLANE_BUF_END(end) > REG_FIELD_PREP(PLANE_BUF_END_MASK, (end)) > #define PLANE_BUF_START_MASK REG_GENMASK(11, 0) > #define PLANE_BUF_START(start) > REG_FIELD_PREP(PLANE_BUF_START_MASK, (start)) > > +#define _PLANE_MIN_BUF_CFG_1_A 0x70274 > +#define _PLANE_MIN_BUF_CFG_2_A 0x70374 > +#define _PLANE_MIN_BUF_CFG_1_B 0x71274 > +#define _PLANE_MIN_BUF_CFG_2_B 0x71374 > +#define PLANE_MIN_BUF_CFG(pipe, plane) _MMIO_SKL_PLANE((pipe), > (plane), \ > + > _PLANE_MIN_BUF_CFG_1_A, _PLANE_MIN_BUF_CFG_1_B, \ > + > _PLANE_MIN_BUF_CFG_2_A, _PLANE_MIN_BUF_CFG_2_B) > +#define PLANE_AUTO_MIN_DBUF_EN REG_BIT(31) > +#define PLANE_MIN_DBUF_BLOCKS_MASK > REG_GENMASK(27, 16) > +#define PLANE_MIN_DBUF_BLOCKS(val) > REG_FIELD_PREP(PLANE_MIN_DBUF_BLOCKS_MASK, (val)) > +#define PLANE_INTERIM_DBUF_BLOCKS_MASK REG_GENMASK(11, 0) > +#define PLANE_INTERIM_DBUF_BLOCKS(val) > REG_FIELD_PREP(PLANE_INTERIM_DBUF_BLOCKS_MASK, (val)) > + > /* tgl+ */ > #define _SEL_FETCH_PLANE_CTL_1_A 0x70890 > #define _SEL_FETCH_PLANE_CTL_2_A 0x708b0 > diff --git a/drivers/gpu/drm/i915/display/skl_watermark.c > b/drivers/gpu/drm/i915/display/skl_watermark.c > index 23ed989f01dc..2d87e02d90e6 100644 > --- a/drivers/gpu/drm/i915/display/skl_watermark.c > +++ b/drivers/gpu/drm/i915/display/skl_watermark.c > @@ -801,30 +801,40 @@ skl_ddb_get_hw_plane_state(struct drm_i915_private > *i915, > const enum pipe pipe, > const enum plane_id plane_id, > struct skl_ddb_entry *ddb, > - struct skl_ddb_entry *ddb_y) > + struct skl_ddb_entry *ddb_y, > + u16 *min_ddb, u16 *interim_ddb) > { > + struct intel_display *display = &i915->display; > u32 val; > > /* Cursor doesn't support NV12/planar, so no extra calculation 
needed */ > if (plane_id == PLANE_CURSOR) { > - val = intel_de_read(i915, CUR_BUF_CFG(pipe)); > + val = intel_de_read(display, CUR_BUF_CFG(pipe)); > skl_ddb_entry_init_from_hw(ddb, val); > return; > } > > - val = intel_de_read(i915, PLANE_BUF_CFG(pipe, plane_id)); > + val = intel_de_read(display, PLANE_BUF_CFG(pipe, plane_id)); > skl_ddb_entry_init_from_hw(ddb, val); > > - if (DISPLAY_VER(i915) >= 11) > + if (DISPLAY_VER(display) >= 30) { > + val = intel_de_read(display, PLANE_MIN_BUF_CFG(pipe, > plane_id)); > + > + *min_ddb = > REG_FIELD_GET(PLANE_MIN_DBUF_BLOCKS_MASK, val); > + *interim_ddb = > REG_FIELD_GET(PLANE_INTERIM_DBUF_BLOCKS_MASK, val); > + } > + > + if (DISPLAY_VER(display) >= 11) > return; > > - val = intel_de_read(i915, PLANE_NV12_BUF_CFG(pipe, plane_id)); > + val = intel_de_read(display, PLANE_NV12_BUF_CFG(pipe, plane_id)); > skl_ddb_entry_init_from_hw(ddb_y, val); } > > static void skl_pipe_ddb_get_hw_state(struct intel_crtc *crtc, > struct skl_ddb_entry *ddb, > - struct skl_ddb_entry *ddb_y) > + struct skl_ddb_entry *ddb_y, > + u16 *min_ddb, u16 *interim_ddb) > { > struct drm_i915_private *i915 = to_i915(crtc->base.dev); > enum intel_display_power_domain power_domain; @@ -841,7 +851,9 > @@ static void skl_pipe_ddb_get_hw_state(struct intel_crtc *crtc, > skl_ddb_get_hw_plane_state(i915, pipe, > plane_id, > &ddb[plane_id], > - &ddb_y[plane_id]); > + &ddb_y[plane_id], > + &min_ddb[plane_id], > + &interim_ddb[plane_id]); > > intel_display_power_put(i915, power_domain, wakeref); } @@ -1376,9 > +1388,10 @@ static bool use_minimal_wm0_only(const struct intel_crtc_state > *crtc_state, > struct intel_plane *plane) > { > - struct drm_i915_private *i915 = to_i915(plane->base.dev); > + struct intel_display *display = to_intel_display(plane); > > - return DISPLAY_VER(i915) >= 13 && > + /* Xe3+ are auto minimum DDB capable. 
So don't force minimal wm0 */ > + return IS_DISPLAY_VER(display, 13, 20) && > crtc_state->uapi.async_flip && > plane->async_flip; > } > @@ -1535,6 +1548,7 @@ skl_crtc_allocate_plane_ddb(struct intel_atomic_state > *state, > const struct intel_dbuf_state *dbuf_state = > intel_atomic_get_new_dbuf_state(state); > const struct skl_ddb_entry *alloc = &dbuf_state->ddb[crtc->pipe]; > + struct intel_display *display = to_intel_display(state); > int num_active = hweight8(dbuf_state->active_pipes); > struct skl_plane_ddb_iter iter; > enum plane_id plane_id; > @@ -1545,6 +1559,10 @@ skl_crtc_allocate_plane_ddb(struct > intel_atomic_state *state, > /* Clear the partitioning for disabled planes. */ > memset(crtc_state->wm.skl.plane_ddb, 0, sizeof(crtc_state- > >wm.skl.plane_ddb)); > memset(crtc_state->wm.skl.plane_ddb_y, 0, sizeof(crtc_state- > >wm.skl.plane_ddb_y)); > + memset(crtc_state->wm.skl.plane_min_ddb, 0, > + sizeof(crtc_state->wm.skl.plane_min_ddb)); > + memset(crtc_state->wm.skl.plane_interim_ddb, 0, > + sizeof(crtc_state->wm.skl.plane_interim_ddb)); > > if (!crtc_state->hw.active) > return 0; > @@ -1617,6 +1635,9 @@ skl_crtc_allocate_plane_ddb(struct intel_atomic_state > *state, > &crtc_state->wm.skl.plane_ddb[plane_id]; > struct skl_ddb_entry *ddb_y = > &crtc_state->wm.skl.plane_ddb_y[plane_id]; > + u16 *min_ddb = &crtc_state->wm.skl.plane_min_ddb[plane_id]; > + u16 *interim_ddb = > + &crtc_state->wm.skl.plane_interim_ddb[plane_id]; > const struct skl_plane_wm *wm = > &crtc_state->wm.skl.optimal.planes[plane_id]; > > @@ -1633,6 +1654,11 @@ skl_crtc_allocate_plane_ddb(struct > intel_atomic_state *state, > skl_allocate_plane_ddb(&iter, ddb, &wm->wm[level], > crtc_state- > >rel_data_rate[plane_id]); > } > + > + if (DISPLAY_VER(display) >= 30) { > + *min_ddb = wm->wm[0].min_ddb_alloc; > + *interim_ddb = wm->sagv.wm0.min_ddb_alloc; > + } > } > drm_WARN_ON(&i915->drm, iter.size != 0 || iter.data_rate != 0); > > @@ -1676,6 +1702,8 @@ skl_crtc_allocate_plane_ddb(struct 
intel_atomic_state > *state, > &crtc_state->wm.skl.plane_ddb[plane_id]; > const struct skl_ddb_entry *ddb_y = > &crtc_state->wm.skl.plane_ddb_y[plane_id]; > + u16 *interim_ddb = > + &crtc_state->wm.skl.plane_interim_ddb[plane_id]; > struct skl_plane_wm *wm = > &crtc_state->wm.skl.optimal.planes[plane_id]; > > @@ -1689,6 +1717,9 @@ skl_crtc_allocate_plane_ddb(struct intel_atomic_state > *state, > } > > skl_check_wm_level(&wm->sagv.wm0, ddb); > + if (DISPLAY_VER(display) >= 30) > + *interim_ddb = wm->sagv.wm0.min_ddb_alloc; > + > skl_check_wm_level(&wm->sagv.trans_wm, ddb); > } > > @@ -1767,6 +1798,7 @@ skl_compute_wm_params(const struct > intel_crtc_state *crtc_state, > int color_plane, unsigned int pan_x) { > struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); > + struct intel_display *display = to_intel_display(crtc_state); > struct drm_i915_private *i915 = to_i915(crtc->base.dev); > u32 interm_pbpl; > > @@ -1825,7 +1857,7 @@ skl_compute_wm_params(const struct > intel_crtc_state *crtc_state, > wp->y_min_scanlines, > wp->dbuf_block_size); > > - if (DISPLAY_VER(i915) >= 30) > + if (DISPLAY_VER(display) >= 30) > interm_pbpl += (pan_x != 0); > else if (DISPLAY_VER(i915) >= 10) > interm_pbpl++; > @@ -1890,6 +1922,12 @@ static int skl_wm_max_lines(struct drm_i915_private > *i915) > return 31; > } > > +static bool xe3_auto_min_alloc_capable(struct intel_display *display, > + int level) > +{ > + return DISPLAY_VER(display) >= 30 && level == 0; } > + > static void skl_compute_plane_wm(const struct intel_crtc_state *crtc_state, > struct intel_plane *plane, > int level, > @@ -1899,6 +1937,7 @@ static void skl_compute_plane_wm(const struct > intel_crtc_state *crtc_state, > struct skl_wm_level *result /* out */) { > struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev); > + struct intel_display *display = to_intel_display(crtc_state); > uint_fixed_16_16_t method1, method2; > uint_fixed_16_16_t selected_result; > u32 blocks, lines, min_ddb_alloc = 0; > 
@@ -2022,6 +2061,7 @@ static void skl_compute_plane_wm(const struct > intel_crtc_state *crtc_state, > /* Bspec says: value >= plane ddb allocation -> invalid, hence the +1 here > */ > result->min_ddb_alloc = max(min_ddb_alloc, blocks) + 1; > result->enable = true; > + result->auto_min_alloc_wm_enable = > xe3_auto_min_alloc_capable(display, > +level); > > if (DISPLAY_VER(i915) < 12 && i915->display.sagv.block_time_us) > result->can_sagv = latency >= i915->display.sagv.block_time_us; > @@ -2401,16 +2441,18 @@ static bool skl_wm_level_equals(const struct > skl_wm_level *l1, > return l1->enable == l2->enable && > l1->ignore_lines == l2->ignore_lines && > l1->lines == l2->lines && > - l1->blocks == l2->blocks; > + l1->blocks == l2->blocks && > + l1->auto_min_alloc_wm_enable == l2- > >auto_min_alloc_wm_enable; > } > > static bool skl_plane_wm_equals(struct drm_i915_private *i915, > const struct skl_plane_wm *wm1, > const struct skl_plane_wm *wm2) > { > + struct intel_display *display = &i915->display; > int level; > > - for (level = 0; level < i915->display.wm.num_levels; level++) { > + for (level = 0; level < display->wm.num_levels; level++) { > /* > * We don't check uv_wm as the hardware doesn't actually > * use it. It only gets used for calculating the required @@ - > 2950,6 +2992,8 @@ static void skl_wm_level_from_reg_val(struct intel_display > *display, > level->ignore_lines = val & PLANE_WM_IGNORE_LINES; > level->blocks = REG_FIELD_GET(PLANE_WM_BLOCKS_MASK, val); > level->lines = REG_FIELD_GET(PLANE_WM_LINES_MASK, val); > + level->auto_min_alloc_wm_enable = DISPLAY_VER(display) >= 30 ? 
> + val & > PLANE_WM_AUTO_MIN_ALLOC_EN : 0; > } > > static void skl_pipe_wm_get_hw_state(struct intel_crtc *crtc, @@ -3009,11 > +3053,11 @@ static void skl_wm_get_hw_state(struct drm_i915_private *i915) > struct intel_crtc *crtc; > > if (HAS_MBUS_JOINING(display)) > - dbuf_state->joined_mbus = intel_de_read(i915, MBUS_CTL) & > MBUS_JOIN; > + dbuf_state->joined_mbus = intel_de_read(display, MBUS_CTL) & > +MBUS_JOIN; > > dbuf_state->mdclk_cdclk_ratio = intel_mdclk_cdclk_ratio(display, > &display->cdclk.hw); > > - for_each_intel_crtc(&i915->drm, crtc) { > + for_each_intel_crtc(display->drm, crtc) { > struct intel_crtc_state *crtc_state = > to_intel_crtc_state(crtc->base.state); > enum pipe pipe = crtc->pipe; > @@ -3034,12 +3078,17 @@ static void skl_wm_get_hw_state(struct > drm_i915_private *i915) > &crtc_state->wm.skl.plane_ddb[plane_id]; > struct skl_ddb_entry *ddb_y = > &crtc_state->wm.skl.plane_ddb_y[plane_id]; > + u16 *min_ddb = > + &crtc_state->wm.skl.plane_min_ddb[plane_id]; > + u16 *interim_ddb = > + &crtc_state- > >wm.skl.plane_interim_ddb[plane_id]; > > if (!crtc_state->hw.active) > continue; > > skl_ddb_get_hw_plane_state(i915, crtc->pipe, > - plane_id, ddb, ddb_y); > + plane_id, ddb, ddb_y, > + min_ddb, interim_ddb); > > skl_ddb_entry_union(&dbuf_state->ddb[pipe], ddb); > skl_ddb_entry_union(&dbuf_state->ddb[pipe], ddb_y); > @@ -3061,7 +3110,7 @@ static void skl_wm_get_hw_state(struct > drm_i915_private *i915) > dbuf_state->slices[pipe] = > skl_ddb_dbuf_slice_mask(i915, &crtc_state- > >wm.skl.ddb); > > - drm_dbg_kms(&i915->drm, > + drm_dbg_kms(display->drm, > "[CRTC:%d:%s] dbuf slices 0x%x, ddb (%d - %d), active > pipes 0x%x, mbus joined: %s\n", > crtc->base.base.id, crtc->base.name, > dbuf_state->slices[pipe], dbuf_state->ddb[pipe].start, > @@ -3069,7 +3118,7 @@ static void skl_wm_get_hw_state(struct > drm_i915_private *i915) > str_yes_no(dbuf_state->joined_mbus)); > } > > - dbuf_state->enabled_slices = i915->display.dbuf.enabled_slices; > + 
dbuf_state->enabled_slices = display->dbuf.enabled_slices; > } > > bool skl_watermark_ipc_enabled(struct drm_i915_private *i915) @@ -3704,6 > +3753,8 @@ void intel_wm_state_verify(struct intel_atomic_state *state, > struct skl_hw_state { > struct skl_ddb_entry ddb[I915_MAX_PLANES]; > struct skl_ddb_entry ddb_y[I915_MAX_PLANES]; > + u16 min_ddb[I915_MAX_PLANES]; > + u16 interim_ddb[I915_MAX_PLANES]; > struct skl_pipe_wm wm; > } *hw; > const struct skl_pipe_wm *sw_wm = &new_crtc_state->wm.skl.optimal; > @@ -3720,7 +3771,7 @@ void intel_wm_state_verify(struct intel_atomic_state > *state, > > skl_pipe_wm_get_hw_state(crtc, &hw->wm); > > - skl_pipe_ddb_get_hw_state(crtc, hw->ddb, hw->ddb_y); > + skl_pipe_ddb_get_hw_state(crtc, hw->ddb, hw->ddb_y, hw->min_ddb, > +hw->interim_ddb); > > hw_enabled_slices = intel_enabled_dbuf_slices_mask(i915); > > -- > 2.34.1