This patch disables workarounds related to display arbitrated memory bandwidth when they are not required. The WAs are applicable to all GEN9 based platforms. Changes since v1: - Rebase on top of Paulo's patch series Changes since v2: - Address review comments - Rebase/rework as per other patch changes in series Changes since v3: - Rebase the patch - Address Paulo's review comments - introduce ww_mutex to protect WM operations - Protect system memory bandwidth calculation with ww_mutex Changes since v4: - drop goto label - fix compilation warnings - Add crtc only if its WM got changed Signed-off-by: Mahesh Kumar <mahesh1.kumar@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_drv.c | 1 + drivers/gpu/drm/i915/i915_drv.h | 15 +++ drivers/gpu/drm/i915/intel_display.c | 34 +++++++ drivers/gpu/drm/i915/intel_pm.c | 187 ++++++++++++++++++++++++++++++----- 4 files changed, 213 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 9203dad1db07..126c11060ea7 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -835,6 +835,7 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv, mutex_init(&dev_priv->modeset_restore_lock); mutex_init(&dev_priv->av_mutex); mutex_init(&dev_priv->wm.wm_mutex); + drm_modeset_lock_init(&dev_priv->wm.wm_ww_mutex); mutex_init(&dev_priv->pps_mutex); intel_uc_init_early(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a88b2721dbe7..974823838da9 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1189,6 +1189,13 @@ enum intel_sbi_destination { SBI_MPHY, }; +/* SKL+ Watermark arbitrated display bandwidth Workarounds */ +enum watermark_memory_wa { + WATERMARK_WA_NONE, + WATERMARK_WA_X_TILED, + WATERMARK_WA_Y_TILED, +}; + #define QUIRK_PIPEA_FORCE (1<<0) #define QUIRK_LVDS_SSC_DISABLE (1<<1) #define QUIRK_INVERT_BRIGHTNESS (1<<2) @@ -1754,8 +1761,15 @@ struct skl_ddb_allocation { 
struct skl_ddb_entry y_plane[I915_MAX_PIPES][I915_MAX_PLANES]; }; +struct skl_mem_bw_attr { + enum watermark_memory_wa mem_wa; + uint32_t pipe_bw_kbps[I915_MAX_PIPES]; + bool pipe_y_tiled[I915_MAX_PIPES]; +}; + struct skl_wm_values { unsigned dirty_pipes; + struct skl_mem_bw_attr mem_attr; struct skl_ddb_allocation ddb; }; @@ -2375,6 +2389,7 @@ struct drm_i915_private { * cstate->wm.need_postvbl_update. */ struct mutex wm_mutex; + struct drm_modeset_lock wm_ww_mutex; /* protect DDB/WM redistribution among pipes */ /* * Set during HW readout of watermarks/DDB. Some platforms diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 0e92f9bdda1f..76f78879f6e3 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -13030,6 +13030,38 @@ static void intel_atomic_track_fbs(struct drm_atomic_state *state) to_intel_plane(plane)->frontbuffer_bit); } +static void +intel_update_wm_bw_wa(struct drm_atomic_state *state) +{ + struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + struct drm_i915_private *dev_priv = to_i915(state->dev); + const struct drm_crtc *crtc; + const struct drm_crtc_state *cstate; + const struct memdev_info *memdev_info = &dev_priv->memdev_info; + struct skl_mem_bw_attr *results = &intel_state->wm_results.mem_attr; + struct skl_mem_bw_attr *hw_vals = &dev_priv->wm.skl_hw.mem_attr; + int i; + + if (!IS_GEN9(dev_priv)) + return; + + if (!memdev_info->valid) + return; + + for_each_crtc_in_state(state, crtc, cstate, i) { + hw_vals->pipe_bw_kbps[i] = results->pipe_bw_kbps[i]; + hw_vals->pipe_y_tiled[i] = results->pipe_y_tiled[i]; + } + + hw_vals->mem_wa = results->mem_wa; + + /* + * As we already updated state variables no need to hold the lock, + * other commits can proceed now with their calculation + */ + drm_modeset_unlock(&dev_priv->wm.wm_ww_mutex); +} + /** * intel_atomic_commit - commit validated state object * @dev: DRM device @@ -13070,6 +13102,8 @@ static 
int intel_atomic_commit(struct drm_device *dev, intel_shared_dpll_swap_state(state); intel_atomic_track_fbs(state); + intel_update_wm_bw_wa(state); + if (intel_state->modeset) { memcpy(dev_priv->min_pixclk, intel_state->min_pixclk, sizeof(intel_state->min_pixclk)); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index e578d7f9f871..7c0113bf2387 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -2867,20 +2867,6 @@ bool ilk_disable_lp_wm(struct drm_device *dev) #define SKL_SAGV_BLOCK_TIME 30 /* µs */ -/* - * FIXME: We still don't have the proper code detect if we need to apply the WA, - * so assume we'll always need it in order to avoid underruns. - */ -static bool skl_needs_memory_bw_wa(struct intel_atomic_state *state) -{ - struct drm_i915_private *dev_priv = to_i915(state->base.dev); - - if (IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv)) - return true; - - return false; -} - static bool intel_has_sagv(struct drm_i915_private *dev_priv) { @@ -3028,7 +3014,8 @@ bool intel_can_enable_sagv(struct drm_atomic_state *state) latency = dev_priv->wm.skl_latency[level]; - if (skl_needs_memory_bw_wa(intel_state) && + if (intel_state->wm_results.mem_attr.mem_wa != + WATERMARK_WA_NONE && plane->base.state->fb->modifier == I915_FORMAT_MOD_X_TILED) latency += 15; @@ -3560,7 +3547,7 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, uint32_t y_min_scanlines; struct intel_atomic_state *state = to_intel_atomic_state(cstate->base.state); - bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state); + enum watermark_memory_wa mem_wa = state->wm_results.mem_attr.mem_wa; bool y_tiled, x_tiled; if (latency == 0 || !cstate->base.active || !intel_pstate->base.visible) { @@ -3576,7 +3563,7 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, if (IS_KABYLAKE(dev_priv) && dev_priv->ipc_enabled) latency += 4; - if (apply_memory_bw_wa && x_tiled) + if (mem_wa != WATERMARK_WA_NONE && 
x_tiled) latency += 15; width = drm_rect_width(&intel_pstate->base.src) >> 16; @@ -3611,7 +3598,7 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, y_min_scanlines = 4; } - if (apply_memory_bw_wa) + if (mem_wa == WATERMARK_WA_Y_TILED) y_min_scanlines *= 2; plane_bytes_per_line = width * cpp; @@ -4064,6 +4051,128 @@ skl_compute_ddb(struct drm_atomic_state *state) return 0; } +static int +skl_compute_memory_bandwidth_wm_wa(struct drm_atomic_state *state) +{ + struct drm_device *dev = state->dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + struct memdev_info *memdev_info = &dev_priv->memdev_info; + struct skl_mem_bw_attr *mem_attr = &intel_state->wm_results.mem_attr; + struct drm_crtc *crtc; + struct drm_crtc_state *cstate; + int active_pipes = 0; + uint32_t max_pipe_bw_kbps = 0, total_pipe_bw_kbps; + int display_bw_percentage; + bool y_tile_enabled = false; + int ret, i; + + if (!IS_GEN9(dev_priv)) { + mem_attr->mem_wa = WATERMARK_WA_NONE; + return 0; + } + + if (!memdev_info->valid) { + mem_attr->mem_wa = WATERMARK_WA_Y_TILED; + return 0; + } + + /* + * We hold wm_mutex lock, so any other flip can't proceed beyond WM + * calculation step until this flip writes modified WM values. 
+ * So it's safe to read plane_state of other pipes without taking CRTC lock + */ + ret = drm_modeset_lock(&dev_priv->wm.wm_ww_mutex, state->acquire_ctx); + if (ret) + return ret; + + memcpy(mem_attr, &dev_priv->wm.skl_hw.mem_attr, sizeof(*mem_attr)); + + for_each_crtc_in_state(state, crtc, cstate, i) { + struct drm_plane *plane; + const struct drm_plane_state *pstate; + int active_planes = 0; + uint32_t max_plane_bw_kbps = 0; + + mem_attr->pipe_y_tiled[i] = false; + + if (!cstate->active) { + mem_attr->pipe_bw_kbps[i] = 0; + continue; + } + + drm_atomic_crtc_state_for_each_plane_state(plane, pstate, + cstate) { + struct drm_framebuffer *fb; + uint32_t plane_bw_kbps; + enum plane_id id = to_intel_plane(plane)->id; + + if (!pstate->visible) + continue; + + if (WARN_ON(!pstate->fb)) + continue; + + if (id == PLANE_CURSOR) + continue; + + fb = pstate->fb; + if ((fb->modifier == I915_FORMAT_MOD_Y_TILED || + fb->modifier == I915_FORMAT_MOD_Yf_TILED)) + mem_attr->pipe_y_tiled[i] = true; + + plane_bw_kbps = skl_adjusted_plane_pixel_rate( + to_intel_crtc_state(cstate), + to_intel_plane_state(pstate)); + max_plane_bw_kbps = max(plane_bw_kbps, + max_plane_bw_kbps); + active_planes++; + } + mem_attr->pipe_bw_kbps[i] = max_plane_bw_kbps * active_planes; + } + + for_each_pipe(dev_priv, i) { + if (mem_attr->pipe_bw_kbps[i]) { + max_pipe_bw_kbps = max(max_pipe_bw_kbps, + mem_attr->pipe_bw_kbps[i]); + active_pipes++; + } + if (mem_attr->pipe_y_tiled[i]) + y_tile_enabled = true; + } + + total_pipe_bw_kbps = max_pipe_bw_kbps * active_pipes; + display_bw_percentage = DIV_ROUND_UP_ULL((uint64_t)total_pipe_bw_kbps * + 100, memdev_info->bandwidth_kbps); + + /* + * If there is any Ytile plane enabled and arbitrated display + * bandwidth > 20% of raw system memory bandwidth + * Enable Y-tile related WA + * + * If memory is dual channel single rank, Xtile limit = 35%, else Xtile + * limit = 60% + * If there is no Ytile plane enabled and + * arbitrated display bandwidth > Xtile limit + * 
+ Enable X-tile related WA + */ + if (y_tile_enabled && (display_bw_percentage > 20)) + mem_attr->mem_wa = WATERMARK_WA_Y_TILED; + else { + int x_tile_percentage = 60; + enum memdev_rank rank = memdev_info->rank; + + if ((rank == I915_DRAM_RANK_SINGLE) && + (memdev_info->num_channels == 2)) + x_tile_percentage = 35; + + if (display_bw_percentage > x_tile_percentage) + mem_attr->mem_wa = WATERMARK_WA_X_TILED; + } + + return 0; +} + static void skl_copy_wm_for_pipe(struct skl_wm_values *dst, struct skl_wm_values *src, @@ -4118,7 +4227,10 @@ skl_compute_wm(struct drm_atomic_state *state) struct drm_crtc *crtc; struct drm_crtc_state *cstate; struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + struct drm_device *dev = state->dev; + struct drm_i915_private *dev_priv = to_i915(dev); struct skl_wm_values *results = &intel_state->wm_results; + struct intel_crtc *intel_crtc; struct skl_pipe_wm *pipe_wm; bool changed = false; int ret, i; @@ -4139,6 +4251,10 @@ skl_compute_wm(struct drm_atomic_state *state) /* Clear all dirty flags */ results->dirty_pipes = 0; + ret = skl_compute_memory_bandwidth_wm_wa(state); + if (ret) + return ret; + ret = skl_compute_ddb(state); if (ret) return ret; @@ -4153,20 +4269,43 @@ skl_compute_wm(struct drm_atomic_state *state) * should allow skl_update_pipe_wm() to return failure in cases where * no suitable watermark values can be found. */ - for_each_crtc_in_state(state, crtc, cstate, i) { - struct intel_crtc_state *intel_cstate = - to_intel_crtc_state(cstate); - const struct skl_pipe_wm *old_pipe_wm = - &to_intel_crtc_state(crtc->state)->wm.skl.optimal; + for_each_intel_crtc(dev, intel_crtc) { + struct intel_crtc_state *intel_cstate; + const struct skl_pipe_wm *old_pipe_wm; + /* + * We already hold a wm ww_mutex here, so we can peek into + * other pipe's crtc_state without any race condition. 
+ */ + crtc = &intel_crtc->base; + cstate = drm_atomic_get_existing_crtc_state(state, crtc); + if (!cstate) { + if (intel_state->wm_results.mem_attr.mem_wa != + dev_priv->wm.skl_hw.mem_attr.mem_wa) + cstate = crtc->state; + else + continue; + } + + intel_cstate = to_intel_crtc_state(cstate); + old_pipe_wm = &to_intel_crtc_state(crtc->state)->wm.skl.optimal; pipe_wm = &intel_cstate->wm.skl.optimal; ret = skl_update_pipe_wm(cstate, old_pipe_wm, pipe_wm, &results->ddb, &changed); if (ret) return ret; - if (changed) + if (changed) { + /* + * If WM is changed for crtc and crtc is not already + * part of drm_atomic_state, add crtc to + * drm_atomic_state. + */ + cstate = drm_atomic_get_crtc_state(state, crtc); + if (IS_ERR(cstate)) + return PTR_ERR(cstate); results->dirty_pipes |= drm_crtc_mask(crtc); + } if ((results->dirty_pipes & drm_crtc_mask(crtc)) == 0) /* This pipe's WM's did not change */ -- 2.11.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx