Em Qui, 2016-12-01 às 21:19 +0530, Mahesh Kumar escreveu: > This patch implemnets Workariunds related to display arbitrated > memory > bandwidth. These WA are applicabe for all gen-9 based platforms. 3 typos above. The WA is already implemented. What the patch does is that it opts-out of the WA in case we conclude it's safe. Please say this in the commit message. > > Changes since v1: > - Rebase on top of Paulo's patch series > Changes since v2: > - Address review comments > - Rebase/rework as per other patch changes in series > Changes since v3: > - Rework based on Maarten's comments > > Signed-off-by: Mahesh Kumar <mahesh1.kumar@xxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_drv.h | 11 +++ > drivers/gpu/drm/i915/intel_display.c | 24 ++++++ > drivers/gpu/drm/i915/intel_pm.c | 155 > +++++++++++++++++++++++++++++++++-- > 3 files changed, 181 insertions(+), 9 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h > b/drivers/gpu/drm/i915/i915_drv.h > index 69213a4..3126259 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -1216,6 +1216,13 @@ enum intel_sbi_destination { > SBI_MPHY, > }; > > +/* SKL+ Watermark arbitrated display bandwidth Workarounds */ > +enum watermark_memory_wa { > + WATERMARK_WA_NONE, > + WATERMARK_WA_X_TILED, > + WATERMARK_WA_Y_TILED, > +}; > + > #define QUIRK_PIPEA_FORCE (1<<0) > #define QUIRK_LVDS_SSC_DISABLE (1<<1) > #define QUIRK_INVERT_BRIGHTNESS (1<<2) > @@ -1788,6 +1795,10 @@ struct skl_ddb_allocation { > > struct skl_wm_values { > unsigned dirty_pipes; > + /* any WaterMark memory workaround Required */ CapitaliZation is weird Here. Besides, no need for the comment. > + enum watermark_memory_wa mem_wa; > + uint32_t pipe_bw_kbps[I915_MAX_PIPES]; > + bool pipe_ytiled[I915_MAX_PIPES]; > struct skl_ddb_allocation ddb; > }; > > diff --git a/drivers/gpu/drm/i915/intel_display.c > b/drivers/gpu/drm/i915/intel_display.c > index 5d11002..f8da488 100644 > --- a/drivers/gpu/drm/i915/intel_display.c > +++ b/drivers/gpu/drm/i915/intel_display.c > @@ -14574,6 +14574,28 @@ static void intel_atomic_track_fbs(struct > drm_atomic_state *state) > to_intel_plane(plane)- > >frontbuffer_bit); > } > > +static void > +intel_update_wm_bw_wa(struct drm_atomic_state *state) > +{ > + struct intel_atomic_state *intel_state = > to_intel_atomic_state(state); > + struct drm_i915_private *dev_priv = to_i915(state->dev); > + const struct drm_crtc *crtc; > + const struct drm_crtc_state *cstate; > + struct skl_wm_values *results = &intel_state->wm_results; > + struct skl_wm_values *hw_vals = &dev_priv->wm.skl_hw; > + int i; > + > + if (!IS_GEN9(dev_priv)) > + return; > + > + for_each_crtc_in_state(state, crtc, cstate, i) { > + hw_vals->pipe_bw_kbps[i] = results->pipe_bw_kbps[i]; > + hw_vals->pipe_ytiled[i] = results->pipe_ytiled[i]; > + } > + > + hw_vals->mem_wa = results->mem_wa; > +} I think we can just patch skl_copy_wm_for_pipe() to also copy the fields we need instead of adding this function. Whouldn't that be much simpler? Anyway, this one looks correct, so no need to change if you don't want. > + > /** > * intel_atomic_commit - commit validated state object > * @dev: DRM device > @@ -14614,6 +14636,8 @@ static int intel_atomic_commit(struct > drm_device *dev, > intel_shared_dpll_commit(state); > intel_atomic_track_fbs(state); > > + intel_update_wm_bw_wa(state); > + > if (intel_state->modeset) { > memcpy(dev_priv->min_pixclk, intel_state- > >min_pixclk, > sizeof(intel_state->min_pixclk)); > diff --git a/drivers/gpu/drm/i915/intel_pm.c > b/drivers/gpu/drm/i915/intel_pm.c > index cc8fc84..fda6e1e 100644 > --- a/drivers/gpu/drm/i915/intel_pm.c > +++ b/drivers/gpu/drm/i915/intel_pm.c > @@ -2878,11 +2878,7 @@ bool ilk_disable_lp_wm(struct drm_device *dev) > > #define SKL_SAGV_BLOCK_TIME 30 /* µs */ > > -/* > - * FIXME: We still don't have the proper code detect if we need to > apply the WA, > - * so assume we'll always need it in order to avoid underruns. > - */ > -static bool skl_needs_memory_bw_wa(struct intel_atomic_state *state) > +static bool intel_needs_memory_bw_wa(struct intel_atomic_state > *state) Why are we renaming this function? Also, in my last review I suggested that we kill this function. I don't think it makes sense to have this function anymore. Besides, function intel_can_enable_sagv() needs to look at the value we assigned to enum watermark_memory_wa, not get a true/false based on platform version. > { > struct drm_i915_private *dev_priv = to_i915(state- > >base.dev); > > @@ -3056,7 +3052,7 @@ bool intel_can_enable_sagv(struct > drm_atomic_state *state) > > latency = dev_priv->wm.skl_latency[level]; > > - if (skl_needs_memory_bw_wa(intel_state) && > + if (intel_needs_memory_bw_wa(intel_state) && > plane->base.state->fb->modifier == > I915_FORMAT_MOD_X_TILED) > latency += 15; > @@ -3584,7 +3580,7 @@ static int skl_compute_plane_wm(const struct > drm_i915_private *dev_priv, > uint32_t y_min_scanlines; > struct intel_atomic_state *state = > to_intel_atomic_state(cstate->base.state); > - bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state); > + enum watermark_memory_wa mem_wa; > bool y_tiled, x_tiled; > > if (latency == 0 || !cstate->base.active || !intel_pstate- > >base.visible) { > @@ -3600,7 +3596,8 @@ static int skl_compute_plane_wm(const struct > drm_i915_private *dev_priv, > if (IS_KABYLAKE(dev_priv) && dev_priv->ipc_enabled) > latency += 4; > > - if (apply_memory_bw_wa && x_tiled) > + mem_wa = state->wm_results.mem_wa; This assignment could have been done during declaration. > + if (mem_wa != WATERMARK_WA_NONE && x_tiled) > latency += 15; > > width = drm_rect_width(&intel_pstate->base.src) >> 16; > @@ -3635,7 +3632,7 @@ static int skl_compute_plane_wm(const struct > drm_i915_private *dev_priv, > y_min_scanlines = 4; > } > > - if (apply_memory_bw_wa) > + if (mem_wa == WATERMARK_WA_Y_TILED) > y_min_scanlines *= 2; > > plane_bytes_per_line = width * cpp; > @@ -4063,6 +4060,15 @@ skl_compute_ddb(struct drm_atomic_state > *state) > } > > /* > + * If Watermark workaround is changed we need to recalculate > + * watermark values for all active pipes > + */ > + if (intel_state->wm_results.mem_wa != dev_priv- > >wm.skl_hw.mem_wa) { > + realloc_pipes = ~0; > + intel_state->wm_results.dirty_pipes = ~0; > + } But then skl_ddb_add_affected_planes() only actually adds to the commit the planes that have different DDB partitioning. It seems to me that it may be possible to have a case where the WA changes and the DDB stays the same, so we need to find a way to include every CRTC there. Maybe we could just go and call the function that adds every CRTC here. > + > + /* > * We're not recomputing for the pipes not included in the > commit, so > * make sure we start with the current state. > */ > @@ -4087,6 +4093,133 @@ skl_compute_ddb(struct drm_atomic_state > *state) > return 0; > } > > +static int > +skl_compute_memory_bandwidth_wm_wa(struct drm_atomic_state *state) > +{ > + struct drm_device *dev = state->dev; > + struct drm_i915_private *dev_priv = to_i915(dev); > + struct intel_atomic_state *intel_state = > to_intel_atomic_state(state); > + struct memdev_info *memdev_info = &dev_priv->memdev_info; > + struct skl_wm_values *results = &intel_state->wm_results; > + struct drm_crtc *crtc; > + struct drm_crtc_state *cstate; > + int active_pipes = 0; > + uint32_t max_pipe_bw_kbps = 0, total_pipe_bw_kbps; > + int display_bw_percentage; > + bool y_tile_enabled = false; > + int ret, i; > + > + if (!intel_needs_memory_bw_wa(intel_state)) { > + results->mem_wa = WATERMARK_WA_NONE; > + return 0; > + } > + > + if (!memdev_info->valid) > + goto exit; There's no reason to use a label if it's only called here. Just move all that code to the if statement. > + > + ret = drm_modeset_lock(&dev->mode_config.connection_mutex, > + state->acquire_ctx); > + if (ret) > + return ret; Why are we getting this here? Why this lock specifically? What's it protecting? Doc says: - ww mutex protecting connector state and routing - protects connector->encoder and encoder->crtc links We're not touching any of that here. > + > + memcpy(results->pipe_bw_kbps, dev_priv- > >wm.skl_hw.pipe_bw_kbps, > + sizeof(results->pipe_bw_kbps)); > + memcpy(results->pipe_ytiled, dev_priv- > >wm.skl_hw.pipe_ytiled, > + sizeof(results->pipe_ytiled)); > + > + for_each_crtc_in_state(state, crtc, cstate, i) { > + struct drm_plane *plane; > + const struct drm_plane_state *pstate; > + int active_planes = 0; > + uint32_t max_plane_bw_kbps = 0; > + > + results->pipe_ytiled[i] = false; > + > + if (!cstate->active) { > + results->pipe_bw_kbps[i] = 0; > + continue; > + } > + > + drm_atomic_crtc_state_for_each_plane_state(plane, > pstate, > + csta > te) { > + struct drm_framebuffer *fb; > + uint32_t plane_bw_kbps; > + enum plane_id id = to_intel_plane(plane)- > >id; > + > + if (!pstate->visible) > + continue; > + > + if (WARN_ON(!pstate->fb)) > + continue; > + > + if (id == PLANE_CURSOR) > + continue; > + > + fb = pstate->fb; > + if ((fb->modifier == I915_FORMAT_MOD_Y_TILED > || > + fb->modifier == > I915_FORMAT_MOD_Yf_TILED)) > + results->pipe_ytiled[i] = true; > + > + plane_bw_kbps = > skl_adjusted_plane_pixel_rate( > + to_intel_crtc_state( > cstate), > + to_intel_plane_state > (pstate)); > + max_plane_bw_kbps = max(plane_bw_kbps, > + max_plane_bw > _kbps); > + active_planes++; > + } > + results->pipe_bw_kbps[i] = max_plane_bw_kbps * > active_planes; > + } > + > + for_each_pipe(dev_priv, i) { > + if (results->pipe_bw_kbps[i]) { > + max_pipe_bw_kbps = max(max_pipe_bw_kbps, > + results->pipe_bw_kbps[i]); > + active_pipes++; > + } > + if (results->pipe_ytiled[i]) > + y_tile_enabled = true; > + } > + > + total_pipe_bw_kbps = max_pipe_bw_kbps * active_pipes; > + display_bw_percentage = DIV_ROUND_UP_ULL(total_pipe_bw_kbps > * 100, > + memdev_info- > >bandwidth_kbps); Shouldn't this be just DIV_ROUND_UP()? I don't see 64 bit variables here, nor the possibility of overflows. > + > + /* > + * If there is any Ytile plane enabled and arbitrated > display > + * bandwidth > 20% of raw system memory bandwidth > + * Enale Y-tile related WA > + * > + * If memory is dual channel single rank, Xtile limit = 35%, > else Xtile > + * limit = 60% > + * If there is no Ytile plane enabled and > + * arbitrated display bandwidth > Xtile limit > + * Enable X-tile realated WA > + */ > + if (y_tile_enabled && (display_bw_percentage > 20)) > + results->mem_wa = WATERMARK_WA_Y_TILED; > + else { > + int x_tile_percentage = 60; > + enum rank rank = DRAM_RANK_SINGLE; > + > + if (memdev_info->rank != DRAM_RANK_INVALID) > + rank = memdev_info->rank; I really think that the previous patch should prevent this. If memdev_info->rank is invalid then memdev_info->valid should also be false and we'd have returned at the beginning of the function. With that, this part of the code could be simplified a little bit since then we'd have no need to choose a default rank. > + > + if ((rank == DRAM_RANK_SINGLE) && > + (memdev_info->num_channels > == 2)) > + x_tile_percentage = 35; > + > + if (display_bw_percentage > x_tile_percentage) > + results->mem_wa = WATERMARK_WA_X_TILED; > + } > + > + return 0; > + > +exit: > + results->mem_wa = WATERMARK_WA_Y_TILED; > + return 0; > +} > + > + Two blank lines here. > static void > skl_copy_wm_for_pipe(struct skl_wm_values *dst, > struct skl_wm_values *src, > @@ -4162,6 +4295,10 @@ skl_compute_wm(struct drm_atomic_state *state) > /* Clear all dirty flags */ > results->dirty_pipes = 0; > > + ret = skl_compute_memory_bandwidth_wm_wa(state); > + if (ret) > + return ret; > + > ret = skl_compute_ddb(state); > if (ret) > return ret; _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx