Re: [PATCH v4 4/8] drm/i915/gen9: WM memory bandwidth related workaround

Mahesh Kumar <mahesh1.kumar@xxxxxxxxx> · Thu, 10 Nov 2016 11:24:48 +0530

Hi,


On Friday 04 November 2016 10:39 PM, Paulo Zanoni wrote:
Em Qui, 2016-10-13 às 16:28 +0530, Kumar, Mahesh escreveu:
This patch implements workarounds related to display arbitrated
memory
bandwidth. These WAs are applicable to all gen-9 based platforms.

Changes since v1:
  - Rebase on top of Paulo's patch series
Changes since v2:
  - Rebase/rework after addressing Paulo's comments in previous patch
A lot of this code has changed since then, so this will need a
significant rebase. In the meantime, I added skl_needs_memory_bw_wa()
and we're now applying the WA by default: we just won't apply the WA
when we're pretty sure we don't need to. This helps avoiding underruns
by default.

See more below.
Makes sense; I will change the default WA to WATERMARK_WA_Y_TILED.
Signed-off-by: "Kumar, Mahesh" <mahesh1.kumar@xxxxxxxxx>
---
  drivers/gpu/drm/i915/i915_drv.h  |   9 +++
  drivers/gpu/drm/i915/intel_drv.h |  11 +++
  drivers/gpu/drm/i915/intel_pm.c  | 146
+++++++++++++++++++++++++++++++++++++++
  3 files changed, 166 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_drv.h
b/drivers/gpu/drm/i915/i915_drv.h
index adbd9aa..c169360 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1092,6 +1092,13 @@ enum intel_sbi_destination {
  	SBI_MPHY,
  };
  
+/* SKL+ Watermark arbitrated display bandwidth Workarounds */
+enum watermark_memory_wa {
+	WATERMARK_WA_NONE,
+	WATERMARK_WA_X_TILED,
+	WATERMARK_WA_Y_TILED,
+};
+
  #define QUIRK_PIPEA_FORCE (1<<0)
  #define QUIRK_LVDS_SSC_DISABLE (1<<1)
  #define QUIRK_INVERT_BRIGHTNESS (1<<2)
@@ -1644,6 +1651,8 @@ struct skl_ddb_allocation {
  
  struct skl_wm_values {
  	unsigned dirty_pipes;
+	/* any WaterMark memory workaround Required */
We can remove this comment since it doesn't say anything the variable
name doesn't.

+	enum watermark_memory_wa mem_wa;
Now that we have a proper variable in the state struct, it probably
makes sense to just kill skl_needs_memory_bw_wa() and read this
variable when we need to.


  	struct skl_ddb_allocation ddb;
  	uint32_t wm_linetime[I915_MAX_PIPES];
  	uint32_t plane[I915_MAX_PIPES][I915_MAX_PLANES][8];
diff --git a/drivers/gpu/drm/i915/intel_drv.h
b/drivers/gpu/drm/i915/intel_drv.h
index f48e79a..2c1897b 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1813,6 +1813,17 @@ intel_atomic_get_crtc_state(struct
drm_atomic_state *state,
  	return to_intel_crtc_state(crtc_state);
  }
  
+static inline struct intel_crtc_state *
+intel_atomic_get_existing_crtc_state(struct drm_atomic_state *state,
+				      struct intel_crtc *crtc)
+{
+	struct drm_crtc_state *crtc_state;
+
+	crtc_state = drm_atomic_get_existing_crtc_state(state,
&crtc->base);
+
+	return to_intel_crtc_state(crtc_state);
I really don't like the idea of calling to_intel_crtc_state() on a
potentially NULL pointer so the caller of this function will also check
for NULL. Even though it works today, I still think it's unsafe
practice. Please check crtc_state for NULL directly and then return
NULL.

Also, I think this function should be extracted to its own commit, and
we'd probably be able to find some callers in the existing i915 code.
Will extract this function & include NULL check for crtc_state itself

+}
+
  static inline struct intel_plane_state *
  intel_atomic_get_existing_plane_state(struct drm_atomic_state
*state,
  				      struct intel_plane *plane)
diff --git a/drivers/gpu/drm/i915/intel_pm.c
b/drivers/gpu/drm/i915/intel_pm.c
index 84ec6b1..5b8f715 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3589,6 +3589,8 @@ static int skl_compute_plane_wm(const struct
drm_i915_private *dev_priv,
  {
  	struct drm_plane_state *pstate = &intel_pstate->base;
  	struct drm_framebuffer *fb = pstate->fb;
+	struct intel_atomic_state *intel_state =
+			to_intel_atomic_state(cstate->base.state);
  	uint32_t latency = dev_priv->wm.skl_latency[level];
  	uint32_t method1, method2;
  	uint32_t plane_bytes_per_line, plane_blocks_per_line;
@@ -3598,10 +3600,17 @@ static int skl_compute_plane_wm(const struct
drm_i915_private *dev_priv,
  	uint32_t width = 0, height = 0;
  	uint32_t plane_pixel_rate;
  	uint32_t y_tile_minimum, y_min_scanlines;
+	enum watermark_memory_wa mem_wa;
  
  	if (latency == 0 || !cstate->base.active || !intel_pstate-
base.visible)
  		return 0;
  
+	mem_wa = intel_state ? intel_state->wm_results.mem_wa :
WATERMARK_WA_NONE;
+	if (mem_wa != WATERMARK_WA_NONE) {
+		if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED)
+			latency += 15;
+	}
+
  	width = drm_rect_width(&intel_pstate->base.src) >> 16;
  	height = drm_rect_height(&intel_pstate->base.src) >> 16;
  
@@ -3634,6 +3643,9 @@ static int skl_compute_plane_wm(const struct
drm_i915_private *dev_priv,
  		y_min_scanlines = 4;
  	}
  
+	if (mem_wa == WATERMARK_WA_Y_TILED)
+		y_min_scanlines *= 2;
The changes to this function will need to be rebased. But it's
interesting that my interpretation regarding this *= 2 was different.
After an email to the spec authors it seems your interpretation is the
right one...


+
  	plane_bytes_per_line = width * cpp;
  	if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED ||
  	    fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) {
@@ -4075,6 +4087,15 @@ skl_include_affected_pipes(struct
drm_atomic_state *state)
  		intel_state->wm_results.dirty_pipes = ~0;
  	}
  
+	/*
+	 * If Watermark workaround is changed we need to recalculate
+	 * watermark values for all active pipes
+	 */
+	if (intel_state->wm_results.mem_wa != dev_priv-
wm.skl_hw.mem_wa) {
+		realloc_pipes = ~0;
+		intel_state->wm_results.dirty_pipes = ~0;
+	}
+
Things have changed since this patch was written. I'd recommend moving
this to skl_set_memory_bandwidth_wa().
Working on re-basing the patch.

  	for_each_intel_crtc_mask(dev, intel_crtc, realloc_pipes) {
  		struct intel_crtc_state *cstate;
  
@@ -4087,6 +4108,129 @@ skl_include_affected_pipes(struct
drm_atomic_state *state)
  }
  
  static void
+skl_set_memory_bandwidth_wm_wa(struct drm_atomic_state *state)
We're not really setting anything here: we're computing. Rename to
skl_compute_wm_memory_bw_wa?


+{
+	struct drm_device *dev = state->dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct intel_crtc *intel_crtc;
+	struct intel_plane_state *intel_pstate;
+	struct intel_atomic_state *intel_state =
to_intel_atomic_state(state);
+	struct memdev_info *memdev_info = &dev_priv->memdev_info;
+	int num_active_plane, num_active_pipe;
I like to use plural in the number-of variables, since we're counting
the number of active planeS, not the number of the active plane.


+	uint32_t plane_bw, max_plane_bw, pipe_bw, max_pipe_bw;
+	uint32_t total_pipe_bw;
+	uint32_t system_bw = 0;
+	uint32_t rank;
+	int x_tile_per;
+	int display_bw_per;
I read this and kept thinking "x tiles per what?", took me a while to
realize it's percentage. It's probably better to just use a long self-
documenting name here than to use an ambiguous abbreviation.

Also, I'd just go with uint32_t in the percentages too to avoid any
possible problems with the uint32_t calculations.

And perhaps some declarations could be moved to smaller inner scopes
below.


+	bool y_tile_enabled = false;
+
if (!platforms_that_require_the_wa) {
	wa = WATERMARK_WA_NONE;
	return;
}
This function is not called for any GEN other than GEN9. Would it be OK to
add an "if (!GEN9)" check?
+	if (!memdev_info->valid)
+		goto exit;
Our default behavior should be to apply the WA when in doubt, not
to avoid it, so the return value here and in the other error cases
should be WATERMARK_WA_Y_TILED.

Also, you can avoid the gotos by just setting mem_wa at the beginning
of the function, then you can just "return" later instead of goto.


+
+	system_bw = memdev_info->mem_speed_khz * memdev_info-
num_channels *
+				memdev_info->channel_width_bytes;
+
+	if (!system_bw)
+		goto exit;
This shouldn't be possible. Otherwise, the previous patch needs to be
fixed in a way to not allow system_bw to end up as zero. Please either
remove the check or change to "if (WARN_ON(!system_bw))".
Yes, ideally this should not be possible, but what if, for whatever
reason, the BIOS does not write the register, or we don't have a BIOS at all?

If we add WARN_ON here, it'll flood the logs. We could use WARN_ON_ONCE,
but the previous patch already prints a warning if the frequency or any
other calculated value is zero. IMO it is better to add more prints in the
previous patch instead of adding one here.

+
+	max_pipe_bw = 0;
+	for_each_intel_crtc(dev, intel_crtc) {
+		struct intel_crtc_state *cstate;
+		struct intel_plane *plane;
+
+		/*
+		 * If CRTC is part of current atomic commit, get
crtc state from
+		 * existing CRTC state. else take the cached CRTC
state
+		 */
+		cstate = NULL;
+		if (state)
If state is NULL we'll segfault way before this point, so there's no
need for this check.


+			cstate =
intel_atomic_get_existing_crtc_state(state,
+					intel_crtc);
+		if (!cstate)
+			cstate = to_intel_crtc_state(intel_crtc-
base.state);
+
+		if (!cstate->base.active)
+			continue;
+
+		num_active_plane = 0;
+		max_plane_bw = 0;
+		for_each_intel_plane_mask(dev, plane, cstate-
base.plane_mask) {
+			struct drm_framebuffer *fb = NULL;
+
+			intel_pstate = NULL;
+			if (state)
Same here: no need to check for state.

+				intel_pstate =
+				intel_atomic_get_existing_plane_stat
e(state,
+									
plane);
+			if (!intel_pstate)
+				intel_pstate =
+					to_intel_plane_state(plane-
base.state);
+
+			WARN_ON(!intel_pstate->base.fb);
if (WARN_ON(!intel_pstate->base.fb))
	return;

Then we can just forget about checking for fb again below.

+
+			if (!intel_pstate->base.visible)
+				continue;
Don't we also need to exclude the cursor here?
yes, cursor should be excluded, will add that. thanks,


+
+			fb = intel_pstate->base.fb;
+			if (fb && (fb->modifier[0] ==
I915_FORMAT_MOD_Y_TILED ||
+				fb->modifier[0] ==
I915_FORMAT_MOD_Yf_TILED))
+				y_tile_enabled = true;
+
+			plane_bw =
skl_adjusted_plane_pixel_rate(cstate,
+								inte
l_pstate);
+			max_plane_bw = max(plane_bw, max_plane_bw);
+			num_active_plane++;
+		}
+		pipe_bw = max_plane_bw * num_active_plane;
+		max_pipe_bw = max(pipe_bw, max_pipe_bw);
+	}
+
+	if (intel_state->active_pipe_changes)
+		num_active_pipe = hweight32(intel_state-
active_crtcs);
+	else
+		num_active_pipe = hweight32(dev_priv->active_crtcs);
Why don't we just trust intel_state->active_crtcs even when
active_pipe_changes is false?
I didn't really dig into that code; I reused the code from a different
function. I will look into this.


+
+	total_pipe_bw = max_pipe_bw * num_active_pipe;
+
+	display_bw_per = DIV_ROUND_UP_ULL(total_pipe_bw * 100,
system_bw * 1000);
Why ULL?

Also, if everything is KHz as it's supposed to be, the *100 and *1000
don't make sense.
In the previous patch I converted the frequency to MHz. I need to fix that;
then the *1000 will not be required.
The *100 is still required to get the percentage.


+
+	/*
+	 * If there is any Ytile plane enabled and arbitrated
display
+	 * bandwidth > 20% of raw system memory bandwidth
+	 * Enable Y-tile related WA
+	 *
+	 * If memory is dual channel single rank, Xtile limit = 35%,
else Xtile
+	 * limit = 60%
+	 * If there is no Ytile plane enabled and
+	 * arbitrated display bandwidth > Xtile limit
+	 * Enable X-tile related WA
+	 */
+	if (y_tile_enabled && (display_bw_per > 20))
+		intel_state->wm_results.mem_wa =
WATERMARK_WA_Y_TILED;
+	else {
+
+		if (memdev_info->rank_valid)
+			rank = memdev_info->rank;
+		else
+			rank = DRAM_RANK_DUAL; /* Assume we are dual
rank */
When in doubt, apply the most restrictive workaround to avoid the
possibility of underruns. So here it's safer to assume
DRAM_RANK_SINGLE.
OK...

thanks,

Regards,
-Mahesh

+
+		if ((rank == DRAM_RANK_SINGLE) &&
+					(memdev_info->num_channels
== 2))
+			x_tile_per = 35;
+		else
+			x_tile_per = 60;
+
+		if (display_bw_per > x_tile_per)
+			intel_state->wm_results.mem_wa =
WATERMARK_WA_X_TILED;
+	}
+	return;
+
+exit:
+	intel_state->wm_results.mem_wa = WATERMARK_WA_NONE;
+}
+
+static void
  skl_copy_wm_for_pipe(struct skl_wm_values *dst,
  		     struct skl_wm_values *src,
  		     enum pipe pipe)
@@ -4131,6 +4275,8 @@ skl_compute_wm(struct drm_atomic_state *state)
  	/* Clear all dirty flags */
  	results->dirty_pipes = 0;
  
+	skl_set_memory_bandwidth_wm_wa(state);
+
  	ret = skl_include_affected_pipes(state);
  	if (ret)
  		return ret;

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx