Re: [PATCH v2 2/3] drm/i915: Adjust CDCLK accordingly to our DBuf bw needs

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Mar 17, 2020 at 12:43:38AM +0200, Stanislav Lisovskiy wrote:
> According to BSpec max BW per slice is calculated using formula
> Max BW = CDCLK * 64. Currently when calculating min CDCLK we
> account only per plane requirements, however in order to avoid
> FIFO underruns we need to estimate accumulated BW consumed by
> all planes(ddb entries basically) residing on that particular
> DBuf slice. This will allow us to put CDCLK lower and save power
> when we don't need that much bandwidth or gain additional
> performance once plane consumption grows.
> 
> v2: - Fix long line warning
>     - Limited new DBuf bw checks to only gens >= 11
> 
> Signed-off-by: Stanislav Lisovskiy <stanislav.lisovskiy@xxxxxxxxx>
> ---
>  drivers/gpu/drm/i915/display/intel_bw.c       | 46 +++++++++++++++++++
>  drivers/gpu/drm/i915/display/intel_bw.h       |  1 +
>  drivers/gpu/drm/i915/display/intel_cdclk.c    | 25 ++++++++++
>  drivers/gpu/drm/i915/display/intel_display.c  | 10 +++-
>  .../drm/i915/display/intel_display_power.h    |  1 +
>  drivers/gpu/drm/i915/intel_pm.c               | 34 +++++++++++++-
>  drivers/gpu/drm/i915/intel_pm.h               |  3 ++
>  7 files changed, 117 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c
> index 58b264bc318d..a85125110d7e 100644
> --- a/drivers/gpu/drm/i915/display/intel_bw.c
> +++ b/drivers/gpu/drm/i915/display/intel_bw.c
> @@ -6,6 +6,7 @@
>  #include <drm/drm_atomic_state_helper.h>
>  
>  #include "intel_bw.h"
> +#include "intel_pm.h"
>  #include "intel_display_types.h"
>  #include "intel_sideband.h"
>  
> @@ -334,6 +335,51 @@ static unsigned int intel_bw_crtc_data_rate(const struct intel_crtc_state *crtc_
>  	return data_rate;
>  }
>  
> +int intel_bw_calc_min_cdclk(struct intel_atomic_state *state)
> +{
> +	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> +	int max_bw_per_dbuf[DBUF_SLICE_MAX];
> +	int i = 0;
> +	enum plane_id plane_id;
> +	struct intel_crtc_state *crtc_state;
> +	struct intel_crtc *crtc;
> +	int max_bw = 0;
> +	int min_cdclk;
> +
> +	memset(max_bw_per_dbuf, 0, sizeof(max_bw_per_dbuf[0]) * DBUF_SLICE_MAX);
> +
> +	for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) {
> +		for_each_plane_id_on_crtc(crtc, plane_id) {
> +			struct skl_ddb_entry *plane_alloc =
> +				&crtc_state->wm.skl.plane_ddb_y[plane_id];
> +			struct skl_ddb_entry *uv_plane_alloc =
> +				&crtc_state->wm.skl.plane_ddb_uv[plane_id];
> +			unsigned int data_rate = crtc_state->data_rate[plane_id];
> +			int slice_id = 0;
> +			u32 dbuf_mask = skl_ddb_dbuf_slice_mask(dev_priv, plane_alloc);
> +
> +			dbuf_mask |= skl_ddb_dbuf_slice_mask(dev_priv, uv_plane_alloc);
> +
> +			DRM_DEBUG_KMS("Got dbuf mask %x for pipe %c ddb %d-%d plane %d data rate %d\n",
> +				      dbuf_mask, pipe_name(crtc->pipe), plane_alloc->start,
> +				      plane_alloc->end, plane_id, data_rate);
> +
> +			while (dbuf_mask != 0) {
> +				if (dbuf_mask & 1) {
> +					max_bw_per_dbuf[slice_id] += data_rate;
> +					max_bw = max(max_bw, max_bw_per_dbuf[slice_id]);
> +				}
> +				slice_id++;
> +				dbuf_mask >>= 1;
> +			}
> +		}
> +	}

Something like?

for_each_plane_id() {
	for_each_dbuf_slice() {
		skl_ddb_entry_for_slices(BIT(slice), &ddb_slice);
		
		if (skl_ddb_entries_overlap(&ddb_slice, &ddb[plane_id])))
			bw[slice] += data_rate;
	}
}

But this seems to borked anyway since we only consider the crtcs in the
state, and there are those ugly FIXMEs below.

I have a feeling what we want is dbuf_state, and track the bw used for 
each slice therein. Should also allow us to flag the cdclk recalculation
only when things actually change in a way that needs more cdclk, instead 
of pessimising every plane enable/disable like you do below.

> +
> +	min_cdclk = max_bw / 64;
> +
> +	return min_cdclk;
> +}
> +
>  void intel_bw_crtc_update(struct intel_bw_state *bw_state,
>  			  const struct intel_crtc_state *crtc_state)
>  {
> diff --git a/drivers/gpu/drm/i915/display/intel_bw.h b/drivers/gpu/drm/i915/display/intel_bw.h
> index a8aa7624c5aa..8a522b571c51 100644
> --- a/drivers/gpu/drm/i915/display/intel_bw.h
> +++ b/drivers/gpu/drm/i915/display/intel_bw.h
> @@ -29,5 +29,6 @@ int intel_bw_init(struct drm_i915_private *dev_priv);
>  int intel_bw_atomic_check(struct intel_atomic_state *state);
>  void intel_bw_crtc_update(struct intel_bw_state *bw_state,
>  			  const struct intel_crtc_state *crtc_state);
> +int intel_bw_calc_min_cdclk(struct intel_atomic_state *state);
>  
>  #endif /* __INTEL_BW_H__ */
> diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c
> index 0741d643455b..9f2de613642e 100644
> --- a/drivers/gpu/drm/i915/display/intel_cdclk.c
> +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
> @@ -25,6 +25,7 @@
>  #include "intel_cdclk.h"
>  #include "intel_display_types.h"
>  #include "intel_sideband.h"
> +#include "intel_bw.h"
>  
>  /**
>   * DOC: CDCLK / RAWCLK
> @@ -1979,11 +1980,19 @@ int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state)
>  {
>  	struct drm_i915_private *dev_priv =
>  		to_i915(crtc_state->uapi.crtc->dev);
> +	struct intel_atomic_state *state = NULL;
>  	int min_cdclk;
>  
>  	if (!crtc_state->hw.enable)
>  		return 0;
>  
> +	/*
> +	 * FIXME: Unfortunately when this gets called from intel_modeset_setup_hw_state
> +	 * there is no intel_atomic_state at all. So lets not then use it.
> +	 */
> +	if (crtc_state->uapi.state)
> +		state = to_intel_atomic_state(crtc_state->uapi.state);
> +
>  	min_cdclk = intel_pixel_rate_to_cdclk(crtc_state);
>  
>  	/* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */
> @@ -2058,6 +2067,22 @@ int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state)
>  	if (IS_TIGERLAKE(dev_priv))
>  		min_cdclk = max(min_cdclk, (int)crtc_state->pixel_rate);
>  
> +	/*
> +	 * Similar story as with skl_write_plane_wm and intel_enable_sagv
> +	 * - in some certain driver parts, we don't have any guarantee that
> +	 * parent exists. So we might be having a crtc_state without
> +	 * parent state.
> +	 */
> +	if (INTEL_GEN(dev_priv) >= 11) {
> +		if (state) {
> +			int dbuf_bw_cdclk = intel_bw_calc_min_cdclk(state);
> +
> +			DRM_DEBUG_KMS("DBuf bw min cdclk %d current min_cdclk %d\n",
> +				      dbuf_bw_cdclk, min_cdclk);
> +			min_cdclk = max(min_cdclk, dbuf_bw_cdclk);
> +		}
> +	}
> +
>  	if (min_cdclk > dev_priv->max_cdclk_freq) {
>  		drm_dbg_kms(&dev_priv->drm,
>  			    "required cdclk (%d kHz) exceeds max (%d kHz)\n",
> diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
> index cdff3054b344..aae5521424c6 100644
> --- a/drivers/gpu/drm/i915/display/intel_display.c
> +++ b/drivers/gpu/drm/i915/display/intel_display.c
> @@ -14632,7 +14632,7 @@ static bool active_planes_affects_min_cdclk(struct drm_i915_private *dev_priv)
>  	/* See {hsw,vlv,ivb}_plane_ratio() */
>  	return IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv) ||
>  		IS_CHERRYVIEW(dev_priv) || IS_VALLEYVIEW(dev_priv) ||
> -		IS_IVYBRIDGE(dev_priv);
> +		IS_IVYBRIDGE(dev_priv) || (INTEL_GEN(dev_priv) >= 11);
>  }
>  
>  static int intel_atomic_check_planes(struct intel_atomic_state *state,
> @@ -14681,6 +14681,14 @@ static int intel_atomic_check_planes(struct intel_atomic_state *state,
>  		if (hweight8(old_active_planes) == hweight8(new_active_planes))
>  			continue;
>  
> +		/*
> +		 * active_planes bitmask has been updated, whenever amount
> +		 * of active planes had changed we need to recalculate CDCLK
> +		 * as it depends on total bandwidth now, not only min_cdclk
> +		 * per plane.
> +		 */
> +		*need_cdclk_calc = true;
> +
>  		ret = intel_crtc_add_planes_to_state(state, crtc, new_active_planes);
>  		if (ret)
>  			return ret;
> diff --git a/drivers/gpu/drm/i915/display/intel_display_power.h b/drivers/gpu/drm/i915/display/intel_display_power.h
> index da64a5edae7a..3446c3ce6a4f 100644
> --- a/drivers/gpu/drm/i915/display/intel_display_power.h
> +++ b/drivers/gpu/drm/i915/display/intel_display_power.h
> @@ -311,6 +311,7 @@ intel_display_power_put_async(struct drm_i915_private *i915,
>  enum dbuf_slice {
>  	DBUF_S1,
>  	DBUF_S2,
> +	DBUF_SLICE_MAX
>  };
>  
>  #define with_intel_display_power(i915, domain, wf) \
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 8375054ba27d..15300c44d9dc 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3844,10 +3844,9 @@ icl_get_first_dbuf_slice_offset(u32 dbuf_slice_mask,
>  	return offset;
>  }
>  
> -static u16 intel_get_ddb_size(struct drm_i915_private *dev_priv)
> +u16 intel_get_ddb_size(struct drm_i915_private *dev_priv)
>  {
>  	u16 ddb_size = INTEL_INFO(dev_priv)->ddb_size;
> -
>  	drm_WARN_ON(&dev_priv->drm, ddb_size == 0);
>  
>  	if (INTEL_GEN(dev_priv) < 11)
> @@ -3856,6 +3855,37 @@ static u16 intel_get_ddb_size(struct drm_i915_private *dev_priv)
>  	return ddb_size;
>  }
>  
> +u32 skl_ddb_dbuf_slice_mask(struct drm_i915_private *dev_priv,
> +			    const struct skl_ddb_entry *entry)
> +{
> +	u32 slice_mask = 0;
> +	u16 ddb_size = intel_get_ddb_size(dev_priv);
> +	u16 num_supported_slices = INTEL_INFO(dev_priv)->num_supported_dbuf_slices;
> +	u16 slice_size = ddb_size / num_supported_slices;
> +	u16 start_slice;
> +	u16 end_slice;
> +
> +	if (!skl_ddb_entry_size(entry))
> +		return 0;
> +
> +	start_slice = entry->start / slice_size;
> +	end_slice = (entry->end - 1) / slice_size;
> +
> +	DRM_DEBUG_KMS("ddb size %d slices %d slice size %d start slice %d end slice %d\n",
> +		      ddb_size, num_supported_slices, slice_size, start_slice, end_slice);
> +
> +	/*
> +	 * Per plane DDB entry can in a really worst case be on multiple slices
> +	 * but single entry is anyway contigious.
> +	 */
> +	while (start_slice <= end_slice) {
> +		slice_mask |= 1 << start_slice;
> +		start_slice++;
> +	}
> +
> +	return slice_mask;
> +}
> +
>  static u8 skl_compute_dbuf_slices(const struct intel_crtc_state *crtc_state,
>  				  u8 active_pipes);
>  
> diff --git a/drivers/gpu/drm/i915/intel_pm.h b/drivers/gpu/drm/i915/intel_pm.h
> index d60a85421c5a..a9e3835927a8 100644
> --- a/drivers/gpu/drm/i915/intel_pm.h
> +++ b/drivers/gpu/drm/i915/intel_pm.h
> @@ -37,6 +37,9 @@ void skl_pipe_ddb_get_hw_state(struct intel_crtc *crtc,
>  			       struct skl_ddb_entry *ddb_y,
>  			       struct skl_ddb_entry *ddb_uv);
>  void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv);
> +u16 intel_get_ddb_size(struct drm_i915_private *dev_priv);
> +u32 skl_ddb_dbuf_slice_mask(struct drm_i915_private *dev_priv,
> +			    const struct skl_ddb_entry *entry);
>  void skl_pipe_wm_get_hw_state(struct intel_crtc *crtc,
>  			      struct skl_pipe_wm *out);
>  void g4x_wm_sanitize(struct drm_i915_private *dev_priv);
> -- 
> 2.24.1.485.gad05a3d8e5

-- 
Ville Syrjälä
Intel
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [AMD Graphics]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux