Re: [PATCH 5/6] drm/i915: Remove inline from sseu helper functions

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 





On 5/1/19 2:04 PM, Summers, Stuart wrote:
On Wed, 2019-05-01 at 13:04 -0700, Daniele Ceraolo Spurio wrote:
Can you elaborate a bit more on what's the rationale for this? do
you
just want to avoid having too many inlines since the paths they're
used
in are not critical, or do you have some more functional reason? This
is
not a criticism of the patch, I just want to understand where you're
coming
from ;)

This was a request from Jani Nikula in a previous series update. I
don't have a strong preference either way personally. If you don't have
any major concerns, I'd prefer to keep the series as-is to prevent too
much thrash here, but let me know.


No concerns, just please update the commit message to explain that we're moving them because there is no need for them to be inline since they're not on a critical path where we need performance.


BTW, looking at this patch I realized there are a few more
DIV_ROUND_UP(..., BITS_PER_BYTE) that could be converted to
GEN_SSEU_STRIDE() in patch 2. I noticed you update them to a new
variable in the next patch, but for consistency it might still be
worth
updating them all in patch 2 or at least mention in the commit
message
of patch 2 that the remaining cases are updated by a follow-up patch
in
the series. Patch 2 is quite small, so you could also just squash it
into patch 6 to avoid the split.

I'm happy to squash them. I did try to isolate this a bit, but you're
right that I ended up pushing some of these DIV_ROUND_UP... stride
calculations to the last patch in the series. If you don't have any
objection, to keep the final patch a bit simpler, I'd rather pull
those changes into the earlier patch. I realize you already have a RB
on that patch. Any issues doing this?


If you're changing all of them from DIV_ROUND_UP to GEN_SSEU_STRIDE in patch 2 I'm ok for you to keep the r-b. If you want to port the other logic for saving sseu->ss_stride to that patch then I'll have another quick look at it after you re-send as that is a more complex change.

Daniele

Thanks,
Stuart


Daniele

On 5/1/19 8:34 AM, Stuart Summers wrote:
Additionally, ensure these are all prefixed with intel_sseu_*
to match the convention of other functions in i915.

Signed-off-by: Stuart Summers <stuart.summers@xxxxxxxxx>
---
   drivers/gpu/drm/i915/gt/intel_sseu.c     | 54 +++++++++++++++++++
   drivers/gpu/drm/i915/gt/intel_sseu.h     | 57 +++--------------
---
   drivers/gpu/drm/i915/i915_debugfs.c      |  6 +--
   drivers/gpu/drm/i915/i915_drv.c          |  2 +-
   drivers/gpu/drm/i915/intel_device_info.c | 69 ++++++++++++-------
-----
   5 files changed, 102 insertions(+), 86 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c
b/drivers/gpu/drm/i915/gt/intel_sseu.c
index 7f448f3bea0b..4a0b82fc108c 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -8,6 +8,60 @@
   #include "intel_lrc_reg.h"
   #include "intel_sseu.h"
+unsigned int
+intel_sseu_subslice_total(const struct sseu_dev_info *sseu)
+{
+	unsigned int i, total = 0;
+
+	for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++)
+		total += hweight8(sseu->subslice_mask[i]);
+
+	return total;
+}
+
+unsigned int
+intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu,
u8 slice)
+{
+	return hweight8(sseu->subslice_mask[slice]);
+}
+
+static int intel_sseu_eu_idx(const struct sseu_dev_info *sseu, int
slice,
+			     int subslice)
+{
+	int subslice_stride = DIV_ROUND_UP(sseu->max_eus_per_subslice,
+					   BITS_PER_BYTE);
+	int slice_stride = sseu->max_subslices * subslice_stride;
+
+	return slice * slice_stride + subslice * subslice_stride;
+}
+
+u16 intel_sseu_get_eus(const struct sseu_dev_info *sseu, int
slice,
+		       int subslice)
+{
+	int i, offset = intel_sseu_eu_idx(sseu, slice, subslice);
+	u16 eu_mask = 0;
+
+	for (i = 0;
+	     i < DIV_ROUND_UP(sseu->max_eus_per_subslice,
BITS_PER_BYTE); i++) {
+		eu_mask |= ((u16)sseu->eu_mask[offset + i]) <<
+			(i * BITS_PER_BYTE);
+	}
+
+	return eu_mask;
+}
+
+void intel_sseu_set_eus(struct sseu_dev_info *sseu, int slice, int
subslice,
+			u16 eu_mask)
+{
+	int i, offset = intel_sseu_eu_idx(sseu, slice, subslice);
+
+	for (i = 0;
+	     i < DIV_ROUND_UP(sseu->max_eus_per_subslice,
BITS_PER_BYTE); i++) {
+		sseu->eu_mask[offset + i] =
+			(eu_mask >> (BITS_PER_BYTE * i)) & 0xff;
+	}
+}
+
   u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
   			 const struct intel_sseu *req_sseu)
   {
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h
b/drivers/gpu/drm/i915/gt/intel_sseu.h
index 029e71d8f140..56e3721ae83f 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.h
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
@@ -63,58 +63,17 @@ intel_sseu_from_device_info(const struct
sseu_dev_info *sseu)
   	return value;
   }
-static inline unsigned int sseu_subslice_total(const struct
sseu_dev_info *sseu)
-{
-	unsigned int i, total = 0;
-
-	for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++)
-		total += hweight8(sseu->subslice_mask[i]);
+unsigned int
+intel_sseu_subslice_total(const struct sseu_dev_info *sseu);
- return total;
-}
+unsigned int
+intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu,
u8 slice);
-static inline unsigned int
-sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8
slice)
-{
-	return hweight8(sseu->subslice_mask[slice]);
-}
-
-static inline int sseu_eu_idx(const struct sseu_dev_info *sseu,
-			      int slice, int subslice)
-{
-	int subslice_stride = DIV_ROUND_UP(sseu->max_eus_per_subslice,
-					   BITS_PER_BYTE);
-	int slice_stride = sseu->max_subslices * subslice_stride;
-
-	return slice * slice_stride + subslice * subslice_stride;
-}
+u16 intel_sseu_get_eus(const struct sseu_dev_info *sseu, int
slice,
+		       int subslice);
-static inline u16 sseu_get_eus(const struct sseu_dev_info *sseu,
-			       int slice, int subslice)
-{
-	int i, offset = sseu_eu_idx(sseu, slice, subslice);
-	u16 eu_mask = 0;
-
-	for (i = 0;
-	     i < DIV_ROUND_UP(sseu->max_eus_per_subslice,
BITS_PER_BYTE); i++) {
-		eu_mask |= ((u16)sseu->eu_mask[offset + i]) <<
-			(i * BITS_PER_BYTE);
-	}
-
-	return eu_mask;
-}
-
-static inline void sseu_set_eus(struct sseu_dev_info *sseu,
-				int slice, int subslice, u16 eu_mask)
-{
-	int i, offset = sseu_eu_idx(sseu, slice, subslice);
-
-	for (i = 0;
-	     i < DIV_ROUND_UP(sseu->max_eus_per_subslice,
BITS_PER_BYTE); i++) {
-		sseu->eu_mask[offset + i] =
-			(eu_mask >> (BITS_PER_BYTE * i)) & 0xff;
-	}
-}
+void intel_sseu_set_eus(struct sseu_dev_info *sseu, int slice, int
subslice,
+			u16 eu_mask);
u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
   			 const struct intel_sseu *req_sseu);
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c
b/drivers/gpu/drm/i915/i915_debugfs.c
index fe854c629a32..3f3ee83ac315 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -4158,7 +4158,7 @@ static void
broadwell_sseu_device_status(struct drm_i915_private *dev_priv,
   				RUNTIME_INFO(dev_priv)->sseu.subslice_mask[s];
   		}
   		sseu->eu_total = sseu->eu_per_subslice *
-				 sseu_subslice_total(sseu);
+				 intel_sseu_subslice_total(sseu);
/* subtract fused off EU(s) from enabled slice(s) */
   		for (s = 0; s < fls(sseu->slice_mask); s++) {
@@ -4182,10 +4182,10 @@ static void i915_print_sseu_info(struct
seq_file *m, bool is_available_info,
   	seq_printf(m, "  %s Slice Total: %u\n", type,
   		   hweight8(sseu->slice_mask));
   	seq_printf(m, "  %s Subslice Total: %u\n", type,
-		   sseu_subslice_total(sseu));
+		   intel_sseu_subslice_total(sseu));
   	for (s = 0; s < fls(sseu->slice_mask); s++) {
   		seq_printf(m, "  %s Slice%i subslices: %u\n", type,
-			   s, sseu_subslices_per_slice(sseu, s));
+			   s, intel_sseu_subslices_per_slice(sseu, s));
   	}
   	seq_printf(m, "  %s EU Total: %u\n", type,
   		   sseu->eu_total);
diff --git a/drivers/gpu/drm/i915/i915_drv.c
b/drivers/gpu/drm/i915/i915_drv.c
index c376244c19c4..130c5140db0d 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -378,7 +378,7 @@ static int i915_getparam_ioctl(struct
drm_device *dev, void *data,
   		value = i915_cmd_parser_get_version(dev_priv);
   		break;
   	case I915_PARAM_SUBSLICE_TOTAL:
-		value = sseu_subslice_total(sseu);
+		value = intel_sseu_subslice_total(sseu);
   		if (!value)
   			return -ENODEV;
   		break;
diff --git a/drivers/gpu/drm/i915/intel_device_info.c
b/drivers/gpu/drm/i915/intel_device_info.c
index 559cf0d0628e..e1dbccf04cd9 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -90,10 +90,10 @@ static void sseu_dump(const struct
sseu_dev_info *sseu, struct drm_printer *p)
drm_printf(p, "slice total: %u, mask=%04x\n",
   		   hweight8(sseu->slice_mask), sseu->slice_mask);
-	drm_printf(p, "subslice total: %u\n",
sseu_subslice_total(sseu));
+	drm_printf(p, "subslice total: %u\n",
intel_sseu_subslice_total(sseu));
   	for (s = 0; s < sseu->max_slices; s++) {
   		drm_printf(p, "slice%d: %u subslices, mask=%04x\n",
-			   s, sseu_subslices_per_slice(sseu, s),
+			   s, intel_sseu_subslices_per_slice(sseu, s),
   			   sseu->subslice_mask[s]);
   	}
   	drm_printf(p, "EU total: %u\n", sseu->eu_total);
@@ -126,11 +126,11 @@ void intel_device_info_dump_topology(const
struct sseu_dev_info *sseu,
for (s = 0; s < sseu->max_slices; s++) {
   		drm_printf(p, "slice%d: %u subslice(s) (0x%hhx):\n",
-			   s, sseu_subslices_per_slice(sseu, s),
+			   s, intel_sseu_subslices_per_slice(sseu, s),
   			   sseu->subslice_mask[s]);
for (ss = 0; ss < sseu->max_subslices; ss++) {
-			u16 enabled_eus = sseu_get_eus(sseu, s, ss);
+			u16 enabled_eus = intel_sseu_get_eus(sseu, s,
ss);
drm_printf(p, "\tsubslice%d: %u EUs (0x%hx)\n",
   				   ss, hweight16(enabled_eus),
enabled_eus);
@@ -180,7 +180,7 @@ static void gen11_sseu_info_init(struct
drm_i915_private *dev_priv)
   			sseu->subslice_mask[s] = (ss_en >> ss_idx) &
ss_en_mask;
   			for (ss = 0; ss < sseu->max_subslices; ss++) {
   				if (sseu->subslice_mask[s] & BIT(ss))
-					sseu_set_eus(sseu, s, ss,
eu_en);
+					intel_sseu_set_eus(sseu, s, ss,
eu_en);
   			}
   		}
   	}
@@ -222,32 +222,32 @@ static void gen10_sseu_info_init(struct
drm_i915_private *dev_priv)
   	/* Slice0 */
   	eu_en = ~I915_READ(GEN8_EU_DISABLE0);
   	for (ss = 0; ss < sseu->max_subslices; ss++)
-		sseu_set_eus(sseu, 0, ss, (eu_en >> (8 * ss)) &
eu_mask);
+		intel_sseu_set_eus(sseu, 0, ss, (eu_en >> (8 * ss)) &
eu_mask);
   	/* Slice1 */
-	sseu_set_eus(sseu, 1, 0, (eu_en >> 24) & eu_mask);
+	intel_sseu_set_eus(sseu, 1, 0, (eu_en >> 24) & eu_mask);
   	eu_en = ~I915_READ(GEN8_EU_DISABLE1);
-	sseu_set_eus(sseu, 1, 1, eu_en & eu_mask);
+	intel_sseu_set_eus(sseu, 1, 1, eu_en & eu_mask);
   	/* Slice2 */
-	sseu_set_eus(sseu, 2, 0, (eu_en >> 8) & eu_mask);
-	sseu_set_eus(sseu, 2, 1, (eu_en >> 16) & eu_mask);
+	intel_sseu_set_eus(sseu, 2, 0, (eu_en >> 8) & eu_mask);
+	intel_sseu_set_eus(sseu, 2, 1, (eu_en >> 16) & eu_mask);
   	/* Slice3 */
-	sseu_set_eus(sseu, 3, 0, (eu_en >> 24) & eu_mask);
+	intel_sseu_set_eus(sseu, 3, 0, (eu_en >> 24) & eu_mask);
   	eu_en = ~I915_READ(GEN8_EU_DISABLE2);
-	sseu_set_eus(sseu, 3, 1, eu_en & eu_mask);
+	intel_sseu_set_eus(sseu, 3, 1, eu_en & eu_mask);
   	/* Slice4 */
-	sseu_set_eus(sseu, 4, 0, (eu_en >> 8) & eu_mask);
-	sseu_set_eus(sseu, 4, 1, (eu_en >> 16) & eu_mask);
+	intel_sseu_set_eus(sseu, 4, 0, (eu_en >> 8) & eu_mask);
+	intel_sseu_set_eus(sseu, 4, 1, (eu_en >> 16) & eu_mask);
   	/* Slice5 */
-	sseu_set_eus(sseu, 5, 0, (eu_en >> 24) & eu_mask);
+	intel_sseu_set_eus(sseu, 5, 0, (eu_en >> 24) & eu_mask);
   	eu_en = ~I915_READ(GEN10_EU_DISABLE3);
-	sseu_set_eus(sseu, 5, 1, eu_en & eu_mask);
+	intel_sseu_set_eus(sseu, 5, 1, eu_en & eu_mask);
/* Do a second pass where we mark the subslices disabled if all
their
   	 * eus are off.
   	 */
   	for (s = 0; s < sseu->max_slices; s++) {
   		for (ss = 0; ss < sseu->max_subslices; ss++) {
-			if (sseu_get_eus(sseu, s, ss) == 0)
+			if (intel_sseu_get_eus(sseu, s, ss) == 0)
   				sseu->subslice_mask[s] &= ~BIT(ss);
   		}
   	}
@@ -260,9 +260,10 @@ static void gen10_sseu_info_init(struct
drm_i915_private *dev_priv)
   	 * EU in any one subslice may be fused off for die
   	 * recovery.
   	 */
-	sseu->eu_per_subslice = sseu_subslice_total(sseu) ?
+	sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ?
   				DIV_ROUND_UP(sseu->eu_total,
-					     sseu_subslice_total(sseu))
: 0;
+					     intel_sseu_subslice_total(
sseu)) :
+				0;
/* No restrictions on Power Gating */
   	sseu->has_slice_pg = 1;
@@ -290,7 +291,7 @@ static void cherryview_sseu_info_init(struct
drm_i915_private *dev_priv)
   			  CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4);
sseu->subslice_mask[0] |= BIT(0);
-		sseu_set_eus(sseu, 0, 0, ~disabled_mask);
+		intel_sseu_set_eus(sseu, 0, 0, ~disabled_mask);
   	}
if (!(fuse & CHV_FGT_DISABLE_SS1)) {
@@ -301,7 +302,7 @@ static void cherryview_sseu_info_init(struct
drm_i915_private *dev_priv)
   			  CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4);
sseu->subslice_mask[0] |= BIT(1);
-		sseu_set_eus(sseu, 0, 1, ~disabled_mask);
+		intel_sseu_set_eus(sseu, 0, 1, ~disabled_mask);
   	}
sseu->eu_total = compute_eu_total(sseu);
@@ -310,8 +311,8 @@ static void cherryview_sseu_info_init(struct
drm_i915_private *dev_priv)
   	 * CHV expected to always have a uniform distribution of EU
   	 * across subslices.
   	*/
-	sseu->eu_per_subslice = sseu_subslice_total(sseu) ?
-				sseu->eu_total /
sseu_subslice_total(sseu) :
+	sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ?
+				sseu->eu_total /
intel_sseu_subslice_total(sseu) :
   				0;
   	/*
   	 * CHV supports subslice power gating on devices with more than
@@ -319,7 +320,7 @@ static void cherryview_sseu_info_init(struct
drm_i915_private *dev_priv)
   	 * more than one EU pair per subslice.
   	*/
   	sseu->has_slice_pg = 0;
-	sseu->has_subslice_pg = sseu_subslice_total(sseu) > 1;
+	sseu->has_subslice_pg = intel_sseu_subslice_total(sseu) > 1;
   	sseu->has_eu_pg = (sseu->eu_per_subslice > 2);
   }
@@ -369,7 +370,7 @@ static void gen9_sseu_info_init(struct
drm_i915_private *dev_priv)
eu_disabled_mask = (eu_disable >> (ss * 8)) &
eu_mask;
- sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);
+			intel_sseu_set_eus(sseu, s, ss,
~eu_disabled_mask);
eu_per_ss = sseu->max_eus_per_subslice -
   				hweight8(eu_disabled_mask);
@@ -393,9 +394,10 @@ static void gen9_sseu_info_init(struct
drm_i915_private *dev_priv)
   	 * recovery. BXT is expected to be perfectly uniform in EU
   	 * distribution.
   	*/
-	sseu->eu_per_subslice = sseu_subslice_total(sseu) ?
+	sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ?
   				DIV_ROUND_UP(sseu->eu_total,
-					     sseu_subslice_total(sseu))
: 0;
+					     intel_sseu_subslice_total(
sseu)) :
+				0;
   	/*
   	 * SKL+ supports slice power gating on devices with more than
   	 * one slice, and supports EU power gating on devices with
@@ -407,7 +409,7 @@ static void gen9_sseu_info_init(struct
drm_i915_private *dev_priv)
   	sseu->has_slice_pg =
   		!IS_GEN9_LP(dev_priv) && hweight8(sseu->slice_mask) >
1;
   	sseu->has_subslice_pg =
-		IS_GEN9_LP(dev_priv) && sseu_subslice_total(sseu) > 1;
+		IS_GEN9_LP(dev_priv) && intel_sseu_subslice_total(sseu) > 1;
   	sseu->has_eu_pg = sseu->eu_per_subslice > 2;
if (IS_GEN9_LP(dev_priv)) {
@@ -477,7 +479,7 @@ static void broadwell_sseu_info_init(struct
drm_i915_private *dev_priv)
   			eu_disabled_mask =
   				eu_disable[s] >> (ss * sseu->max_eus_per_subslice);
- sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);
+			intel_sseu_set_eus(sseu, s, ss,
~eu_disabled_mask);
n_disabled = hweight8(eu_disabled_mask); @@ -496,9 +498,10 @@ static void broadwell_sseu_info_init(struct
drm_i915_private *dev_priv)
   	 * subslices with the exception that any one EU in any one
subslice may
   	 * be fused off for die recovery.
   	 */
-	sseu->eu_per_subslice = sseu_subslice_total(sseu) ?
+	sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ?
   				DIV_ROUND_UP(sseu->eu_total,
-					     sseu_subslice_total(sseu))
: 0;
+					     intel_sseu_subslice_total(
sseu)) :
+				0;
/*
   	 * BDW supports slice power gating on devices with more than
@@ -561,8 +564,8 @@ static void haswell_sseu_info_init(struct
drm_i915_private *dev_priv)
for (s = 0; s < sseu->max_slices; s++) {
   		for (ss = 0; ss < sseu->max_subslices; ss++) {
-			sseu_set_eus(sseu, s, ss,
-				     (1UL << sseu->eu_per_subslice) -
1);
+			intel_sseu_set_eus(sseu, s, ss,
+					   (1UL << sseu->eu_per_subslice) - 1);
   		}
   	}
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [AMD Graphics]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux