Re: [PATCH v6 2/2] drm/i915/icl: Define MOCS table for Icelake

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 07/11/2018 15:16, Tomasz Lis wrote:
The table has been unified across OSes to minimize virtualization overhead.

The MOCS table is now published as part of bspec, and versioned. Entries
are supposed to never be modified, but new ones can be added. Adding
entries increases table version. The patch includes version 1 entries.

Meaning of each entry is now explained in bspec, and user mode clients
are expected to know what each entry means. The 3 entries used for previous
platforms are still compatible with their legacy definitions, but that is
not guaranteed to be true for future platforms.

v2: Fixed SCC values, improved commit comment (Daniele)
v3: Improved MOCS table comment (Daniele)
v4: Moved new entries below gen9 ones. Put common entries into
     definition to be used in multiple arrays. (Lucas)
v5: Made defines for or-ing flags. Renamed macros from MOCS_TABLE
     to MOCS_ENTRIES. Switched LE_CoS to upper case. (Joonas)
v6: Removed definitions of reserved entries. (Michal)
     Increased limit of entries sent to the hardware on gen11+.

BSpec: 34007
BSpec: 560
Signed-off-by: Tomasz Lis <tomasz.lis@xxxxxxxxx>
Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@xxxxxxxxx> (v4)

R-b upgrade needed here as well.

Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx>
Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxx>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@xxxxxxxxx>
Cc: Zhenyu Wang <zhenyuw@xxxxxxxxxxxxxxx>
Cc: Zhi A Wang <zhi.a.wang@xxxxxxxxx>
Cc: Anuj Phogat <anuj.phogat@xxxxxxxxx>
Cc: Adam Cetnerowski <adam.cetnerowski@xxxxxxxxx>
Cc: Piotr Rozenfeld <piotr.rozenfeld@xxxxxxxxx>
Cc: Lucas De Marchi <lucas.demarchi@xxxxxxxxx>
---
  drivers/gpu/drm/i915/intel_mocs.c | 222 +++++++++++++++++++++++++++++++++-----
  1 file changed, 197 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c
index 8d08a7b..4eb05c6 100644
--- a/drivers/gpu/drm/i915/intel_mocs.c
+++ b/drivers/gpu/drm/i915/intel_mocs.c
@@ -44,6 +44,8 @@ struct drm_i915_mocs_table {
  #define LE_SCC(value)		((value) << 8)
  #define LE_PFM(value)		((value) << 11)
  #define LE_SCF(value)		((value) << 14)
+#define LE_COS(value)		((value) << 15)
+#define LE_SSE(value)		((value) << 17)
/* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */
  #define L3_ESC(value)		((value) << 0)
@@ -52,6 +54,10 @@ struct drm_i915_mocs_table {
/* Helper defines */
  #define GEN9_NUM_MOCS_ENTRIES	62  /* 62 out of 64 - 63 & 64 are reserved. */
+#define GEN11_NUM_MOCS_ENTRIES	64  /* 63-64 are reserved, but configured. */
+
+#define NUM_MOCS_ENTRIES(i915) \
+	(INTEL_GEN(i915) < 11 ? GEN9_NUM_MOCS_ENTRIES : GEN11_NUM_MOCS_ENTRIES)

I have a suggestion to make this a bit more elegant by avoiding a sprinkle of conditionals throughout the code - since I count ten non-debug call sites of this macros.

Since the MOCS code seems nicely driven from struct drm_i915_mocs_table, the suggestion is to store both the used and maximum valid number of entries per platform in that structure.

It's all nicely consolidated in get_mocs_settings, where all the sanity checks could be moved as well. Other bits of the code would then just trust the table.

/* (e)LLC caching options */
  #define LE_PAGETABLE		0
@@ -80,21 +86,21 @@ struct drm_i915_mocs_table {
   * LNCFCMOCS0 - LNCFCMOCS32 registers.
   *
   * These tables are intended to be kept reasonably consistent across
- * platforms. However some of the fields are not applicable to all of
- * them.
+ * HW platforms, and for ICL+, be identical across OSes. To achieve
+ * that, for Icelake and above, list of entries is published as part
+ * of bspec.
   *
   * Entries not part of the following tables are undefined as far as
- * userspace is concerned and shouldn't be relied upon.  For the time
- * being they will be implicitly initialized to the strictest caching
- * configuration (uncached) to guarantee forwards compatibility with
- * userspace programs written against more recent kernels providing
- * additional MOCS entries.
+ * userspace is concerned and shouldn't be relied upon.
+ *
+ * The last two entries are reserved by the hardware. For ICL+ they
+ * should be initialized according to bspec and never used, for older
+ * platforms they should never be written to.
   *
- * NOTE: These tables MUST start with being uncached and the length
- *       MUST be less than 63 as the last two registers are reserved
- *       by the hardware.  These tables are part of the kernel ABI and
- *       may only be updated incrementally by adding entries at the
- *       end.
+ * NOTE: These tables are part of bspec and defined as part of hardware
+ *       interface for ICL+. For older platforms, they are part of kernel
+ *       ABI. It is expected that existing entries will remain constant
+ *       and the tables will only be updated by adding new entries.
   */
#define MOCS_CONTROL_VALUE(lecc, tc, lrum, daom, ersc, scc, pfm, scf) \
@@ -147,6 +153,167 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
  #undef MOCS_CONTROL_VALUE
  #undef MOCS_L3CC_VALUE
+#define MOCS_CONTROL_VALUE(lecc, tc, lrum, daom, ersc, scc, pfm, scf, cos, sse) \
+	(LE_CACHEABILITY(lecc) | LE_TGT_CACHE(tc) | \
+	LE_LRUM(lrum) | LE_AOM(daom) | LE_RSC(ersc) | LE_SCC(scc) | \
+	LE_PFM(pfm) | LE_SCF(scf) | LE_COS(cos) | LE_SSE(sse))
+
+#define MOCS_L3CC_VALUE(esc, scc, l3cc) \
+	(L3_ESC(esc) | L3_SCC(scc) | L3_CACHEABILITY(l3cc))
+
+#define GEN11_MOCS_ENTRIES \
+	[0] = { \
+	  /* Base - Uncached (Deprecated) */ \
+	  .control_value = MOCS_CONTROL_VALUE(LE_UC, LE_TC_LLC, \
+					      0, 0, 0, 0, 0, 0, 0, 0), \
+	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_UC), \
+	}, \
+	[1] = { \
+	  /* Base - L3 + LeCC:PAT (Deprecated) */ \
+	  .control_value = MOCS_CONTROL_VALUE(LE_PAGETABLE, LE_TC_LLC, \
+					      0, 0, 0, 0, 0, 0, 0, 0), \
+	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_WB), \
+	}, \
+	[2] = { \
+	  /* Base - L3 + LLC */ \
+	  .control_value = MOCS_CONTROL_VALUE(LE_WB, LE_TC_LLC, \
+					      3, 0, 0, 0, 0, 0, 0, 0), \
+	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_WB), \
+	}, \
+	[3] = { \
+	  /* Base - Uncached */ \
+	  .control_value = MOCS_CONTROL_VALUE(LE_UC, LE_TC_LLC, \
+					      0, 0, 0, 0, 0, 0, 0, 0), \
+	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_UC), \
+	}, \
+	[4] = { \
+	  /* Base - L3 */ \
+	  .control_value = MOCS_CONTROL_VALUE(LE_UC, LE_TC_LLC, \
+					      0, 0, 0, 0, 0, 0, 0, 0), \
+	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_WB), \
+	}, \
+	[5] = { \
+	  /* Base - LLC */ \
+	  .control_value = MOCS_CONTROL_VALUE(LE_WB, LE_TC_LLC, \
+					      3, 0, 0, 0, 0, 0, 0, 0), \
+	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_UC), \
+	}, \
+	[6] = { \
+	  /* Age 0 - LLC */ \
+	  .control_value = MOCS_CONTROL_VALUE(LE_WB, LE_TC_LLC, \
+					      1, 0, 0, 0, 0, 0, 0, 0), \
+	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_UC), \
+	}, \
+	[7] = { \
+	  /* Age 0 - L3 + LLC */ \
+	  .control_value = MOCS_CONTROL_VALUE(LE_WB, LE_TC_LLC, \
+					      1, 0, 0, 0, 0, 0, 0, 0), \
+	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_WB), \
+	}, \
+	[8] = { \
+	  /* Age: Don't Chg. - LLC */ \
+	  .control_value = MOCS_CONTROL_VALUE(LE_WB, LE_TC_LLC, \
+					      2, 0, 0, 0, 0, 0, 0, 0), \
+	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_UC), \
+	}, \
+	[9] = { \
+	  /* Age: Don't Chg. - L3 + LLC */ \
+	  .control_value = MOCS_CONTROL_VALUE(LE_WB, LE_TC_LLC, \
+					      2, 0, 0, 0, 0, 0, 0, 0), \
+	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_WB), \
+	}, \
+	[10] = { \
+	  /* No AOM - LLC */ \
+	  .control_value = MOCS_CONTROL_VALUE(LE_WB, LE_TC_LLC, \
+					      3, 1, 0, 0, 0, 0, 0, 0), \
+	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_UC), \
+	}, \
+	[11] = { \
+	  /* No AOM - L3 + LLC */ \
+	  .control_value = MOCS_CONTROL_VALUE(LE_WB, LE_TC_LLC, \
+					      3, 1, 0, 0, 0, 0, 0, 0), \
+	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_WB), \
+	}, \
+	[12] = { \
+	  /* No AOM; Age 0 - LLC */ \
+	  .control_value = MOCS_CONTROL_VALUE(LE_WB, LE_TC_LLC, \
+					      1, 1, 0, 0, 0, 0, 0, 0), \
+	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_UC), \
+	}, \
+	[13] = { \
+	  /* No AOM; Age 0 - L3 + LLC */ \
+	  .control_value = MOCS_CONTROL_VALUE(LE_WB, LE_TC_LLC, \
+					      1, 1, 0, 0, 0, 0, 0, 0), \
+	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_WB), \
+	}, \
+	[14] = { \
+	  /* No AOM; Age:DC - LLC */ \
+	  .control_value = MOCS_CONTROL_VALUE(LE_WB, LE_TC_LLC, \
+					      2, 1, 0, 0, 0, 0, 0, 0), \
+	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_UC), \
+	}, \
+	[15] = { \
+	  /* No AOM; Age:DC - L3 + LLC */ \
+	  .control_value = MOCS_CONTROL_VALUE(LE_WB, LE_TC_LLC, \
+					      2, 1, 0, 0, 0, 0, 0, 0), \
+	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_WB), \
+	}, \
+	[18] = { \
+	  /* Self-Snoop - L3 + LLC */ \
+	  .control_value = MOCS_CONTROL_VALUE(LE_WB, LE_TC_LLC, \
+					      3, 0, 0, 0, 0, 0, 0, 3), \
+	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_WB), \
+	}, \
+	[19] = { \
+	  /* Skip Caching - L3 + LLC(12.5%) */ \
+	  .control_value = MOCS_CONTROL_VALUE(LE_WB, LE_TC_LLC, \
+					      3, 0, 0, 7, 0, 0, 0, 0), \
+	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_WB), \
+	}, \
+	[20] = { \
+	  /* Skip Caching - L3 + LLC(25%) */ \
+	  .control_value = MOCS_CONTROL_VALUE(LE_WB, LE_TC_LLC, \
+					      3, 0, 0, 3, 0, 0, 0, 0), \
+	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_WB), \
+	}, \
+	[21] = { \
+	  /* Skip Caching - L3 + LLC(50%) */ \
+	  .control_value = MOCS_CONTROL_VALUE(LE_WB, LE_TC_LLC, \
+					      3, 0, 0, 1, 0, 0, 0, 0), \
+	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_WB), \
+	}, \
+	[22] = { \
+	  /* Skip Caching - L3 + LLC(75%) */ \
+	  .control_value = MOCS_CONTROL_VALUE(LE_WB, LE_TC_LLC, \
+					      3, 0, 1, 3, 0, 0, 0, 0), \
+	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_WB), \
+	}, \
+	[23] = { \
+	  /* Skip Caching - L3 + LLC(87.5%) */ \
+	  .control_value = MOCS_CONTROL_VALUE(LE_WB, LE_TC_LLC, \
+					      3, 0, 1, 7, 0, 0, 0, 0), \
+	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_WB), \
+	}, \
+	[62] = { \
+	  /* HW Reserved - SW program but never use */ \
+	  .control_value = MOCS_CONTROL_VALUE(LE_WB, LE_TC_LLC, \
+					      3, 0, 0, 0, 0, 0, 0, 0), \
+	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_UC), \
+	}, \
+	[63] = { \
+	  /* HW Reserved - SW program but never use */ \
+	  .control_value = MOCS_CONTROL_VALUE(LE_WB, LE_TC_LLC, \
+					      3, 0, 0, 0, 0, 0, 0, 0), \
+	  .l3cc_value = MOCS_L3CC_VALUE(0, 0, L3_UC), \
+	},
+
+static const struct drm_i915_mocs_entry icelake_mocs_table[] = {
+	GEN11_MOCS_ENTRIES
+};
+
+#undef MOCS_CONTROL_VALUE
+#undef MOCS_L3CC_VALUE
+
  /**
   * get_mocs_settings()
   * @dev_priv:	i915 device.
@@ -164,8 +331,11 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv,
  {
  	bool result = false;
- if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv) ||
-	    IS_ICELAKE(dev_priv)) {
+	if (IS_ICELAKE(dev_priv)) {
+		table->size  = ARRAY_SIZE(icelake_mocs_table);
+		table->table = icelake_mocs_table;
+		result = true;
+	} else if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
  		table->size  = ARRAY_SIZE(skylake_mocs_table);
  		table->table = skylake_mocs_table;
  		result = true;

So initialize table max here as well.

@@ -228,7 +398,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
  	if (!get_mocs_settings(dev_priv, &table))
  		return;
- GEM_BUG_ON(table.size > GEN9_NUM_MOCS_ENTRIES);
+	GEM_BUG_ON(table.size > NUM_MOCS_ENTRIES(dev_priv));

Then this can go.

for (index = 0; index < table.size; index++)
  		I915_WRITE(mocs_register(engine->id, index),
@@ -242,7 +412,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
  	 * Entry 0 in the table is uncached - so we are just writing
  	 * that value to all the used entries.
  	 */
-	for (; index < GEN9_NUM_MOCS_ENTRIES; index++)
+	for (; index < NUM_MOCS_ENTRIES(dev_priv); index++)
This can use table->max (or something).

  		I915_WRITE(mocs_register(engine->id, index),
  			   table.table[0].control_value);
  }
@@ -260,18 +430,19 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
  static int emit_mocs_control_table(struct i915_request *rq,
  				   const struct drm_i915_mocs_table *table)
  {
+	struct drm_i915_private *i915 = rq->i915;
  	enum intel_engine_id engine = rq->engine->id;
  	unsigned int index;
  	u32 *cs;
- if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES))
+	if (WARN_ON(table->size > NUM_MOCS_ENTRIES(i915)))
  		return -ENODEV;

This can then go as well. Or at least it can be GEM_WARN_ON since it cannot leave development in the broken state.

- cs = intel_ring_begin(rq, 2 + 2 * GEN9_NUM_MOCS_ENTRIES);
+	cs = intel_ring_begin(rq, 2 + 2 * NUM_MOCS_ENTRIES(i915));
  	if (IS_ERR(cs))
  		return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES);
+	*cs++ = MI_LOAD_REGISTER_IMM(NUM_MOCS_ENTRIES(i915));

These two can be table->max as well then.

Etc to the end.

See what you think.

Regards,

Tvrtko

for (index = 0; index < table->size; index++) {
  		*cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
@@ -286,7 +457,7 @@ static int emit_mocs_control_table(struct i915_request *rq,
  	 * Entry 0 in the table is uncached - so we are just writing
  	 * that value to all the used entries.
  	 */
-	for (; index < GEN9_NUM_MOCS_ENTRIES; index++) {
+	for (; index < NUM_MOCS_ENTRIES(i915); index++) {
  		*cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
  		*cs++ = table->table[0].control_value;
  	}
@@ -319,17 +490,18 @@ static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table,
  static int emit_mocs_l3cc_table(struct i915_request *rq,
  				const struct drm_i915_mocs_table *table)
  {
+	struct drm_i915_private *i915 = rq->i915;
  	unsigned int i;
  	u32 *cs;
- if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES))
+	if (WARN_ON(table->size > NUM_MOCS_ENTRIES(i915)))
  		return -ENODEV;
- cs = intel_ring_begin(rq, 2 + GEN9_NUM_MOCS_ENTRIES);
+	cs = intel_ring_begin(rq, 2 + NUM_MOCS_ENTRIES(i915));
  	if (IS_ERR(cs))
  		return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES / 2);
+	*cs++ = MI_LOAD_REGISTER_IMM(NUM_MOCS_ENTRIES(i915) / 2);
for (i = 0; i < table->size/2; i++) {
  		*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
@@ -348,7 +520,7 @@ static int emit_mocs_l3cc_table(struct i915_request *rq,
  	 * this will be uncached. Leave the last pair uninitialised as
  	 * they are reserved by the hardware.
  	 */
-	for (; i < GEN9_NUM_MOCS_ENTRIES / 2; i++) {
+	for (; i < NUM_MOCS_ENTRIES(i915) / 2; i++) {
  		*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
  		*cs++ = l3cc_combine(table, 0, 0);
  	}
@@ -395,7 +567,7 @@ void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv)
  	 * this will be uncached. Leave the last pair as initialised as
  	 * they are reserved by the hardware.
  	 */
-	for (; i < (GEN9_NUM_MOCS_ENTRIES / 2); i++)
+	for (; i < (NUM_MOCS_ENTRIES(dev_priv) / 2); i++)
  		I915_WRITE(GEN9_LNCFCMOCS(i), l3cc_combine(&table, 0, 0));
  }
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [AMD Graphics]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux