On Fri, Dec 21, 2018 at 12:29:41PM +0000, Tvrtko Ursulin wrote: > > On 14/12/2018 18:20, Lucas De Marchi wrote: > > From: Tomasz Lis <tomasz.lis@xxxxxxxxx> > > > > The table has been unified across OSes to minimize virtualization overhead. > > > > The MOCS table is now published as part of bspec, and versioned. Entries > > are supposed to never be modified, but new ones can be added. Adding > > entries increases table version. The patch includes version 1 entries. > > > > Meaning of each entry is now explained in bspec, and user mode clients > > are expected to know what each entry means. The 3 entries used for previous > > platforms are still compatible with their legacy definitions, but that is > > not guaranteed to be true for future platforms. > > > > v2: Fixed SCC values, improved commit comment (Daniele) > > v3: Improved MOCS table comment (Daniele) > > v4: Moved new entries below gen9 ones. Put common entries into > > definition to be used in multiple arrays. (Lucas) > > v5: Made defines for or-ing flags. Renamed macros from MOCS_TABLE > > to MOCS_ENTRIES. Switched LE_CoS to upper case. (Joonas) > > v6: Removed definitions of reserved entries. (Michal) > > Increased limit of entries sent to the hardware on gen11+. > > v7: Simplify table as done for previou gens (Lucas) > > > > BSpec: 34007 > > BSpec: 560 > > > > Signed-off-by: Tomasz Lis <tomasz.lis@xxxxxxxxx> > > Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@xxxxxxxxx> > > Reviewed-by: Lucas De Marchi <lucas.demarchi@xxxxxxxxx> > > Signed-off-by: Lucas De Marchi <lucas.demarchi@xxxxxxxxx> > > --- > > drivers/gpu/drm/i915/intel_mocs.c | 187 ++++++++++++++++++++++++++---- > > 1 file changed, 162 insertions(+), 25 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c > > index 577633cefb8a..dfc4edea020f 100644 > > --- a/drivers/gpu/drm/i915/intel_mocs.c > > +++ b/drivers/gpu/drm/i915/intel_mocs.c > > @@ -44,6 +44,8 @@ struct drm_i915_mocs_table { > > #define LE_SCC(value) ((value) << 8) > > #define LE_PFM(value) ((value) << 11) > > #define LE_SCF(value) ((value) << 14) > > +#define LE_COS(value) ((value) << 15) > > +#define LE_SSE(value) ((value) << 17) > > /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */ > > #define L3_ESC(value) ((value) << 0) > > @@ -52,6 +54,10 @@ struct drm_i915_mocs_table { > > /* Helper defines */ > > #define GEN9_NUM_MOCS_ENTRIES 62 /* 62 out of 64 - 63 & 64 are reserved. */ > > +#define GEN11_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. */ > > + > > +#define NUM_MOCS_ENTRIES(i915) \ > > + (INTEL_GEN(i915) < 11 ? GEN9_NUM_MOCS_ENTRIES : GEN11_NUM_MOCS_ENTRIES) > > /* (e)LLC caching options */ > > #define LE_0_PAGETABLE _LE_CACHEABILITY(0) > > @@ -80,21 +86,21 @@ struct drm_i915_mocs_table { > > * LNCFCMOCS0 - LNCFCMOCS32 registers. > > * > > * These tables are intended to be kept reasonably consistent across > > - * platforms. However some of the fields are not applicable to all of > > - * them. > > + * HW platforms, and for ICL+, be identical across OSes. To achieve > > + * that, for Icelake and above, list of entries is published as part > > + * of bspec. > > * > > * Entries not part of the following tables are undefined as far as > > - * userspace is concerned and shouldn't be relied upon. For the time > > - * being they will be implicitly initialized to the strictest caching > > - * configuration (uncached) to guarantee forwards compatibility with > > - * userspace programs written against more recent kernels providing > > - * additional MOCS entries. > > + * userspace is concerned and shouldn't be relied upon. > > + * > > + * The last two entries are reserved by the hardware. For ICL+ they > > + * should be initialized according to bspec and never used, for older > > + * platforms they should never be written to. > > * > > - * NOTE: These tables MUST start with being uncached and the length > > - * MUST be less than 63 as the last two registers are reserved > > - * by the hardware. These tables are part of the kernel ABI and > > - * may only be updated incrementally by adding entries at the > > - * end. > > + * NOTE: These tables are part of bspec and defined as part of hardware > > + * interface for ICL+. For older platforms, they are part of kernel > > + * ABI. It is expected that existing entries will remain constant > > + * and the tables will only be updated by adding new entries. > > */ > > #define GEN9_MOCS_ENTRIES \ > > @@ -132,6 +138,132 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { > > }, > > }; > > +#define GEN11_MOCS_ENTRIES \ > > + [0] = { \ > > + /* Base - Uncached (Deprecated) */ \ > > + .control_value = LE_1_UC | LE_TC_1_LLC, \ > > + .l3cc_value = L3_1_UC \ > > + }, \ > > + [1] = { \ > > + /* Base - L3 + LeCC:PAT (Deprecated) */ \ > > + .control_value = LE_0_PAGETABLE | LE_TC_1_LLC, \ > > + .l3cc_value = L3_3_WB \ > > + }, \ > > + [2] = { \ > > + /* Base - L3 + LLC */ \ > > + .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ > > + .l3cc_value = L3_3_WB \ > > + }, \ > > + [3] = { \ > > + /* Base - Uncached */ \ > > + .control_value = LE_1_UC | LE_TC_1_LLC, \ > > + .l3cc_value = L3_1_UC \ > > + }, \ > > + [4] = { \ > > + /* Base - L3 */ \ > > + .control_value = LE_1_UC | LE_TC_1_LLC, \ > > + .l3cc_value = L3_3_WB \ > > + }, \ > > + [5] = { \ > > + /* Base - LLC */ \ > > + .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ > > + .l3cc_value = L3_1_UC \ > > + }, \ > > + [6] = { \ > > + /* Age 0 - LLC */ \ > > + .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \ > > + .l3cc_value = L3_1_UC \ > > + }, \ > > + [7] = { \ > > + /* Age 0 - L3 + LLC */ \ > > + .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \ > > + .l3cc_value = L3_3_WB \ > > + }, \ > > + [8] = { \ > > + /* Age: Don't Chg. - LLC */ \ > > + .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \ > > + .l3cc_value = L3_1_UC \ > > + }, \ > > + [9] = { \ > > + /* Age: Don't Chg. - L3 + LLC */ \ > > + .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \ > > + .l3cc_value = L3_3_WB \ > > + }, \ > > + [10] = { \ > > + /* No AOM - LLC */ \ > > + .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \ > > + .l3cc_value = L3_1_UC \ > > + }, \ > > + [11] = { \ > > + /* No AOM - L3 + LLC */ \ > > + .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \ > > + .l3cc_value = L3_3_WB \ > > + }, \ > > + [12] = { \ > > + /* No AOM; Age 0 - LLC */ \ > > + .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \ > > + .l3cc_value = L3_1_UC \ > > + }, \ > > + [13] = { \ > > + /* No AOM; Age 0 - L3 + LLC */ \ > > + .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \ > > + .l3cc_value = L3_3_WB \ > > + }, \ > > + [14] = { \ > > + /* No AOM; Age:DC - LLC */ \ > > + .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \ > > + .l3cc_value = L3_1_UC \ > > + }, \ > > + [15] = { \ > > + /* No AOM; Age:DC - L3 + LLC */ \ > > + .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \ > > + .l3cc_value = L3_3_WB \ > > + }, \ > > + [18] = { \ > > + /* Self-Snoop - L3 + LLC */ \ > > + .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3), \ > > + .l3cc_value = L3_3_WB \ > > + }, \ > > + [19] = { \ > > + /* Skip Caching - L3 + LLC(12.5%) */ \ > > + .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(7), \ > > + .l3cc_value = L3_3_WB \ > > + }, \ > > + [20] = { \ > > + /* Skip Caching - L3 + LLC(25%) */ \ > > + .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(3), \ > > + .l3cc_value = L3_3_WB \ > > + }, \ > > + [21] = { \ > > + /* Skip Caching - L3 + LLC(50%) */ \ > > + .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(1), \ > > + .l3cc_value = L3_3_WB \ > > + }, \ > > + [22] = { \ > > + /* Skip Caching - L3 + LLC(75%) */ \ > > + .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(3), \ > > + .l3cc_value = L3_3_WB \ > > + }, \ > > + [23] = { \ > > + /* Skip Caching - L3 + LLC(87.5%) */ \ > > + .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(7), \ > > + .l3cc_value = L3_3_WB \ > > + }, \ > > There is a hole of unused entries here which I think comes to play from the > emit functions later. > > > + [62] = { \ > > + /* HW Reserved - SW program but never use */ \ > > + .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ > > + .l3cc_value = L3_1_UC \ > > + }, \ > > + [63] = { \ > > + /* HW Reserved - SW program but never use */ \ > > + .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \ > > + .l3cc_value = L3_1_UC \ > > + }, > > + > > +static const struct drm_i915_mocs_entry icelake_mocs_table[] = { > > + GEN11_MOCS_ENTRIES > > +}; > > + > > /** > > * get_mocs_settings() > > * @dev_priv: i915 device. > > @@ -149,8 +281,11 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv, > > { > > bool result = false; > > - if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv) || > > - IS_ICELAKE(dev_priv)) { > > + if (IS_ICELAKE(dev_priv)) { > > + table->size = ARRAY_SIZE(icelake_mocs_table); > > So effectively this is always 64 due last two reserved entries. what do you mean by *always* ? It is 64 for icelake. > > > + table->table = icelake_mocs_table; > > + result = true; > > + } else if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { > > table->size = ARRAY_SIZE(skylake_mocs_table); > > table->table = skylake_mocs_table; > > result = true; > > @@ -213,7 +348,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine) > > if (!get_mocs_settings(dev_priv, &table)) > > return; > > - GEM_BUG_ON(table.size > GEN9_NUM_MOCS_ENTRIES); > > + GEM_BUG_ON(table.size > NUM_MOCS_ENTRIES(dev_priv)); > > for (index = 0; index < table.size; index++) > > I915_WRITE(mocs_register(engine->id, index), > > @@ -227,7 +362,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine) > > * Entry 0 in the table is uncached - so we are just writing > > * that value to all the used entries. > > */ > > - for (; index < GEN9_NUM_MOCS_ENTRIES; index++) > > + for (; index < NUM_MOCS_ENTRIES(dev_priv); index++) > > I915_WRITE(mocs_register(engine->id, index), > > table.table[0].control_value); > > } > > @@ -245,18 +380,19 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine) > > static int emit_mocs_control_table(struct i915_request *rq, > > const struct drm_i915_mocs_table *table) > > { > > + struct drm_i915_private *i915 = rq->i915; > > enum intel_engine_id engine = rq->engine->id; > > unsigned int index; > > u32 *cs; > > - if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES)) > > + if (WARN_ON(table->size > NUM_MOCS_ENTRIES(i915))) > > return -ENODEV; > > - cs = intel_ring_begin(rq, 2 + 2 * GEN9_NUM_MOCS_ENTRIES); > > + cs = intel_ring_begin(rq, 2 + 2 * NUM_MOCS_ENTRIES(i915)); > > if (IS_ERR(cs)) > > return PTR_ERR(cs); > > - *cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES); > > + *cs++ = MI_LOAD_REGISTER_IMM(NUM_MOCS_ENTRIES(i915)); > > for (index = 0; index < table->size; index++) { > > *cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); > > @@ -271,7 +407,7 @@ static int emit_mocs_control_table(struct i915_request *rq, > > * Entry 0 in the table is uncached - so we are just writing > > * that value to all the used entries. > > */ > > - for (; index < GEN9_NUM_MOCS_ENTRIES; index++) { > > + for (; index < NUM_MOCS_ENTRIES(i915); index++) { > > *cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); > > *cs++ = table->table[0].control_value; > > So in init and emit functions the behaviour is now different due reserved > entries. indeed > > Where before (and according the the comment which this patch removes - why?) humn... but the comment is still there. Are we talking about the same comment? /* * Ok, now set the unused entries to uncached. These entries * are officially undefined and no contract for the contents * and settings is given for these entries. * * Entry 0 in the table is uncached - so we are just writing * that value to all the used entries. */ > we were setting unused entries to uncached, on Icelake they will be set to > whatever all zeros is - LE_PAGETABLE? > > If that is not desired, we may need to transition to a scheme where struct > drm_i915_mocs_entry starts holding the table index, and the static array is > not indexed by it. agreed > > It would also save the unused hole of zeros on Icelake which would probably > offset the extra used space. well, but at the expense of the additional field on the used entries and additional complexity in the code - we would lose the logic here to override an entry unless we keep track what has already been set. Lucas De Marchi > > So I think someone needs to clarify what is the desired state for unused > entries on Icelake. > > Regards, > > Tvrtko > > > } > > @@ -304,17 +440,18 @@ static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table, > > static int emit_mocs_l3cc_table(struct i915_request *rq, > > const struct drm_i915_mocs_table *table) > > { > > + struct drm_i915_private *i915 = rq->i915; > > unsigned int i; > > u32 *cs; > > - if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES)) > > + if (WARN_ON(table->size > NUM_MOCS_ENTRIES(i915))) > > return -ENODEV; > > - cs = intel_ring_begin(rq, 2 + GEN9_NUM_MOCS_ENTRIES); > > + cs = intel_ring_begin(rq, 2 + NUM_MOCS_ENTRIES(i915)); > > if (IS_ERR(cs)) > > return PTR_ERR(cs); > > - *cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES / 2); > > + *cs++ = MI_LOAD_REGISTER_IMM(NUM_MOCS_ENTRIES(i915) / 2); > > for (i = 0; i < table->size/2; i++) { > > *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); > > @@ -333,7 +470,7 @@ static int emit_mocs_l3cc_table(struct i915_request *rq, > > * this will be uncached. Leave the last pair uninitialised as > > * they are reserved by the hardware. > > */ > > - for (; i < GEN9_NUM_MOCS_ENTRIES / 2; i++) { > > + for (; i < NUM_MOCS_ENTRIES(i915) / 2; i++) { > > *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); > > *cs++ = l3cc_combine(table, 0, 0); > > } > > @@ -380,7 +517,7 @@ void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv) > > * this will be uncached. Leave the last pair as initialised as > > * they are reserved by the hardware. > > */ > > - for (; i < (GEN9_NUM_MOCS_ENTRIES / 2); i++) > > + for (; i < (NUM_MOCS_ENTRIES(dev_priv) / 2); i++) > > I915_WRITE(GEN9_LNCFCMOCS(i), l3cc_combine(&table, 0, 0)); > > } > > _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx