On Thu, Jun 4, 2015 at 11:27 AM, Peter Antoine <peter.antoine@xxxxxxxxx> wrote: > This change adds the programming of the MOCS registers to the gen 9+ > platforms. This change set programs the MOCS register values to a set > of values that are defined to be optimal. > > It creates a fixed register set that is programmed across the different > engines so that all engines have the same table. This is done as the > main RCS context only holds the registers for itself and the shared > L3 values. By trying to keep the registers consistent across the > different engines it should make the programming for the registers > consistent. > > Signed-off-by: Peter Antoine <peter.antoine@xxxxxxxxx> > --- > drivers/gpu/drm/i915/Makefile | 3 +- > drivers/gpu/drm/i915/i915_reg.h | 9 ++ > drivers/gpu/drm/i915/intel_lrc.c | 68 +++++++++++ > drivers/gpu/drm/i915/intel_mocs.c | 241 ++++++++++++++++++++++++++++++++++++++ > drivers/gpu/drm/i915/intel_mocs.h | 101 ++++++++++++++++ > 5 files changed, 421 insertions(+), 1 deletion(-) > create mode 100644 drivers/gpu/drm/i915/intel_mocs.c > create mode 100644 drivers/gpu/drm/i915/intel_mocs.h > > diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile > index b7ddf48..cd7b910 100644 > --- a/drivers/gpu/drm/i915/Makefile > +++ b/drivers/gpu/drm/i915/Makefile > @@ -36,7 +36,8 @@ i915-y += i915_cmd_parser.o \ > i915_trace_points.o \ > intel_lrc.o \ > intel_ringbuffer.o \ > - intel_uncore.o > + intel_uncore.o \ > + intel_mocs.o > > # autogenerated null render state > i915-y += intel_renderstate_gen6.o \ > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h > index 7213224..3a435b5 100644 > --- a/drivers/gpu/drm/i915/i915_reg.h > +++ b/drivers/gpu/drm/i915/i915_reg.h > @@ -7829,4 +7829,13 @@ enum skl_disp_power_wells { > #define _PALETTE_A (dev_priv->info.display_mmio_offset + 0xa000) > #define _PALETTE_B (dev_priv->info.display_mmio_offset + 0xa800) > > +/* MOCS (Memory Object Control State) registers */ > +#define GEN9_LNCFCMOCS0 (0xB020) /* L3 Cache Control base */ > + > +#define GEN9_GFX_MOCS_0 (0xc800) /* Graphics MOCS base register*/ > +#define GEN9_MFX0_MOCS_0 (0xc900) /* Media 0 MOCS base register*/ > +#define GEN9_MFX1_MOCS_0 (0xcA00) /* Media 1 MOCS base register*/ > +#define GEN9_VEBOX_MOCS_0 (0xcB00) /* Video MOCS base register*/ > +#define GEN9_BLT_MOCS_0 (0xcc00) /* Blitter MOCS base register*/ > + > #endif /* _I915_REG_H_ */ > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c > index 9f5485d..c875569 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -135,6 +135,7 @@ > #include <drm/drmP.h> > #include <drm/i915_drm.h> > #include "i915_drv.h" > +#include "intel_mocs.h" > > #define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE) > #define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE) > @@ -1370,6 +1371,67 @@ out: > return ret; > } > > +/* > + * i915_gem_program_mocs() - program the MOCS register. > + * > + * ring: The ring that the programming batch will be run in. > + * ctx: The intel_context to be used. > + * > + * This function will emit a batch buffer with the values required for > + * programming the MOCS register values for all the currenly supported > + * rings. > + * > + * Return: 0 on success, otherwise the error status. > + */ > +static int i915_gem_program_mocs(struct intel_engine_cs *ring, > + struct intel_context *ctx) > +{ > + int ret = 0; > + > + struct drm_i915_mocs_table t; > + struct drm_device *dev = ring->dev; > + struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf; > + > + if (get_mocs_settings(dev, &t)) { > + u32 table_size; > + > + /* > + * OK. For each supported ring: > + * table_size * 2 dwords for each control_value > + * plus table/2 dwords for l3cc values. > + * > + * Plus 1 for the load command and 1 for the NOOP per ring > + * and the l3cc programming. > + */ > + table_size = GEN9_NUM_MOCS_RINGS * ((2 * t.size) + 2) + > + t.size + 2; > + ret = intel_logical_ring_begin(ringbuf, ctx, table_size); > + if (ret) { > + DRM_ERROR("intel_logical_ring_begin failed %d\n", ret); > + return ret; > + } > + > + /* program the control registers */ > + emit_mocs_control_table(ringbuf, &t, GEN9_GFX_MOCS_0); > + emit_mocs_control_table(ringbuf, &t, GEN9_MFX0_MOCS_0); > + emit_mocs_control_table(ringbuf, &t, GEN9_MFX1_MOCS_0); > + emit_mocs_control_table(ringbuf, &t, GEN9_VEBOX_MOCS_0); > + emit_mocs_control_table(ringbuf, &t, GEN9_BLT_MOCS_0); > + > + /* now program the l3cc registers */ > + emit_mocs_l3cc_table(ringbuf, &t); > + > + intel_logical_ring_advance(ringbuf); > + > + DRM_INFO("MOCS: Table set in Context\n"); > + } else { > + DRM_INFO("MOCS: Table Not supported on platform\n"); > + } > + > + return ret; > +} > + > + > static int gen8_init_rcs_context(struct intel_engine_cs *ring, > struct intel_context *ctx) > { > @@ -1379,6 +1441,12 @@ static int gen8_init_rcs_context(struct intel_engine_cs *ring, > if (ret) > return ret; > > + /* > + * Failing to program the MOCS is non-fatal.The system will not > + * run at peak performance. So generate a warning and carry on. > + */ > + WARN_ON(i915_gem_program_mocs(ring, ctx) != 0); > + > return intel_lr_context_render_state_init(ring, ctx); > } > > diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c > new file mode 100644 > index 0000000..20c9736 > --- /dev/null > +++ b/drivers/gpu/drm/i915/intel_mocs.c > @@ -0,0 +1,241 @@ > +/* > + * Copyright (c) 2015 Intel Corporation > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: * > + * The above copyright notice and this permission notice (including the next > + * paragraph) shall be included in all copies or substantial portions of the > + * Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, > + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > + * SOFTWARE. > + * > + * Authors: > + * Peter Antoine <peter.antoine@xxxxxxxxx> > + */ > + > +#include "intel_mocs.h" > +#include "intel_lrc.h" > +#include "intel_ringbuffer.h" > + > +/* > + * MOCS tables > + * > + * These are the MOCS tables that are programmed across all the rings. > + * The control value is programmed to all the rings that support the > + * MOCS registers. While the l3cc_values are only programmed to the > + * LNCFCMOCS0 - LNCFCMOCS32 registers. > + * > + * NOTE: These tables MUST start with being uncached {0,0} and the > + * the length MUST be less than 63 as the last two registers are > + * reserved by the hardware. > + */ > +struct drm_i915_mocs_entry skylake_mocs_table[] = { Presumably you want this... > + /* {0x00000009, 0x0010} */ > + {(MOCS_CACHEABILITY(1) | MOCS_TGT_CACHE(2) | MOCS_LRUM(0) | > + MOCS_AOM(0) | MOCS_LECC_ESC(0) | MOCS_SCC(0) | MOC_PFM(0) | > + MOCS_SCF(0)), > + (MOCS_ESC(0) | MOCS_SCC(0) | MOCS_L3_CACHEABILITY(1))}, > + /* {0x0000003b, 0x0030} */ > + {(MOCS_CACHEABILITY(3) | MOCS_TGT_CACHE(2) | MOCS_LRUM(3) | > + MOCS_AOM(0) | MOCS_LECC_ESC(0) | MOCS_SCC(0) | MOC_PFM(0) | > + MOCS_SCF(0)), > + (MOCS_ESC(0) | MOCS_SCC(0) | MOCS_L3_CACHEABILITY(3))}, > + /* {0x00000039, 0x0010} */ > + {(MOCS_CACHEABILITY(1) | MOCS_TGT_CACHE(2) | MOCS_LRUM(3) | > + MOCS_AOM(0) | MOCS_LECC_ESC(0) | MOCS_SCC(0) | MOC_PFM(0) | > + MOCS_SCF(0)), > + (MOCS_ESC(0) | MOCS_SCC(0) | MOCS_L3_CACHEABILITY(1))}, > + /* {0x00000017, 0x0030} */ > + {(MOCS_CACHEABILITY(3) | MOCS_TGT_CACHE(1) | MOCS_LRUM(1) | > + MOCS_AOM(0) | MOCS_LECC_ESC(0) | MOCS_SCC(0) | MOC_PFM(0) | > + MOCS_SCF(0)), > + (MOCS_ESC(0) | MOCS_SCC(0) | MOCS_L3_CACHEABILITY(3))}, > + /* {0x00000017, 0x0010} */ > + {(MOCS_CACHEABILITY(3) | MOCS_TGT_CACHE(1) | MOCS_LRUM(1) | > + MOCS_AOM(0) | MOCS_LECC_ESC(0) | MOCS_SCC(0) | MOC_PFM(0) | > + MOCS_SCF(0)), > + (MOCS_ESC(0) | MOCS_SCC(0) | MOCS_L3_CACHEABILITY(1))}, > + /* {0x00000019, 0x0010} */ > + {(MOCS_CACHEABILITY(1) | MOCS_TGT_CACHE(2) | MOCS_LRUM(1) | > + MOCS_AOM(0) | MOCS_LECC_ESC(0) | MOCS_SCC(0) | MOC_PFM(0) | > + MOCS_SCF(0)), > + (MOCS_ESC(0) | MOCS_SCC(0) | MOCS_L3_CACHEABILITY(1))}, > + /* {0x00000037, 0x0030} */ > + {(MOCS_CACHEABILITY(3) | MOCS_TGT_CACHE(1) | MOCS_LRUM(3) | > + MOCS_AOM(0) | MOCS_LECC_ESC(0) | MOCS_SCC(0) | MOC_PFM(0) | > + MOCS_SCF(0)), > + (MOCS_ESC(0) | MOCS_SCC(0) | MOCS_L3_CACHEABILITY(3))}, > + /* {0x0000003b, 0x0010} */ > + {(MOCS_CACHEABILITY(3) | MOCS_TGT_CACHE(2) | MOCS_LRUM(3) | > + MOCS_AOM(0) | MOCS_LECC_ESC(0) | MOCS_SCC(0) | MOC_PFM(0) | > + MOCS_SCF(0)), > + (MOCS_ESC(0) | MOCS_SCC(0) | MOCS_L3_CACHEABILITY(1))}, > +}; > + > +struct drm_i915_mocs_entry broxton_mocs_table[] = { ... and this to be "static const"? > + /* {0x00000001, 0x0010} */ > + {(MOCS_CACHEABILITY(1) | MOCS_TGT_CACHE(0) | MOCS_LRUM(0) | > + MOCS_AOM(0) | MOCS_LECC_ESC(0) | MOCS_SCC(0) | MOC_PFM(0) | > + MOCS_SCF(0)), > + (MOCS_ESC(0) | MOCS_SCC(0) | MOCS_L3_CACHEABILITY(1))}, > + /* {0x00004001, 0x0010} */ > + {(MOCS_CACHEABILITY(1) | MOCS_TGT_CACHE(0) | MOCS_LRUM(0) | > + MOCS_AOM(0) | MOCS_LECC_ESC(0) | MOCS_SCC(0) | MOC_PFM(0) | > + MOCS_SCF(1)), > + (MOCS_ESC(0) | MOCS_SCC(0) | MOCS_L3_CACHEABILITY(1))}, > + /* {0x0000403b, 0x0030} */ > + {(MOCS_CACHEABILITY(3) | MOCS_TGT_CACHE(2) | MOCS_LRUM(3) | > + MOCS_AOM(0) | MOCS_LECC_ESC(0) | MOCS_SCC(0) | MOC_PFM(0) | > + MOCS_SCF(1)), > + (MOCS_ESC(0) | MOCS_SCC(0) | MOCS_L3_CACHEABILITY(3))}, > + /* {0x0000401b, 0x0030} */ > + {(MOCS_CACHEABILITY(3) | MOCS_TGT_CACHE(2) | MOCS_LRUM(1) | > + MOCS_AOM(0) | MOCS_LECC_ESC(0) | MOCS_SCC(0) | MOC_PFM(0) | > + MOCS_SCF(1)), > + (MOCS_ESC(0) | MOCS_SCC(0) | MOCS_L3_CACHEABILITY(3))}, > + /* {0x00000017, 0x0010} */ > + {(MOCS_CACHEABILITY(3) | MOCS_TGT_CACHE(1) | MOCS_LRUM(1) | > + MOCS_AOM(0) | MOCS_LECC_ESC(0) | MOCS_SCC(0) | MOC_PFM(0) | > + MOCS_SCF(0)), > + (MOCS_ESC(0) | MOCS_SCC(0) | MOCS_L3_CACHEABILITY(1))}, > + /* {0x00000019, 0x0010} */ > + {(MOCS_CACHEABILITY(1) | MOCS_TGT_CACHE(2) | MOCS_LRUM(1) | > + MOCS_AOM(0) | MOCS_LECC_ESC(0) | MOCS_SCC(0) | MOC_PFM(0) | > + MOCS_SCF(0)), > + (MOCS_ESC(0) | MOCS_SCC(0) | MOCS_L3_CACHEABILITY(1))}, > + /* {0x00000037, 0x0030} */ > + {(MOCS_CACHEABILITY(3) | MOCS_TGT_CACHE(1) | MOCS_LRUM(3) | > + MOCS_AOM(0) | MOCS_LECC_ESC(0) | MOCS_SCC(0) | MOC_PFM(0) | > + MOCS_SCF(0)), > + (MOCS_ESC(0) | MOCS_SCC(0) | MOCS_L3_CACHEABILITY(3))}, > + /* {0x0000003b, 0x0010} */ > + {(MOCS_CACHEABILITY(3) | MOCS_TGT_CACHE(2) | MOCS_LRUM(3) | > + MOCS_AOM(0) | MOCS_LECC_ESC(0) | MOCS_SCC(0) | MOC_PFM(0) | > + MOCS_SCF(0)), > + (MOCS_ESC(0) | MOCS_SCC(0) | MOCS_L3_CACHEABILITY(1))}, > +}; > + > +/** > + * get_mocs_settings > + * > + * This function will return the values of the MOCS table that needs to > + * be programmed for the platform. It will return the values that need > + * to be programmed and if they need to be programmed. > + * > + * If the return values is false then the registers do not need programming. > + */ > +bool get_mocs_settings(struct drm_device *dev, > + struct drm_i915_mocs_table *table) { > + bool result = false; > + > + if (IS_SKYLAKE(dev)) { > + table->size = ARRAY_SIZE(skylake_mocs_table); > + table->table = skylake_mocs_table; > + result = true; > + } else if (IS_BROXTON(dev)) { > + table->size = ARRAY_SIZE(broxton_mocs_table); > + table->table = broxton_mocs_table; > + result = true; > + } else { > + /* Platform that should have a MOCS table does not */ > + WARN_ON(INTEL_INFO(dev)->gen >= 9); > + } > + > + return result; > +} > + > +/** > + * emit_mocs_control_table() - emit the mocs control table > + * @ringbuf: DRM device. > + * @table: The values to program into the control regs. > + * @reg_base: The base for the Engine that needs to be programmed. > + * > + * This function simply emits a MI_LOAD_REGISTER_IMM command for the > + * given table starting at the given address. > + * > + * Return: Nothing. > + */ > +void emit_mocs_control_table(struct intel_ringbuffer *ringbuf, > + struct drm_i915_mocs_table *table, > + u32 reg_base) > +{ > + unsigned int index; > + > + intel_logical_ring_emit(ringbuf, > + MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES)); > + > + for (index = 0; index < table->size; index++) { > + intel_logical_ring_emit(ringbuf, reg_base + (index * 4)); > + intel_logical_ring_emit(ringbuf, > + table->table[index].control_value); > + } > + > + /* > + * Ok, now set the unused entries to uncached. These entries are > + * officially undefined and no contact is given for the contents and > + * settings is given for these entries. > + * > + * Entry 0 in the table is uncached - so we are just written that > + * value to all the used entries. > + */ > + for (; index < GEN9_NUM_MOCS_ENTRIES; index++) { > + intel_logical_ring_emit(ringbuf, reg_base + (index * 4)); > + intel_logical_ring_emit(ringbuf, table->table[0].control_value); > + } > + > + intel_logical_ring_emit(ringbuf, MI_NOOP); > +} > + > +/** > + * emit_mocs_l3cc_table() - emit the mocs control table > + * @ringbuf: DRM device. > + * @table: The values to program into the control regs. > + * > + * This function simply emits a MI_LOAD_REGISTER_IMM command for the > + * given table starting at the given address. This register set is programmed > + * in pairs. > + * > + * Return: Nothing. > + */ > +void emit_mocs_l3cc_table(struct intel_ringbuffer *ringbuf, > + struct drm_i915_mocs_table *table) { > + unsigned int count; > + unsigned int index; > + > + intel_logical_ring_emit(ringbuf, > + MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES/2)); I think the style is to always put spaces around operators (the /2). Occurs twice in the two for-loops below as well. > + > + for (index = 0, count = 0; index < table->size/2; index++, count += 2) { > + u32 value = (table->table[count].l3cc_value & 0xffff) | > + ((table->table[count + 1].l3cc_value & 0xffff) << 16); > + > + intel_logical_ring_emit(ringbuf, GEN9_LNCFCMOCS0 + (index * 4)); > + intel_logical_ring_emit(ringbuf, value); > + } > + > + /* > + * Now set the rest of the table to uncached - use entry 0 as this > + * will be uncached. Leave the last pair initialised as reserved by > + * the hardware. > + */ > + for (; index < GEN9_NUM_MOCS_ENTRIES/2; index++) { > + u32 value = (table->table[0].l3cc_value & 0xffff) | > + ((table->table[0].l3cc_value & 0xffff) << 16); > + > + intel_logical_ring_emit(ringbuf, GEN9_LNCFCMOCS0 + (index * 4)); > + intel_logical_ring_emit(ringbuf, value); > + } _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx