On Fri, Mar 11, 2016 at 02:00:22PM +0000, Peter Antoine wrote: > Allow for the MOCS to be programmed for all engines. > Currently we program the MOCS when the first render batch > goes through. This works on most platforms but fails on > platforms that do not run a render batch early, > i.e. headless servers. The patch now programs all initialised engines > on init and the RCS is programmed again within the initial batch. This > is done for predictable consistency with regards to the hardware > context. > > Hardware context loading sets the values of the MOCS for RCS > and L3CC. Programming them from within the batch makes sure that > the render context is valid, no matter what the previous state of > the saved-context was. > > v2: posted correct version to the mailing list. > v3: moved programming to within engine->init_hw() (Chris Wilson) > > Signed-off-by: Peter Antoine <peter.antoine@xxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_gem.c | 3 + > drivers/gpu/drm/i915/intel_lrc.c | 4 ++ > drivers/gpu/drm/i915/intel_mocs.c | 120 +++++++++++++++++++++++++++++++++----- > drivers/gpu/drm/i915/intel_mocs.h | 2 + > 4 files changed, 114 insertions(+), 15 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index b854af2..3e18cbd 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -32,6 +32,7 @@ > #include "i915_vgpu.h" > #include "i915_trace.h" > #include "intel_drv.h" > +#include "intel_mocs.h" > #include <linux/shmem_fs.h> > #include <linux/slab.h> > #include <linux/swap.h> > @@ -4882,6 +4883,8 @@ i915_gem_init_hw(struct drm_device *dev) > goto out; > } > > + program_mocs_l3cc_table(dev); intel_mocs_init_l3cc_table(dev_priv); > + > /* We can't enable contexts until all firmware is loaded */ > if (HAS_GUC_UCODE(dev)) { > ret = intel_guc_ucode_load(dev); > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c > index 27c9ee3..2ef7a161 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -1603,6 +1603,8 @@ static int gen8_init_common_ring(struct intel_engine_cs *ring) > > memset(&ring->hangcheck, 0, sizeof(ring->hangcheck)); > > + program_mocs_control_table(ring); ret = intel_mocs_init_engine(ring); Check and propagate the errors! > + > return 0; > } > > @@ -2151,6 +2153,8 @@ static int logical_render_ring_init(struct drm_device *dev) > ret); > } > > + program_mocs_control_table(ring); Due to the splitting inside intel_lrc, it actually makes sense to do a ret = engine->init_hw(engine) call here. But for the time being, adding the setup here is redundant. > /** > + * program_mocs_control_table() - emit the mocs control table > + * @ring: The engine for whom to emit the registers. > + * > + * This function simply emits a MI_LOAD_REGISTER_IMM command for the > + * given table starting at the given address. > + * > + * Return: 0 on success, otherwise the error status. > + */ > +int program_mocs_control_table(struct intel_engine_cs *ring) > +{ > + struct drm_device *dev = ring->dev; > + struct drm_i915_private *dev_priv = dev->dev_private; > + struct drm_i915_mocs_table table; > + unsigned int index; > + > + if (get_mocs_settings(dev, &table)) { if (!get_mocs_setttings()) return 0; And then we can lose one level of indentation. > + if (WARN_ON(table.size > GEN9_NUM_MOCS_ENTRIES)) > + return -ENODEV; > + > + for (index = 0; index < table.size; index++) { > + I915_WRITE(mocs_register(ring->id, index), > + table.table[index].control_value); Whitespace makes this easier to read. Please align continuation lines with the opening bracket. > + } > + > + /* > + * Ok, now set the unused entries to uncached. These entries > + * are officially undefined and no contract for the contents > + * and settings is given for these entries. > + * > + * Entry 0 in the table is uncached - so we are just writing > + * that value to all the used entries. > + */ > + for (; index < GEN9_NUM_MOCS_ENTRIES; index++) { > + I915_WRITE(mocs_register(ring->id, index), > + table.table[0].control_value); Especially here. > + } > + } > + > + return 0; > +} > + > +/** > * emit_mocs_control_table() - emit the mocs control table > * @req: Request to set up the MOCS table for. > * @table: The values to program into the control regs. > @@ -210,7 +252,8 @@ static int emit_mocs_control_table(struct drm_i915_gem_request *req, > MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES)); > > for (index = 0; index < table->size; index++) { > - intel_logical_ring_emit_reg(ringbuf, mocs_register(ring, index)); > + intel_logical_ring_emit_reg(ringbuf, > + mocs_register(ring, index)); > intel_logical_ring_emit(ringbuf, > table->table[index].control_value); > } > @@ -224,8 +267,10 @@ static int emit_mocs_control_table(struct drm_i915_gem_request *req, > * that value to all the used entries. > */ > for (; index < GEN9_NUM_MOCS_ENTRIES; index++) { > - intel_logical_ring_emit_reg(ringbuf, mocs_register(ring, index)); > - intel_logical_ring_emit(ringbuf, table->table[0].control_value); > + intel_logical_ring_emit_reg(ringbuf, > + mocs_register(ring, index)); > + intel_logical_ring_emit(ringbuf, > + table->table[0].control_value); > } I'll pretend I never saw these spurious chunks. > > intel_logical_ring_emit(ringbuf, MI_NOOP); > @@ -302,6 +347,57 @@ static int emit_mocs_l3cc_table(struct drm_i915_gem_request *req, > } > > /** > + * program_mocs_l3cc_table() - program the mocs control table > + * @dev: The the device to be programmed. > + * @table: The values to program into the control regs. > + * > + * This function simply programs the mocs registers for the given table > + * starting at the given address. This register set is programmed in pairs. > + * > + * These registers may get programmed more than once, it is simpler to > + * re-program 32 registers than maintain the state of when they were programmed. > + * We are always reprogramming with the same values and this only on context > + * start. > + * > + * Return: Nothing. > + */ > +void program_mocs_l3cc_table(struct drm_device *dev) > +{ > + unsigned int count; > + unsigned int i; > + u32 value; > + struct drm_i915_private *dev_priv = dev->dev_private; > + struct drm_i915_mocs_table table; > + > + if (get_mocs_settings(dev, &table)) { Indentation, whitespace, passing around dev but using dev_priv > + u32 filler = (table.table[0].l3cc_value & 0xffff) | > + ((table.table[0].l3cc_value & 0xffff) << 16); l3cc_value is 16bit so the masking is garbage. static inline u32 l3cc_combine(u16 lower, u16 upper) { return lower | upper << 16; } for (i = 0; i < table.size/2; i++) I915_WRITE(GEN9_LNCFCMOCS(i), l3cc_combine(table.table[2*i].l3cc_value, table.table[2*i+1].l3cc_value)); if (table.size & 1) { I915_WRITE(GEN9_LNCFCMOCS(i), l3cc_combine(table.table[2*i].l3cc_value, table.table[0].l3cc_value)); i++; } > + /* > + * Now set the rest of the table to uncached - use entry 0 as > + * this will be uncached. Leave the last pair as initialised as > + * they are reserved by the hardware. > + */ for (; i < GEN9_NUM_MOCS_ENTRIES / 2; i++) { I915_WRITE(GEN9_LNCFCMOCS(i), l3cc_combine(table.table[0].l3cc_value, table.table[0].l3cc_value)); is easier on the eyes. -Chris -- Chris Wilson, Intel Open Source Technology Centre _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx