On Fri, Jun 13, 2014 at 08:37:30AM -0700, oscar.mateo@xxxxxxxxx wrote: > From: Oscar Mateo <oscar.mateo@xxxxxxxxx> > > For the most part, logical ring context objects are similar to hardware > contexts in that the backing object is meant to be opaque. There are > some exceptions where we need to poke certain offsets of the object for > initialization, updating the tail pointer or updating the PDPs. > > For our basic execlist implementation we'll only need our PPGTT PDs, > and ringbuffer addresses in order to set up the context. With previous > patches, we have both, so start prepping the context to be load. > > Before running a context for the first time you must populate some > fields in the context object. These fields begin 1 PAGE + LRCA, ie. the > first page (in 0 based counting) of the context image. These same > fields will be read and written to as contexts are saved and restored > once the system is up and running. > > Many of these fields are completely reused from previous global > registers: ringbuffer head/tail/control, context control matches some > previous MI_SET_CONTEXT flags, and page directories. There are other > fields which we don't touch which we may want in the future. > > v2: CTX_LRI_HEADER_0 is MI_LOAD_REGISTER_IMM(14) for render and (11) > for other engines. > > v3: Several rebases and general changes to the code. > > v4: Squash with "Extract LR context object populating" > Also, Damien's review comments: > - Set the Force Posted bit on the LRI header, as the BSpec suggest we do. > - Prevent warning when compiling a 32-bits kernel without HIGHMEM64. > - Add a clarifying comment to the context population code. > > v5: Damien's review comments: > - The third MI_LOAD_REGISTER_IMM in the context does not set Force Posted. > - Remove dead code. > > Signed-off-by: Ben Widawsky <ben@xxxxxxxxxxxx> (v1) > Signed-off-by: Rafael Barbalho <rafael.barbalho@xxxxxxxxx> (v2) > Signed-off-by: Oscar Mateo <oscar.mateo@xxxxxxxxx> (v3-5) > --- > drivers/gpu/drm/i915/i915_reg.h | 1 + > drivers/gpu/drm/i915/intel_lrc.c | 154 ++++++++++++++++++++++++++++++++++++++- > 2 files changed, 151 insertions(+), 4 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h > index 286f05c..9c8692a 100644 > --- a/drivers/gpu/drm/i915/i915_reg.h > +++ b/drivers/gpu/drm/i915/i915_reg.h > @@ -277,6 +277,7 @@ > * address/value pairs. Don't overdue it, though, x <= 2^4 must hold! > */ > #define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1) > +#define MI_LRI_FORCE_POSTED (1<<12) > #define MI_STORE_REGISTER_MEM(x) MI_INSTR(0x24, 2*(x)-1) > #define MI_STORE_REGISTER_MEM_GEN8(x) MI_INSTR(0x24, 3*(x)-1) > #define MI_SRM_LRM_GLOBAL_GTT (1<<22) > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c > index b3a23e0..b96bb45 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -46,6 +46,38 @@ > > #define GEN8_LR_CONTEXT_ALIGN 4096 > > +#define RING_ELSP(ring) ((ring)->mmio_base+0x230) > +#define RING_CONTEXT_CONTROL(ring) ((ring)->mmio_base+0x244) > + > +#define CTX_LRI_HEADER_0 0x01 > +#define CTX_CONTEXT_CONTROL 0x02 > +#define CTX_RING_HEAD 0x04 > +#define CTX_RING_TAIL 0x06 > +#define CTX_RING_BUFFER_START 0x08 > +#define CTX_RING_BUFFER_CONTROL 0x0a > +#define CTX_BB_HEAD_U 0x0c > +#define CTX_BB_HEAD_L 0x0e > +#define CTX_BB_STATE 0x10 > +#define CTX_SECOND_BB_HEAD_U 0x12 > +#define CTX_SECOND_BB_HEAD_L 0x14 > +#define CTX_SECOND_BB_STATE 0x16 > +#define CTX_BB_PER_CTX_PTR 0x18 > +#define CTX_RCS_INDIRECT_CTX 0x1a > +#define CTX_RCS_INDIRECT_CTX_OFFSET 0x1c > +#define CTX_LRI_HEADER_1 0x21 > +#define CTX_CTX_TIMESTAMP 0x22 > +#define CTX_PDP3_UDW 0x24 > +#define CTX_PDP3_LDW 0x26 > +#define CTX_PDP2_UDW 0x28 > +#define CTX_PDP2_LDW 0x2a > +#define CTX_PDP1_UDW 0x2c > +#define CTX_PDP1_LDW 0x2e > +#define CTX_PDP0_UDW 0x30 > +#define CTX_PDP0_LDW 0x32 > +#define CTX_LRI_HEADER_2 0x41 > +#define CTX_R_PWR_CLK_STATE 0x42 > +#define CTX_GPGPU_CSR_BASE_ADDRESS 0x44 > + > bool intel_enable_execlists(struct drm_device *dev) > { > if (!i915.enable_execlists) > @@ -54,6 +86,110 @@ bool intel_enable_execlists(struct drm_device *dev) > return HAS_LOGICAL_RING_CONTEXTS(dev) && USES_PPGTT(dev); > } > > +static int > +populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_obj, > + struct intel_engine_cs *ring, struct drm_i915_gem_object *ring_obj) > +{ > + struct i915_hw_ppgtt *ppgtt = ctx_to_ppgtt(ctx); > + struct page *page; > + uint32_t *reg_state; > + int ret; > + > + ret = i915_gem_object_set_to_cpu_domain(ctx_obj, true); > + if (ret) { > + DRM_DEBUG_DRIVER("Could not set to CPU domain\n"); > + return ret; > + } > + > + ret = i915_gem_object_get_pages(ctx_obj); > + if (ret) { > + DRM_DEBUG_DRIVER("Could not get object pages\n"); > + return ret; > + } > + > + i915_gem_object_pin_pages(ctx_obj); > + > + /* The second page of the context object contains some fields which must > + * be set up prior to the first execution. */ > + page = i915_gem_object_get_page(ctx_obj, 1); > + reg_state = kmap_atomic(page); > + > + /* A context is actually a big batch buffer with several MI_LOAD_REGISTER_IMM > + * commands followed by (reg, value) pairs. The values we are setting here are > + * only for the first context restore: on a subsequent save, the GPU will > + * recreate this batchbuffer with new values (including all the missing > + * MI_LOAD_REGISTER_IMM commands that we are not initializing here). */ > + if (ring->id == RCS) > + reg_state[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(14); > + else > + reg_state[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(11); > + reg_state[CTX_LRI_HEADER_0] |= MI_LRI_FORCE_POSTED; > + reg_state[CTX_CONTEXT_CONTROL] = RING_CONTEXT_CONTROL(ring); > + reg_state[CTX_CONTEXT_CONTROL+1] = (1<<3) | MI_RESTORE_INHIBIT; > + reg_state[CTX_CONTEXT_CONTROL+1] |= reg_state[CTX_CONTEXT_CONTROL+1] << 16; If we can, we should probably use _MASKED_BIT_ENABLE() here to make it more obvious why we're doing the or+shift. > + reg_state[CTX_RING_HEAD] = RING_HEAD(ring->mmio_base); > + reg_state[CTX_RING_HEAD+1] = 0; > + reg_state[CTX_RING_TAIL] = RING_TAIL(ring->mmio_base); > + reg_state[CTX_RING_TAIL+1] = 0; > + reg_state[CTX_RING_BUFFER_START] = RING_START(ring->mmio_base); > + reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(ring_obj); > + reg_state[CTX_RING_BUFFER_CONTROL] = RING_CTL(ring->mmio_base); > + reg_state[CTX_RING_BUFFER_CONTROL+1] = (31 * PAGE_SIZE) | RING_VALID; The size here doesn't look right to me. Shouldn't it be (number of pages - 1)? See init_ring_common() > + reg_state[CTX_BB_HEAD_U] = ring->mmio_base + 0x168; > + reg_state[CTX_BB_HEAD_U+1] = 0; > + reg_state[CTX_BB_HEAD_L] = ring->mmio_base + 0x140; > + reg_state[CTX_BB_HEAD_L+1] = 0; > + reg_state[CTX_BB_STATE] = ring->mmio_base + 0x110; > + reg_state[CTX_BB_STATE+1] = (1<<5); > + reg_state[CTX_SECOND_BB_HEAD_U] = ring->mmio_base + 0x11c; > + reg_state[CTX_SECOND_BB_HEAD_U+1] = 0; > + reg_state[CTX_SECOND_BB_HEAD_L] = ring->mmio_base + 0x114; > + reg_state[CTX_SECOND_BB_HEAD_L+1] = 0; > + reg_state[CTX_SECOND_BB_STATE] = ring->mmio_base + 0x118; > + reg_state[CTX_SECOND_BB_STATE+1] = 0; > + if (ring->id == RCS) { > + reg_state[CTX_BB_PER_CTX_PTR] = ring->mmio_base + 0x1c0; > + reg_state[CTX_BB_PER_CTX_PTR+1] = 0; > + reg_state[CTX_RCS_INDIRECT_CTX] = ring->mmio_base + 0x1c4; > + reg_state[CTX_RCS_INDIRECT_CTX+1] = 0; > + reg_state[CTX_RCS_INDIRECT_CTX_OFFSET] = ring->mmio_base + 0x1c8; > + reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] = 0; > + } > + reg_state[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9); > + reg_state[CTX_LRI_HEADER_1] |= MI_LRI_FORCE_POSTED; > + reg_state[CTX_CTX_TIMESTAMP] = ring->mmio_base + 0x3a8; > + reg_state[CTX_CTX_TIMESTAMP+1] = 0; > + reg_state[CTX_PDP3_UDW] = GEN8_RING_PDP_UDW(ring, 3); > + reg_state[CTX_PDP3_LDW] = GEN8_RING_PDP_LDW(ring, 3); > + reg_state[CTX_PDP2_UDW] = GEN8_RING_PDP_UDW(ring, 2); > + reg_state[CTX_PDP2_LDW] = GEN8_RING_PDP_LDW(ring, 2); > + reg_state[CTX_PDP1_UDW] = GEN8_RING_PDP_UDW(ring, 1); > + reg_state[CTX_PDP1_LDW] = GEN8_RING_PDP_LDW(ring, 1); > + reg_state[CTX_PDP0_UDW] = GEN8_RING_PDP_UDW(ring, 0); > + reg_state[CTX_PDP0_LDW] = GEN8_RING_PDP_LDW(ring, 0); > + reg_state[CTX_PDP3_UDW+1] = (u64)ppgtt->pd_dma_addr[3] >> 32; > + reg_state[CTX_PDP3_LDW+1] = ppgtt->pd_dma_addr[3]; > + reg_state[CTX_PDP2_UDW+1] = (u64)ppgtt->pd_dma_addr[2] >> 32; > + reg_state[CTX_PDP2_LDW+1] = ppgtt->pd_dma_addr[2]; > + reg_state[CTX_PDP1_UDW+1] = (u64)ppgtt->pd_dma_addr[1] >> 32; > + reg_state[CTX_PDP1_LDW+1] = ppgtt->pd_dma_addr[1]; > + reg_state[CTX_PDP0_UDW+1] = (u64)ppgtt->pd_dma_addr[0] >> 32; > + reg_state[CTX_PDP0_LDW+1] = ppgtt->pd_dma_addr[0]; Are we able to use upper_32_bits() and lower_32_bits() for these? Brad > + if (ring->id == RCS) { > + reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); > + reg_state[CTX_R_PWR_CLK_STATE] = 0x20c8; > + reg_state[CTX_R_PWR_CLK_STATE+1] = 0; > + } > + > + kunmap_atomic(reg_state); > + > + ctx_obj->dirty = 1; > + set_page_dirty(page); > + i915_gem_object_unpin_pages(ctx_obj); > + > + return 0; > +} > + > void intel_lr_context_free(struct intel_context *ctx) > { > int i; > @@ -145,14 +281,24 @@ int intel_lr_context_deferred_create(struct intel_context *ctx, > if (ret) { > DRM_DEBUG_DRIVER("Failed to allocate ringbuffer obj %s: %d\n", > ring->name, ret); > - kfree(ringbuf); > - i915_gem_object_ggtt_unpin(ctx_obj); > - drm_gem_object_unreference(&ctx_obj->base); > - return ret; > + goto error; > + } > + > + ret = populate_lr_context(ctx, ctx_obj, ring, ringbuf->obj); > + if (ret) { > + DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret); > + intel_destroy_ring_buffer(ringbuf); > + goto error; > } > > ctx->engine[ring->id].ringbuf = ringbuf; > ctx->engine[ring->id].obj = ctx_obj; > > return 0; > + > +error: > + kfree(ringbuf); > + i915_gem_object_ggtt_unpin(ctx_obj); > + drm_gem_object_unreference(&ctx_obj->base); > + return ret; > } > -- > 1.9.0 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > http://lists.freedesktop.org/mailman/listinfo/intel-gfx _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx