> -----Original Message----- > From: Volkin, Bradley D > Sent: Thursday, June 19, 2014 12:24 AM > To: Mateo Lozano, Oscar > Cc: intel-gfx@xxxxxxxxxxxxxxxxxxxxx > Subject: Re: [PATCH 12/53] drm/i915/bdw: Populate LR contexts > (somewhat) > > On Fri, Jun 13, 2014 at 08:37:30AM -0700, oscar.mateo@xxxxxxxxx wrote: > > From: Oscar Mateo <oscar.mateo@xxxxxxxxx> > > > > For the most part, logical ring context objects are similar to > > hardware contexts in that the backing object is meant to be opaque. > > There are some exceptions where we need to poke certain offsets of the > > object for initialization, updating the tail pointer or updating the PDPs. > > > > For our basic execlist implementation we'll only need our PPGTT PDs, > > and ringbuffer addresses in order to set up the context. With previous > > patches, we have both, so start prepping the context to be load. > > > > Before running a context for the first time you must populate some > > fields in the context object. These fields begin 1 PAGE + LRCA, ie. > > the first page (in 0 based counting) of the context image. These same > > fields will be read and written to as contexts are saved and restored > > once the system is up and running. > > > > Many of these fields are completely reused from previous global > > registers: ringbuffer head/tail/control, context control matches some > > previous MI_SET_CONTEXT flags, and page directories. There are other > > fields which we don't touch which we may want in the future. > > > > v2: CTX_LRI_HEADER_0 is MI_LOAD_REGISTER_IMM(14) for render and > (11) > > for other engines. > > > > v3: Several rebases and general changes to the code. > > > > v4: Squash with "Extract LR context object populating" > > Also, Damien's review comments: > > - Set the Force Posted bit on the LRI header, as the BSpec suggest we do. > > - Prevent warning when compiling a 32-bits kernel without HIGHMEM64. > > - Add a clarifying comment to the context population code. > > > > v5: Damien's review comments: > > - The third MI_LOAD_REGISTER_IMM in the context does not set Force > Posted. > > - Remove dead code. > > > > Signed-off-by: Ben Widawsky <ben@xxxxxxxxxxxx> (v1) > > Signed-off-by: Rafael Barbalho <rafael.barbalho@xxxxxxxxx> (v2) > > Signed-off-by: Oscar Mateo <oscar.mateo@xxxxxxxxx> (v3-5) > > --- > > drivers/gpu/drm/i915/i915_reg.h | 1 + > > drivers/gpu/drm/i915/intel_lrc.c | 154 > > ++++++++++++++++++++++++++++++++++++++- > > 2 files changed, 151 insertions(+), 4 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/i915_reg.h > > b/drivers/gpu/drm/i915/i915_reg.h index 286f05c..9c8692a 100644 > > --- a/drivers/gpu/drm/i915/i915_reg.h > > +++ b/drivers/gpu/drm/i915/i915_reg.h > > @@ -277,6 +277,7 @@ > > * address/value pairs. Don't overdue it, though, x <= 2^4 must hold! > > */ > > #define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1) > > +#define MI_LRI_FORCE_POSTED (1<<12) > > #define MI_STORE_REGISTER_MEM(x) MI_INSTR(0x24, 2*(x)-1) #define > > MI_STORE_REGISTER_MEM_GEN8(x) MI_INSTR(0x24, 3*(x)-1) > > #define MI_SRM_LRM_GLOBAL_GTT (1<<22) > > diff --git a/drivers/gpu/drm/i915/intel_lrc.c > > b/drivers/gpu/drm/i915/intel_lrc.c > > index b3a23e0..b96bb45 100644 > > --- a/drivers/gpu/drm/i915/intel_lrc.c > > +++ b/drivers/gpu/drm/i915/intel_lrc.c > > @@ -46,6 +46,38 @@ > > > > #define GEN8_LR_CONTEXT_ALIGN 4096 > > > > +#define RING_ELSP(ring) ((ring)->mmio_base+0x230) > > +#define RING_CONTEXT_CONTROL(ring) ((ring)->mmio_base+0x244) > > + > > +#define CTX_LRI_HEADER_0 0x01 > > +#define CTX_CONTEXT_CONTROL 0x02 > > +#define CTX_RING_HEAD 0x04 > > +#define CTX_RING_TAIL 0x06 > > +#define CTX_RING_BUFFER_START 0x08 > > +#define CTX_RING_BUFFER_CONTROL 0x0a > > +#define CTX_BB_HEAD_U 0x0c > > +#define CTX_BB_HEAD_L 0x0e > > +#define CTX_BB_STATE 0x10 > > +#define CTX_SECOND_BB_HEAD_U 0x12 > > +#define CTX_SECOND_BB_HEAD_L 0x14 > > +#define CTX_SECOND_BB_STATE 0x16 > > +#define CTX_BB_PER_CTX_PTR 0x18 > > +#define CTX_RCS_INDIRECT_CTX 0x1a > > +#define CTX_RCS_INDIRECT_CTX_OFFSET 0x1c > > +#define CTX_LRI_HEADER_1 0x21 > > +#define CTX_CTX_TIMESTAMP 0x22 > > +#define CTX_PDP3_UDW 0x24 > > +#define CTX_PDP3_LDW 0x26 > > +#define CTX_PDP2_UDW 0x28 > > +#define CTX_PDP2_LDW 0x2a > > +#define CTX_PDP1_UDW 0x2c > > +#define CTX_PDP1_LDW 0x2e > > +#define CTX_PDP0_UDW 0x30 > > +#define CTX_PDP0_LDW 0x32 > > +#define CTX_LRI_HEADER_2 0x41 > > +#define CTX_R_PWR_CLK_STATE 0x42 > > +#define CTX_GPGPU_CSR_BASE_ADDRESS 0x44 > > + > > bool intel_enable_execlists(struct drm_device *dev) { > > if (!i915.enable_execlists) > > @@ -54,6 +86,110 @@ bool intel_enable_execlists(struct drm_device > *dev) > > return HAS_LOGICAL_RING_CONTEXTS(dev) && USES_PPGTT(dev); } > > > > +static int > > +populate_lr_context(struct intel_context *ctx, struct > drm_i915_gem_object *ctx_obj, > > + struct intel_engine_cs *ring, struct drm_i915_gem_object > > +*ring_obj) { > > + struct i915_hw_ppgtt *ppgtt = ctx_to_ppgtt(ctx); > > + struct page *page; > > + uint32_t *reg_state; > > + int ret; > > + > > + ret = i915_gem_object_set_to_cpu_domain(ctx_obj, true); > > + if (ret) { > > + DRM_DEBUG_DRIVER("Could not set to CPU domain\n"); > > + return ret; > > + } > > + > > + ret = i915_gem_object_get_pages(ctx_obj); > > + if (ret) { > > + DRM_DEBUG_DRIVER("Could not get object pages\n"); > > + return ret; > > + } > > + > > + i915_gem_object_pin_pages(ctx_obj); > > + > > + /* The second page of the context object contains some fields which > must > > + * be set up prior to the first execution. */ > > + page = i915_gem_object_get_page(ctx_obj, 1); > > + reg_state = kmap_atomic(page); > > + > > + /* A context is actually a big batch buffer with several > MI_LOAD_REGISTER_IMM > > + * commands followed by (reg, value) pairs. The values we are > setting here are > > + * only for the first context restore: on a subsequent save, the GPU > will > > + * recreate this batchbuffer with new values (including all the missing > > + * MI_LOAD_REGISTER_IMM commands that we are not initializing > here). */ > > + if (ring->id == RCS) > > + reg_state[CTX_LRI_HEADER_0] = > MI_LOAD_REGISTER_IMM(14); > > + else > > + reg_state[CTX_LRI_HEADER_0] = > MI_LOAD_REGISTER_IMM(11); > > + reg_state[CTX_LRI_HEADER_0] |= MI_LRI_FORCE_POSTED; > > + reg_state[CTX_CONTEXT_CONTROL] = > RING_CONTEXT_CONTROL(ring); > > + reg_state[CTX_CONTEXT_CONTROL+1] = (1<<3) | > MI_RESTORE_INHIBIT; > > + reg_state[CTX_CONTEXT_CONTROL+1] |= > reg_state[CTX_CONTEXT_CONTROL+1] > > +<< 16; > > If we can, we should probably use _MASKED_BIT_ENABLE() here to make it > more obvious why we're doing the or+shift. ACK > > + reg_state[CTX_RING_HEAD] = RING_HEAD(ring->mmio_base); > > + reg_state[CTX_RING_HEAD+1] = 0; > > + reg_state[CTX_RING_TAIL] = RING_TAIL(ring->mmio_base); > > + reg_state[CTX_RING_TAIL+1] = 0; > > + reg_state[CTX_RING_BUFFER_START] = RING_START(ring- > >mmio_base); > > + reg_state[CTX_RING_BUFFER_START+1] = > i915_gem_obj_ggtt_offset(ring_obj); > > + reg_state[CTX_RING_BUFFER_CONTROL] = RING_CTL(ring- > >mmio_base); > > + reg_state[CTX_RING_BUFFER_CONTROL+1] = (31 * PAGE_SIZE) | > > +RING_VALID; > > The size here doesn't look right to me. Shouldn't it be (number of pages - 1)? > See init_ring_common() But that´s exactly what it is, right? Our ringbuf->size = 32 * PAGE_SIZE; so we are setting 31 * PAGE_SIZE > > + reg_state[CTX_BB_HEAD_U] = ring->mmio_base + 0x168; > > + reg_state[CTX_BB_HEAD_U+1] = 0; > > + reg_state[CTX_BB_HEAD_L] = ring->mmio_base + 0x140; > > + reg_state[CTX_BB_HEAD_L+1] = 0; > > + reg_state[CTX_BB_STATE] = ring->mmio_base + 0x110; > > + reg_state[CTX_BB_STATE+1] = (1<<5); > > + reg_state[CTX_SECOND_BB_HEAD_U] = ring->mmio_base + 0x11c; > > + reg_state[CTX_SECOND_BB_HEAD_U+1] = 0; > > + reg_state[CTX_SECOND_BB_HEAD_L] = ring->mmio_base + 0x114; > > + reg_state[CTX_SECOND_BB_HEAD_L+1] = 0; > > + reg_state[CTX_SECOND_BB_STATE] = ring->mmio_base + 0x118; > > + reg_state[CTX_SECOND_BB_STATE+1] = 0; > > + if (ring->id == RCS) { > > + reg_state[CTX_BB_PER_CTX_PTR] = ring->mmio_base + > 0x1c0; > > + reg_state[CTX_BB_PER_CTX_PTR+1] = 0; > > + reg_state[CTX_RCS_INDIRECT_CTX] = ring->mmio_base + > 0x1c4; > > + reg_state[CTX_RCS_INDIRECT_CTX+1] = 0; > > + reg_state[CTX_RCS_INDIRECT_CTX_OFFSET] = ring- > >mmio_base + 0x1c8; > > + reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] = 0; > > + } > > + reg_state[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9); > > + reg_state[CTX_LRI_HEADER_1] |= MI_LRI_FORCE_POSTED; > > + reg_state[CTX_CTX_TIMESTAMP] = ring->mmio_base + 0x3a8; > > + reg_state[CTX_CTX_TIMESTAMP+1] = 0; > > + reg_state[CTX_PDP3_UDW] = GEN8_RING_PDP_UDW(ring, 3); > > + reg_state[CTX_PDP3_LDW] = GEN8_RING_PDP_LDW(ring, 3); > > + reg_state[CTX_PDP2_UDW] = GEN8_RING_PDP_UDW(ring, 2); > > + reg_state[CTX_PDP2_LDW] = GEN8_RING_PDP_LDW(ring, 2); > > + reg_state[CTX_PDP1_UDW] = GEN8_RING_PDP_UDW(ring, 1); > > + reg_state[CTX_PDP1_LDW] = GEN8_RING_PDP_LDW(ring, 1); > > + reg_state[CTX_PDP0_UDW] = GEN8_RING_PDP_UDW(ring, 0); > > + reg_state[CTX_PDP0_LDW] = GEN8_RING_PDP_LDW(ring, 0); > > + reg_state[CTX_PDP3_UDW+1] = (u64)ppgtt->pd_dma_addr[3] >> 32; > > + reg_state[CTX_PDP3_LDW+1] = ppgtt->pd_dma_addr[3]; > > + reg_state[CTX_PDP2_UDW+1] = (u64)ppgtt->pd_dma_addr[2] >> 32; > > + reg_state[CTX_PDP2_LDW+1] = ppgtt->pd_dma_addr[2]; > > + reg_state[CTX_PDP1_UDW+1] = (u64)ppgtt->pd_dma_addr[1] >> 32; > > + reg_state[CTX_PDP1_LDW+1] = ppgtt->pd_dma_addr[1]; > > + reg_state[CTX_PDP0_UDW+1] = (u64)ppgtt->pd_dma_addr[0] >> 32; > > + reg_state[CTX_PDP0_LDW+1] = ppgtt->pd_dma_addr[0]; > > Are we able to use upper_32_bits() and lower_32_bits() for these? ACK -- Oscar _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx