On Tue, Apr 15, 2014 at 11:08:02PM +0200, Daniel Vetter wrote: > On Tue, Apr 15, 2014 at 03:43:23PM -0500, Jeff McGee wrote: > > On Tue, Apr 15, 2014 at 11:10:34AM -0500, Jeff McGee wrote: > > > On Tue, Apr 15, 2014 at 11:00:33AM -0500, Jeff McGee wrote: > > > > On Thu, Mar 27, 2014 at 05:59:48PM +0000, oscar.mateo@xxxxxxxxx wrote: > > > > > From: Ben Widawsky <benjamin.widawsky@xxxxxxxxx> > > > > > > > > > > For the most part, logical ring context objects are similar to hardware > > > > > contexts in that the backing object is meant to be opaque. There are > > > > > some exceptions where we need to poke certain offsets of the object for > > > > > initialization, updating the tail pointer or updating the PDPs. > > > > > > > > > > For our basic execlist implementation we'll only need our PPGTT PDs, > > > > > and ringbuffer addresses in order to set up the context. With previous > > > > > patches, we have both, so start prepping the context to be loaded. > > > > > > > > > > Before running a context for the first time you must populate some > > > > > fields in the context object. These fields begin 1 PAGE + LRCA, ie. the > > > > > first page (in 0 based counting) of the context image. These same > > > > > fields will be read and written to as contexts are saved and restored > > > > > once the system is up and running. > > > > > > > > > > Many of these fields are completely reused from previous global > > > > > registers: ringbuffer head/tail/control, context control matches some > > > > > previous MI_SET_CONTEXT flags, and page directories. There are other > > > > > fields which we don't touch which we may want in the future. > > > > > > > > > > Signed-off-by: Ben Widawsky <ben@xxxxxxxxxxxx> > > > > > > > > > > v2: CTX_LRI_HEADER_0 is MI_LOAD_REGISTER_IMM(14) for render and (11) > > > > > for other engines. > > > > > > > > > > Signed-off-by: Rafael Barbalho <rafael.barbalho@xxxxxxxxx> > > > > > > > > > > v3: Several rebases and general changes to the code. 
> > > > > > > > > > Signed-off-by: Oscar Mateo <oscar.mateo@xxxxxxxxx> > > > > > --- > > > > > drivers/gpu/drm/i915/i915_lrc.c | 145 ++++++++++++++++++++++++++++++++++++++-- > > > > > 1 file changed, 138 insertions(+), 7 deletions(-) > > > > > > > > > > diff --git a/drivers/gpu/drm/i915/i915_lrc.c b/drivers/gpu/drm/i915/i915_lrc.c > > > > > index 40dfa95..f0176ff 100644 > > > > > --- a/drivers/gpu/drm/i915/i915_lrc.c > > > > > +++ b/drivers/gpu/drm/i915/i915_lrc.c > > > > > @@ -43,6 +43,38 @@ > > > > > > > > > > #define GEN8_LR_CONTEXT_SIZE (21 * PAGE_SIZE) > > > > > > > > > > +#define RING_ELSP(ring) ((ring)->mmio_base+0x230) > > > > > +#define RING_CONTEXT_CONTROL(ring) ((ring)->mmio_base+0x244) > > > > > + > > > > > +#define CTX_LRI_HEADER_0 0x01 > > > > > +#define CTX_CONTEXT_CONTROL 0x02 > > > > > +#define CTX_RING_HEAD 0x04 > > > > > +#define CTX_RING_TAIL 0x06 > > > > > +#define CTX_RING_BUFFER_START 0x08 > > > > > +#define CTX_RING_BUFFER_CONTROL 0x0a > > > > > +#define CTX_BB_HEAD_U 0x0c > > > > > +#define CTX_BB_HEAD_L 0x0e > > > > > +#define CTX_BB_STATE 0x10 > > > > > +#define CTX_SECOND_BB_HEAD_U 0x12 > > > > > +#define CTX_SECOND_BB_HEAD_L 0x14 > > > > > +#define CTX_SECOND_BB_STATE 0x16 > > > > > +#define CTX_BB_PER_CTX_PTR 0x18 > > > > > +#define CTX_RCS_INDIRECT_CTX 0x1a > > > > > +#define CTX_RCS_INDIRECT_CTX_OFFSET 0x1c > > > > > +#define CTX_LRI_HEADER_1 0x21 > > > > > +#define CTX_CTX_TIMESTAMP 0x22 > > > > > +#define CTX_PDP3_UDW 0x24 > > > > > +#define CTX_PDP3_LDW 0x26 > > > > > +#define CTX_PDP2_UDW 0x28 > > > > > +#define CTX_PDP2_LDW 0x2a > > > > > +#define CTX_PDP1_UDW 0x2c > > > > > +#define CTX_PDP1_LDW 0x2e > > > > > +#define CTX_PDP0_UDW 0x30 > > > > > +#define CTX_PDP0_LDW 0x32 > > > > > +#define CTX_LRI_HEADER_2 0x41 > > > > > +#define CTX_R_PWR_CLK_STATE 0x42 > > > > > +#define CTX_GPGPU_CSR_BASE_ADDRESS 0x44 > > > > > + > > > > > struct i915_hw_context * > > > > > gen8_gem_create_context(struct drm_device *dev, > > > > > struct 
intel_engine *ring, > > > > > @@ -51,6 +83,9 @@ gen8_gem_create_context(struct drm_device *dev, > > > > > { > > > > > struct i915_hw_context *ctx = NULL; > > > > > struct drm_i915_gem_object *ring_obj = NULL; > > > > > + struct i915_hw_ppgtt *ppgtt = NULL; > > > > > + struct page *page; > > > > > + uint32_t *reg_state; > > > > > int ret; > > > > > > > > > > ctx = i915_gem_create_context(dev, file_priv, create_vm); > > > > > @@ -79,18 +114,114 @@ gen8_gem_create_context(struct drm_device *dev, > > > > > > > > > > /* Failure at this point is almost impossible */ > > > > > ret = i915_gem_object_set_to_gtt_domain(ring_obj, true); > > > > > - if (ret) { > > > > > - i915_gem_object_ggtt_unpin(ring_obj); > > > > > - drm_gem_object_unreference(&ring_obj->base); > > > > > - i915_gem_object_ggtt_unpin(ctx->obj); > > > > > - i915_gem_context_unreference(ctx); > > > > > - return ERR_PTR(ret); > > > > > - } > > > > > + if (ret) > > > > > + goto destroy_ring_obj; > > > > > > > > > > ctx->ringbuf = &ring->default_ringbuf; > > > > > ctx->ringbuf->obj = ring_obj; > > > > > > > > > > + ppgtt = ctx_to_ppgtt(ctx); > > > > > + > > > > > + ret = i915_gem_object_set_to_cpu_domain(ctx->obj, true); > > > > > + if (ret) > > > > > + goto destroy_ring_obj; > > > > > + > > > > > + ret = i915_gem_object_get_pages(ctx->obj); > > > > > + if (ret) > > > > > + goto destroy_ring_obj; > > > > > + > > > > > + i915_gem_object_pin_pages(ctx->obj); > > > > > + > > > > > + /* The second page of the context object contains some fields which must > > > > > + * be set up prior to the first execution. 
> > > > > + */ > > > > > + page = i915_gem_object_get_page(ctx->obj, 1); > > > > > + reg_state = kmap_atomic(page); > > > > > + > > > > > + if (ring->id == RCS) > > > > > + reg_state[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(14); > > > > > + else > > > > > + reg_state[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(11); > > > > > + reg_state[CTX_CONTEXT_CONTROL] = RING_CONTEXT_CONTROL(ring); > > > > > + reg_state[CTX_CONTEXT_CONTROL+1] = (1<<3) | MI_RESTORE_INHIBIT; > > > > > + reg_state[CTX_CONTEXT_CONTROL+1] |= reg_state[CTX_CONTEXT_CONTROL+1] << 16; > > > > > + reg_state[CTX_RING_HEAD] = RING_HEAD(ring->mmio_base); > > > > > + reg_state[CTX_RING_HEAD+1] = 0; > > > > > + reg_state[CTX_RING_TAIL] = RING_TAIL(ring->mmio_base); > > > > > + reg_state[CTX_RING_TAIL+1] = 0; > > > > > + reg_state[CTX_RING_BUFFER_START] = RING_START(ring->mmio_base); > > > > > + reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(ring_obj); > > > > > + reg_state[CTX_RING_BUFFER_CONTROL] = RING_CTL(ring->mmio_base); > > > > > + reg_state[CTX_RING_BUFFER_CONTROL+1] = (31 * PAGE_SIZE) | RING_VALID; > > > > > + reg_state[CTX_BB_HEAD_U] = ring->mmio_base + 0x168; > > > > > + reg_state[CTX_BB_HEAD_U+1] = 0; > > > > > + reg_state[CTX_BB_HEAD_L] = ring->mmio_base + 0x140; > > > > > + reg_state[CTX_BB_HEAD_L+1] = 0; > > > > > + reg_state[CTX_BB_STATE] = ring->mmio_base + 0x110; > > > > > + reg_state[CTX_BB_STATE+1] = (1<<5); > > > > > + reg_state[CTX_SECOND_BB_HEAD_U] = ring->mmio_base + 0x11c; > > > > > + reg_state[CTX_SECOND_BB_HEAD_U+1] = 0; > > > > > + reg_state[CTX_SECOND_BB_HEAD_L] = ring->mmio_base + 0x114; > > > > > + reg_state[CTX_SECOND_BB_HEAD_L+1] = 0; > > > > > + reg_state[CTX_SECOND_BB_STATE] = ring->mmio_base + 0x118; > > > > > + reg_state[CTX_SECOND_BB_STATE+1] = 0; > > > > > + if (ring->id == RCS) { > > > > > + reg_state[CTX_BB_PER_CTX_PTR] = ring->mmio_base + 0x1c0; > > > > > + reg_state[CTX_BB_PER_CTX_PTR+1] = 0; > > > > > + reg_state[CTX_RCS_INDIRECT_CTX] = ring->mmio_base + 
0x1c4; > > > > > + reg_state[CTX_RCS_INDIRECT_CTX+1] = 0; > > > > > + reg_state[CTX_RCS_INDIRECT_CTX_OFFSET] = ring->mmio_base + 0x1c8; > > > > > + reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] = 0; > > > > > + } > > > > > + > > > > > + reg_state[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9); > > > > > + reg_state[CTX_CTX_TIMESTAMP] = ring->mmio_base + 0x3a8; > > > > > + reg_state[CTX_CTX_TIMESTAMP+1] = 0; > > > > > + reg_state[CTX_PDP3_UDW] = GEN8_RING_PDP_UDW(ring, 3); > > > > > + reg_state[CTX_PDP3_LDW] = GEN8_RING_PDP_LDW(ring, 3); > > > > > + reg_state[CTX_PDP2_UDW] = GEN8_RING_PDP_UDW(ring, 2); > > > > > + reg_state[CTX_PDP2_LDW] = GEN8_RING_PDP_LDW(ring, 2); > > > > > + reg_state[CTX_PDP1_UDW] = GEN8_RING_PDP_UDW(ring, 1); > > > > > + reg_state[CTX_PDP1_LDW] = GEN8_RING_PDP_LDW(ring, 1); > > > > > + reg_state[CTX_PDP0_UDW] = GEN8_RING_PDP_UDW(ring, 0); > > > > > + reg_state[CTX_PDP0_LDW] = GEN8_RING_PDP_LDW(ring, 0); > > > > > + reg_state[CTX_PDP3_UDW+1] = ppgtt->pd_dma_addr[3] >> 32; > > > > > + reg_state[CTX_PDP3_LDW+1] = ppgtt->pd_dma_addr[3]; > > > > > + reg_state[CTX_PDP2_UDW+1] = ppgtt->pd_dma_addr[2] >> 32; > > > > > + reg_state[CTX_PDP2_LDW+1] = ppgtt->pd_dma_addr[2]; > > > > > + reg_state[CTX_PDP1_UDW+1] = ppgtt->pd_dma_addr[1] >> 32; > > > > > + reg_state[CTX_PDP1_LDW+1] = ppgtt->pd_dma_addr[1]; > > > > > + reg_state[CTX_PDP0_UDW+1] = ppgtt->pd_dma_addr[0] >> 32; > > > > > + reg_state[CTX_PDP0_LDW+1] = ppgtt->pd_dma_addr[0]; > > > > > + if (ring->id == RCS) { > > > > > + reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); > > > > > + reg_state[CTX_R_PWR_CLK_STATE] = 0x20c8; > > > > > > > > You're writing the MMIO address for the R_PWR_CLK_STATE register to this > > > > field. Shouldn't this receive the value we want programmed to the register? > > > > > > > > > > Oh, nevermind. I understand now. 
> > > -Jeff > > > > > To clarify my comments...I was at first confused by the need to specify the > > R_PWR_CLK_STATE register address in the logical context, thinking that only > > the desired value needed to be specified. But I see now that the programming > > model is to specify the MI_LOAD_REGISTER_IMM command, followed by the address > > at which to load, followed by the value to load. > > > > Reflecting on my initial confusion, would it be clearer to provide names for > > each dword position in the context image, rather than using an unnamed offset > > like CTX_R_PWR_CLK_STATE+1? Example: > > > > reg_state[CTX_R_PWR_CLK_STATE_ADDR] = 0x20c8 > > reg_state[CTX_R_PWR_CLK_STATE_DATA] = 0; > > Usually when we emit batches in userspace (and the context is nothing else > really) we have some OUT_BATCH macro which writes the dword and increments > the pointer. Since MI_LOAD_REGISTER_IMM is multi-length we could add a > OUT_BATCH_REG_WRITE(reg, value) which does both dword emissions. > > That should clarify a lot what's going on here. We might even completely > drop all the offset #defines and replace them with a few comments or so. > -Daniel > OK, now I get it. My mistake was in thinking the context image is just pure state that hardware already knows how to restore. But as you say it is more like a batch which includes the state *and* the MI_LOAD_REGISTER_IMM commands required to restore. So in that sense I understand that the approach here to initialize the context is much like constructing a batch. But later when we want to update the value of a context field we have (in a later patch of this series): reg_state[CTX_RING_TAIL+1] = value; This is a bit obscure when occurring by itself and not in the flow of initializing the context (batch). The same will be true when we add management of the CTX_R_PWR_CLK_STATE value dword. 
-Jeff > > > > Jeff > > > > > + reg_state[CTX_R_PWR_CLK_STATE+1] = 0; > > > > > + } > > > > > + > > > > > +#if 0 > > > > > + /* Offsets not yet defined for these */ > > > > > + reg_state[CTX_GPGPU_CSR_BASE_ADDRESS[] = ; > > > > > + reg_state[CTX_GPGPU_CSR_BASE_ADDRESS+1] = 0; > > > > > +#endif > > > > > + > > > > > + kunmap_atomic(reg_state); > > > > > + > > > > > + ctx->obj->dirty = 1; > > > > > + set_page_dirty(page); > > > > > + i915_gem_object_unpin_pages(ctx->obj); > > > > > + > > > > > return ctx; > > > > > + > > > > > +destroy_ring_obj: > > > > > + i915_gem_object_ggtt_unpin(ring_obj); > > > > > + drm_gem_object_unreference(&ring_obj->base); > > > > > + ctx->ringbuf->obj = NULL; > > > > > + ctx->ringbuf = NULL; > > > > > + i915_gem_object_ggtt_unpin(ctx->obj); > > > > > + i915_gem_context_unreference(ctx); > > > > > + > > > > > + return ERR_PTR(ret); > > > > > } > > > > > > > > > > void gen8_gem_context_fini(struct drm_device *dev) > > > > > -- > > > > > 1.9.0 > > > > > > > > > > _______________________________________________ > > > > > Intel-gfx mailing list > > > > > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > > > > > http://lists.freedesktop.org/mailman/listinfo/intel-gfx > > > > _______________________________________________ > > > > Intel-gfx mailing list > > > > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > > > > http://lists.freedesktop.org/mailman/listinfo/intel-gfx > > > _______________________________________________ > > > Intel-gfx mailing list > > > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > > > http://lists.freedesktop.org/mailman/listinfo/intel-gfx > > _______________________________________________ > > Intel-gfx mailing list > > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > > http://lists.freedesktop.org/mailman/listinfo/intel-gfx > > -- > Daniel Vetter > Software Engineer, Intel Corporation > +41 (0) 79 365 57 48 - http://blog.ffwll.ch > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > 
http://lists.freedesktop.org/mailman/listinfo/intel-gfx _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx