Re: [PATCH 19/49] drm/i915/bdw: Populate LR contexts (somewhat)

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Apr 15, 2014 at 11:08:02PM +0200, Daniel Vetter wrote:
> On Tue, Apr 15, 2014 at 03:43:23PM -0500, Jeff McGee wrote:
> > On Tue, Apr 15, 2014 at 11:10:34AM -0500, Jeff McGee wrote:
> > > On Tue, Apr 15, 2014 at 11:00:33AM -0500, Jeff McGee wrote:
> > > > On Thu, Mar 27, 2014 at 05:59:48PM +0000, oscar.mateo@xxxxxxxxx wrote:
> > > > > From: Ben Widawsky <benjamin.widawsky@xxxxxxxxx>
> > > > > 
> > > > > For the most part, logical ring context objects are similar to hardware
> > > > > contexts in that the backing object is meant to be opaque. There are
> > > > > some exceptions where we need to poke certain offsets of the object for
> > > > > initialization, updating the tail pointer or updating the PDPs.
> > > > > 
> > > > > For our basic execlist implementation we'll only need our PPGTT PDs,
> > > > > and ringbuffer addresses in order to set up the context. With previous
> > > > > patches, we have both, so start prepping the context to be loaded.
> > > > > 
> > > > > Before running a context for the first time you must populate some
> > > > > fields in the context object. These fields begin 1 PAGE + LRCA, i.e. the
> > > > > first page (in 0-based counting) of the context image. These same
> > > > > fields will be read and written to as contexts are saved and restored
> > > > > once the system is up and running.
> > > > > 
> > > > > Many of these fields are completely reused from previous global
> > > > > registers: ringbuffer head/tail/control, context control matches some
> > > > > previous MI_SET_CONTEXT flags, and page directories. There are other
> > > > > fields which we don't touch which we may want in the future.
> > > > > 
> > > > > Signed-off-by: Ben Widawsky <ben@xxxxxxxxxxxx>
> > > > > 
> > > > > v2: CTX_LRI_HEADER_0 is MI_LOAD_REGISTER_IMM(14) for render and (11)
> > > > > for other engines.
> > > > > 
> > > > > Signed-off-by: Rafael Barbalho <rafael.barbalho@xxxxxxxxx>
> > > > > 
> > > > > v3: Several rebases and general changes to the code.
> > > > > 
> > > > > Signed-off-by: Oscar Mateo <oscar.mateo@xxxxxxxxx>
> > > > > ---
> > > > >  drivers/gpu/drm/i915/i915_lrc.c | 145 ++++++++++++++++++++++++++++++++++++++--
> > > > >  1 file changed, 138 insertions(+), 7 deletions(-)
> > > > > 
> > > > > diff --git a/drivers/gpu/drm/i915/i915_lrc.c b/drivers/gpu/drm/i915/i915_lrc.c
> > > > > index 40dfa95..f0176ff 100644
> > > > > --- a/drivers/gpu/drm/i915/i915_lrc.c
> > > > > +++ b/drivers/gpu/drm/i915/i915_lrc.c
> > > > > @@ -43,6 +43,38 @@
> > > > >  
> > > > >  #define GEN8_LR_CONTEXT_SIZE (21 * PAGE_SIZE)
> > > > >  
> > > > > +#define RING_ELSP(ring)			((ring)->mmio_base+0x230)
> > > > > +#define RING_CONTEXT_CONTROL(ring)	((ring)->mmio_base+0x244)
> > > > > +
> > > > > +#define CTX_LRI_HEADER_0		0x01
> > > > > +#define CTX_CONTEXT_CONTROL		0x02
> > > > > +#define CTX_RING_HEAD			0x04
> > > > > +#define CTX_RING_TAIL			0x06
> > > > > +#define CTX_RING_BUFFER_START		0x08
> > > > > +#define CTX_RING_BUFFER_CONTROL	0x0a
> > > > > +#define CTX_BB_HEAD_U			0x0c
> > > > > +#define CTX_BB_HEAD_L			0x0e
> > > > > +#define CTX_BB_STATE			0x10
> > > > > +#define CTX_SECOND_BB_HEAD_U		0x12
> > > > > +#define CTX_SECOND_BB_HEAD_L		0x14
> > > > > +#define CTX_SECOND_BB_STATE		0x16
> > > > > +#define CTX_BB_PER_CTX_PTR		0x18
> > > > > +#define CTX_RCS_INDIRECT_CTX		0x1a
> > > > > +#define CTX_RCS_INDIRECT_CTX_OFFSET	0x1c
> > > > > +#define CTX_LRI_HEADER_1		0x21
> > > > > +#define CTX_CTX_TIMESTAMP		0x22
> > > > > +#define CTX_PDP3_UDW			0x24
> > > > > +#define CTX_PDP3_LDW			0x26
> > > > > +#define CTX_PDP2_UDW			0x28
> > > > > +#define CTX_PDP2_LDW			0x2a
> > > > > +#define CTX_PDP1_UDW			0x2c
> > > > > +#define CTX_PDP1_LDW			0x2e
> > > > > +#define CTX_PDP0_UDW			0x30
> > > > > +#define CTX_PDP0_LDW			0x32
> > > > > +#define CTX_LRI_HEADER_2		0x41
> > > > > +#define CTX_R_PWR_CLK_STATE		0x42
> > > > > +#define CTX_GPGPU_CSR_BASE_ADDRESS	0x44
> > > > > +
> > > > >  struct i915_hw_context *
> > > > >  gen8_gem_create_context(struct drm_device *dev,
> > > > >  			struct intel_engine *ring,
> > > > > @@ -51,6 +83,9 @@ gen8_gem_create_context(struct drm_device *dev,
> > > > >  {
> > > > >  	struct i915_hw_context *ctx = NULL;
> > > > >  	struct drm_i915_gem_object *ring_obj = NULL;
> > > > > +	struct i915_hw_ppgtt *ppgtt = NULL;
> > > > > +	struct page *page;
> > > > > +	uint32_t *reg_state;
> > > > >  	int ret;
> > > > >  
> > > > >  	ctx = i915_gem_create_context(dev, file_priv, create_vm);
> > > > > @@ -79,18 +114,114 @@ gen8_gem_create_context(struct drm_device *dev,
> > > > >  
> > > > >  	/* Failure at this point is almost impossible */
> > > > >  	ret = i915_gem_object_set_to_gtt_domain(ring_obj, true);
> > > > > -	if (ret) {
> > > > > -		i915_gem_object_ggtt_unpin(ring_obj);
> > > > > -		drm_gem_object_unreference(&ring_obj->base);
> > > > > -		i915_gem_object_ggtt_unpin(ctx->obj);
> > > > > -		i915_gem_context_unreference(ctx);
> > > > > -		return ERR_PTR(ret);
> > > > > -	}
> > > > > +	if (ret)
> > > > > +		goto destroy_ring_obj;
> > > > >  
> > > > >  	ctx->ringbuf = &ring->default_ringbuf;
> > > > >  	ctx->ringbuf->obj = ring_obj;
> > > > >  
> > > > > +	ppgtt = ctx_to_ppgtt(ctx);
> > > > > +
> > > > > +	ret = i915_gem_object_set_to_cpu_domain(ctx->obj, true);
> > > > > +	if (ret)
> > > > > +		goto destroy_ring_obj;
> > > > > +
> > > > > +	ret = i915_gem_object_get_pages(ctx->obj);
> > > > > +	if (ret)
> > > > > +		goto destroy_ring_obj;
> > > > > +
> > > > > +	i915_gem_object_pin_pages(ctx->obj);
> > > > > +
> > > > > +	/* The second page of the context object contains some fields which must
> > > > > +	 * be set up prior to the first execution.
> > > > > +	 */
> > > > > +	page = i915_gem_object_get_page(ctx->obj, 1);
> > > > > +	reg_state = kmap_atomic(page);
> > > > > +
> > > > > +	if (ring->id == RCS)
> > > > > +		reg_state[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(14);
> > > > > +	else
> > > > > +		reg_state[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(11);
> > > > > +	reg_state[CTX_CONTEXT_CONTROL] = RING_CONTEXT_CONTROL(ring);
> > > > > +	reg_state[CTX_CONTEXT_CONTROL+1] = (1<<3) | MI_RESTORE_INHIBIT;
> > > > > +	reg_state[CTX_CONTEXT_CONTROL+1] |= reg_state[CTX_CONTEXT_CONTROL+1] << 16;
> > > > > +	reg_state[CTX_RING_HEAD] = RING_HEAD(ring->mmio_base);
> > > > > +	reg_state[CTX_RING_HEAD+1] = 0;
> > > > > +	reg_state[CTX_RING_TAIL] = RING_TAIL(ring->mmio_base);
> > > > > +	reg_state[CTX_RING_TAIL+1] = 0;
> > > > > +	reg_state[CTX_RING_BUFFER_START] = RING_START(ring->mmio_base);
> > > > > +	reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(ring_obj);
> > > > > +	reg_state[CTX_RING_BUFFER_CONTROL] = RING_CTL(ring->mmio_base);
> > > > > +	reg_state[CTX_RING_BUFFER_CONTROL+1] = (31 * PAGE_SIZE) | RING_VALID;
> > > > > +	reg_state[CTX_BB_HEAD_U] = ring->mmio_base + 0x168;
> > > > > +	reg_state[CTX_BB_HEAD_U+1] = 0;
> > > > > +	reg_state[CTX_BB_HEAD_L] = ring->mmio_base + 0x140;
> > > > > +	reg_state[CTX_BB_HEAD_L+1] = 0;
> > > > > +	reg_state[CTX_BB_STATE] = ring->mmio_base + 0x110;
> > > > > +	reg_state[CTX_BB_STATE+1] = (1<<5);
> > > > > +	reg_state[CTX_SECOND_BB_HEAD_U] = ring->mmio_base + 0x11c;
> > > > > +	reg_state[CTX_SECOND_BB_HEAD_U+1] = 0;
> > > > > +	reg_state[CTX_SECOND_BB_HEAD_L] = ring->mmio_base + 0x114;
> > > > > +	reg_state[CTX_SECOND_BB_HEAD_L+1] = 0;
> > > > > +	reg_state[CTX_SECOND_BB_STATE] = ring->mmio_base + 0x118;
> > > > > +	reg_state[CTX_SECOND_BB_STATE+1] = 0;
> > > > > +	if (ring->id == RCS) {
> > > > > +		reg_state[CTX_BB_PER_CTX_PTR] = ring->mmio_base + 0x1c0;
> > > > > +		reg_state[CTX_BB_PER_CTX_PTR+1] = 0;
> > > > > +		reg_state[CTX_RCS_INDIRECT_CTX] = ring->mmio_base + 0x1c4;
> > > > > +		reg_state[CTX_RCS_INDIRECT_CTX+1] = 0;
> > > > > +		reg_state[CTX_RCS_INDIRECT_CTX_OFFSET] = ring->mmio_base + 0x1c8;
> > > > > +		reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] = 0;
> > > > > +	}
> > > > > +
> > > > > +	reg_state[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9);
> > > > > +	reg_state[CTX_CTX_TIMESTAMP] = ring->mmio_base + 0x3a8;
> > > > > +	reg_state[CTX_CTX_TIMESTAMP+1] = 0;
> > > > > +	reg_state[CTX_PDP3_UDW] = GEN8_RING_PDP_UDW(ring, 3);
> > > > > +	reg_state[CTX_PDP3_LDW] = GEN8_RING_PDP_LDW(ring, 3);
> > > > > +	reg_state[CTX_PDP2_UDW] = GEN8_RING_PDP_UDW(ring, 2);
> > > > > +	reg_state[CTX_PDP2_LDW] = GEN8_RING_PDP_LDW(ring, 2);
> > > > > +	reg_state[CTX_PDP1_UDW] = GEN8_RING_PDP_UDW(ring, 1);
> > > > > +	reg_state[CTX_PDP1_LDW] = GEN8_RING_PDP_LDW(ring, 1);
> > > > > +	reg_state[CTX_PDP0_UDW] = GEN8_RING_PDP_UDW(ring, 0);
> > > > > +	reg_state[CTX_PDP0_LDW] = GEN8_RING_PDP_LDW(ring, 0);
> > > > > +	reg_state[CTX_PDP3_UDW+1] = ppgtt->pd_dma_addr[3] >> 32;
> > > > > +	reg_state[CTX_PDP3_LDW+1] = ppgtt->pd_dma_addr[3];
> > > > > +	reg_state[CTX_PDP2_UDW+1] = ppgtt->pd_dma_addr[2] >> 32;
> > > > > +	reg_state[CTX_PDP2_LDW+1] = ppgtt->pd_dma_addr[2];
> > > > > +	reg_state[CTX_PDP1_UDW+1] = ppgtt->pd_dma_addr[1] >> 32;
> > > > > +	reg_state[CTX_PDP1_LDW+1] = ppgtt->pd_dma_addr[1];
> > > > > +	reg_state[CTX_PDP0_UDW+1] = ppgtt->pd_dma_addr[0] >> 32;
> > > > > +	reg_state[CTX_PDP0_LDW+1] = ppgtt->pd_dma_addr[0];
> > > > > +	if (ring->id == RCS) {
> > > > > +		reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
> > > > > +		reg_state[CTX_R_PWR_CLK_STATE] = 0x20c8;
> > > > 
> > > > You're writing the MMIO address for the R_PWR_CLK_STATE register to this
> > > > field. Shouldn't this receive the value we want programmed to the register?
> > > > 
> > > 
> > > Oh, never mind. I understand now.
> > > -Jeff
> > > 
> > To clarify my comments...I was at first confused by the need to specify the
> > R_PWR_CLK_STATE register address in the logical context, thinking that only
> > the desired value needed to be specified. But I see now that the programming
> > model is to specify the MI_LOAD_REGISTER_IMM command, followed by the address
> > at which to load, followed by the value to load.
> > 
> > Reflecting on my initial confusion, would it be clearer to provide names for
> > each dword position in the context image, rather than using an unnamed offset
> > like CTX_R_PWR_CLK_STATE+1? Example:
> > 
> > reg_state[CTX_R_PWR_CLK_STATE_ADDR] = 0x20c8
> > reg_state[CTX_R_PWR_CLK_STATE_DATA] = 0;
> 
> Usually when we emit batches in userspace (and the context is nothing else
> really) we have some OUT_BATCH macro which writes the dword and increments
> the pointer. Since MI_LOAD_REGISTER_IMM is multi-length we could add an
> OUT_BATCH_REG_WRITE(reg, value) which does both dword emissions.
> 
> That should clarify a lot what's going on here. We might even completely
> drop all the offset #defines and replace them with a few comments or so.
> -Daniel
> 
OK, now I get it. My mistake was in thinking the context image is just pure
state that hardware already knows how to restore. But as you say it is more
like a batch which includes the state *and* the MI_LOAD_REGISTER_IMM commands
required to restore. So in that sense I understand that the approach here to
initialize the context is much like constructing a batch. But later when we
want to update the value of a context field we have (in a later patch of this
series): 

reg_state[CTX_RING_TAIL+1] = value;

This is a bit obscure when occurring by itself and not in the flow of
initializing the context (batch). The same will be true when we add management
of the CTX_R_PWR_CLK_STATE value dword.
-Jeff

> > 
> > Jeff
> > > > > +		reg_state[CTX_R_PWR_CLK_STATE+1] = 0;
> > > > > +	}
> > > > > +
> > > > > +#if 0
> > > > > +	/* Offsets not yet defined for these */
> > > > > +	reg_state[CTX_GPGPU_CSR_BASE_ADDRESS[] = ;
> > > > > +	reg_state[CTX_GPGPU_CSR_BASE_ADDRESS+1] = 0;
> > > > > +#endif
> > > > > +
> > > > > +	kunmap_atomic(reg_state);
> > > > > +
> > > > > +	ctx->obj->dirty = 1;
> > > > > +	set_page_dirty(page);
> > > > > +	i915_gem_object_unpin_pages(ctx->obj);
> > > > > +
> > > > >  	return ctx;
> > > > > +
> > > > > +destroy_ring_obj:
> > > > > +	i915_gem_object_ggtt_unpin(ring_obj);
> > > > > +	drm_gem_object_unreference(&ring_obj->base);
> > > > > +	ctx->ringbuf->obj = NULL;
> > > > > +	ctx->ringbuf = NULL;
> > > > > +	i915_gem_object_ggtt_unpin(ctx->obj);
> > > > > +	i915_gem_context_unreference(ctx);
> > > > > +
> > > > > +	return ERR_PTR(ret);
> > > > >  }
> > > > >  
> > > > >  void gen8_gem_context_fini(struct drm_device *dev)
> > > > > -- 
> > > > > 1.9.0
> > > > > 
> > > > > _______________________________________________
> > > > > Intel-gfx mailing list
> > > > > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
> > > > > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
> > > > _______________________________________________
> > > > Intel-gfx mailing list
> > > > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
> > > > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
> > > _______________________________________________
> > > Intel-gfx mailing list
> > > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
> > > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
> > _______________________________________________
> > Intel-gfx mailing list
> > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
> > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
> 
> -- 
> Daniel Vetter
> Software Engineer, Intel Corporation
> +41 (0) 79 365 57 48 - http://blog.ffwll.ch
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux