On Wed, Jan 29, 2014 at 11:55:26AM -0800, Ben Widawsky wrote: > Semaphore signalling works similarly to previous GENs with the exception > that the per ring mailboxes no longer exist. Instead you must define > your own space, somewhere in the GTT. > > The comments in the code define the layout I've opted for, which should > be fairly future proof. Ie. I tried to define offsets in abstract terms > (NUM_RINGS, seqno size, etc). > > NOTE: If one wanted to move this to the HWSP they could. I've decided > one 4k object would be easier to deal with, and provide potential wins > with cache locality, but that's all speculative. > > v2: Update the macro to not need the other ring's ring->id (Chris) > Update the comment to use the correct formula (Chris) > > Signed-off-by: Ben Widawsky <ben@xxxxxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_drv.h | 1 + > drivers/gpu/drm/i915/i915_reg.h | 5 +- > drivers/gpu/drm/i915/intel_ringbuffer.c | 199 +++++++++++++++++++++++++------- > drivers/gpu/drm/i915/intel_ringbuffer.h | 38 +++++- > 4 files changed, 197 insertions(+), 46 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 3673ba1..f521059 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -1380,6 +1380,7 @@ typedef struct drm_i915_private { > > struct pci_dev *bridge_dev; > struct intel_ring_buffer ring[I915_NUM_RINGS]; > + struct drm_i915_gem_object *semaphore_obj; > uint32_t last_seqno, next_seqno; > > drm_dma_handle_t *status_page_dmah; > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h > index cbbaf26..8b745dc 100644 > --- a/drivers/gpu/drm/i915/i915_reg.h > +++ b/drivers/gpu/drm/i915/i915_reg.h > @@ -216,7 +216,7 @@ > #define MI_DISPLAY_FLIP_IVB_SPRITE_B (3 << 19) > #define MI_DISPLAY_FLIP_IVB_PLANE_C (4 << 19) > #define MI_DISPLAY_FLIP_IVB_SPRITE_C (5 << 19) > -#define MI_SEMAPHORE_MBOX MI_INSTR(0x16, 1) /* gen6+ */ > +#define MI_SEMAPHORE_MBOX 
MI_INSTR(0x16, 1) /* gen6, gen7 */ > #define MI_SEMAPHORE_GLOBAL_GTT (1<<22) > #define MI_SEMAPHORE_UPDATE (1<<21) > #define MI_SEMAPHORE_COMPARE (1<<20) > @@ -241,6 +241,8 @@ > #define MI_RESTORE_EXT_STATE_EN (1<<2) > #define MI_FORCE_RESTORE (1<<1) > #define MI_RESTORE_INHIBIT (1<<0) > +#define MI_SEMAPHORE_SIGNAL MI_INSTR(0x1b, 0) /* GEN8+ */ > +#define MI_SEMAPHORE_TARGET(engine) ((engine)<<15) > #define MI_STORE_DWORD_IMM MI_INSTR(0x20, 1) > #define MI_MEM_VIRTUAL (1 << 22) /* 965+ only */ > #define MI_STORE_DWORD_INDEX MI_INSTR(0x21, 1) > @@ -329,6 +331,7 @@ > #define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE (1<<10) /* GM45+ only */ > #define PIPE_CONTROL_INDIRECT_STATE_DISABLE (1<<9) > #define PIPE_CONTROL_NOTIFY (1<<8) > +#define PIPE_CONTROL_FLUSH_ENABLE (1<<7) /* gen7+ */ > #define PIPE_CONTROL_VF_CACHE_INVALIDATE (1<<4) > #define PIPE_CONTROL_CONST_CACHE_INVALIDATE (1<<3) > #define PIPE_CONTROL_STATE_CACHE_INVALIDATE (1<<2) > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c > index 37ae2b1..b750835 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.c > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c > @@ -619,6 +619,13 @@ static int init_render_ring(struct intel_ring_buffer *ring) > static void render_ring_cleanup(struct intel_ring_buffer *ring) > { > struct drm_device *dev = ring->dev; > + struct drm_i915_private *dev_priv = dev->dev_private; > + > + if (dev_priv->semaphore_obj) { > + i915_gem_object_ggtt_unpin(dev_priv->semaphore_obj); > + drm_gem_object_unreference(&dev_priv->semaphore_obj->base); > + dev_priv->semaphore_obj = NULL; > + } > > if (ring->scratch.obj == NULL) > return; > @@ -632,6 +639,86 @@ static void render_ring_cleanup(struct intel_ring_buffer *ring) > ring->scratch.obj = NULL; > } > > +static int gen8_rcs_signal(struct intel_ring_buffer *signaller, > + unsigned int num_dwords) > +{ > +#define MBOX_UPDATE_DWORDS 8 > + struct drm_device *dev = signaller->dev; > + struct drm_i915_private 
*dev_priv = dev->dev_private; > + struct intel_ring_buffer *waiter; > + int i, ret, num_rings; > + > + num_rings = hweight_long(INTEL_INFO(dev)->ring_mask); > + num_dwords = (num_rings-1) * MBOX_UPDATE_DWORDS; Again num_dwords += > +#undef MBOX_UPDATE_DWORDS > + > + /* XXX: + 4 for the caller */ > + ret = intel_ring_begin(signaller, num_dwords + 4); and the +4 goes away. > + if (ret) > + return ret; > + > + for_each_ring(waiter, dev_priv, i) { > + u64 gtt_offset = signaller->semaphore.signal_ggtt[i]; > + if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID) > + continue; > + > + intel_ring_emit(signaller, GFX_OP_PIPE_CONTROL(6)); > + intel_ring_emit(signaller, PIPE_CONTROL_GLOBAL_GTT_IVB | > + PIPE_CONTROL_QW_WRITE | > + PIPE_CONTROL_FLUSH_ENABLE); > + intel_ring_emit(signaller, lower_32_bits(gtt_offset)); > + intel_ring_emit(signaller, upper_32_bits(gtt_offset)); > + intel_ring_emit(signaller, signaller->outstanding_lazy_seqno); > + intel_ring_emit(signaller, 0); > + intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL | > + MI_SEMAPHORE_TARGET(waiter->id)); > + intel_ring_emit(signaller, 0); > + } > + > + WARN_ON(i != num_rings); > + > + return 0; > +} <snip> > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h > index c69ae10..f1e7a66 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h > @@ -111,6 +111,39 @@ struct intel_ring_buffer { > #define I915_DISPATCH_PINNED 0x2 > void (*cleanup)(struct intel_ring_buffer *ring); > > + /* GEN8 signal/wait table > + * signal to signal to signal to signal to > + * RCS VCS BCS VECS > + * ------------------------------------------------------ > + * RCS | NOP (0x00) | VCS (0x08) | BCS (0x10) | VECS (0x18) | > + * |----------------------------------------------------- > + * VCS | RCS (0x20) | NOP (0x28) | BCS (0x30) | VECS (0x38) | > + * |----------------------------------------------------- > + * BCS | RCS (0x40) | VCS (0x48) | NOP (0x50) | VECS
(0x58) | > + * |----------------------------------------------------- > + * VECS | RCS (0x60) | VCS (0x68) | BCS (0x70) | NOP (0x78) | > + * |----------------------------------------------------- > + * > + * Generalization: > + * f(x, y) := (x->id * NUM_RINGS * seqno_size) + (seqno_size * y->id) > + * ie. transpose of g(x, y) > + * > + * sync from sync from sync from sync from > + * RCS VCS BCS VECS > + * ------------------------------------------------------ > + * RCS | NOP (0x00) | VCS (0x20) | BCS (0x40) | VECS (0x60) | > + * |----------------------------------------------------- > + * VCS | RCS (0x08) | NOP (0x28) | BCS (0x48) | VECS (0x68) | > + * |----------------------------------------------------- > + * BCS | RCS (0x10) | VCS (0x30) | NOP (0x50) | VECS (0x70) | > + * |----------------------------------------------------- > + * VECS | RCS (0x18) | VCS (0x38) | BCS (0x58) | NOP (0x78) | > + * |----------------------------------------------------- > + * > + * Generalization: > + * g(x, y) := (y->id * NUM_RINGS * seqno_size) + (seqno_size * x->id) > + * ie. transpose of f(x, y) > + */ > struct { > u32 sync_seqno[I915_NUM_RINGS-1]; > /* AKA wait() */ > @@ -120,7 +153,10 @@ struct intel_ring_buffer { > /* our mbox written by others */ > u32 mbox[I915_NUM_RINGS]; mbox should also get a u64 friend, right? > /* mboxes this ring signals to */ > - u32 signal_mbox[I915_NUM_RINGS]; > + union { > + u32 signal_mbox[I915_NUM_RINGS]; > + u64 signal_ggtt[I915_NUM_RINGS]; > + }; > > /* num_dwords is space the caller will need for atomic update */ > int (*signal)(struct intel_ring_buffer *signaller, > -- > 1.8.5.3 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Ville Syrjälä Intel OTC _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx