On Fri, Jun 13, 2014 at 08:37:44AM -0700, oscar.mateo@xxxxxxxxx wrote: > From: Oscar Mateo <oscar.mateo@xxxxxxxxx> > > Well, new-ish: if all this code looks familiar, that's because it's > a clone of the existing submission mechanism (with some modifications > here and there to adapt it to LRCs and Execlists). > > And why did we do this? Execlists offer several advantages, like > control over when the GPU is done with a given workload, that can > help simplify the submission mechanism, no doubt, but I am interested > in getting Execlists to work first and foremost. As we are creating > a parallel submission mechanism (even if itñś just a clone), we can > now start improving it without the fear of breaking old gens. > > Signed-off-by: Oscar Mateo <oscar.mateo@xxxxxxxxx> > --- > drivers/gpu/drm/i915/intel_lrc.c | 214 +++++++++++++++++++++++++++++++++++++++ > drivers/gpu/drm/i915/intel_lrc.h | 18 ++++ > 2 files changed, 232 insertions(+) > > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c > index 02fc3d0..89aed7a 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -86,6 +86,220 @@ bool intel_enable_execlists(struct drm_device *dev) > return HAS_LOGICAL_RING_CONTEXTS(dev) && USES_PPGTT(dev); > } > > +static inline struct intel_ringbuffer * > +logical_ringbuf_get(struct intel_engine_cs *ring, struct intel_context *ctx) > +{ > + return ctx->engine[ring->id].ringbuf; > +} > + > +void intel_logical_ring_advance_and_submit(struct intel_engine_cs *ring, > + struct intel_context *ctx) > +{ > + struct intel_ringbuffer *ringbuf = logical_ringbuf_get(ring, ctx); > + > + intel_logical_ring_advance(ringbuf); > + > + if (intel_ring_stopped(ring)) > + return; > + > + ring->submit_ctx(ring, ctx, ringbuf->tail); > +} > + > +static int logical_ring_alloc_seqno(struct intel_engine_cs *ring, > + struct intel_context *ctx) > +{ > + if (ring->outstanding_lazy_seqno) > + return 0; > + > + if (ring->preallocated_lazy_request == NULL) { > + struct drm_i915_gem_request *request; > + > + request = kmalloc(sizeof(*request), GFP_KERNEL); > + if (request == NULL) > + return -ENOMEM; > + > + ring->preallocated_lazy_request = request; > + } > + > + return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno); > +} > + > +static int logical_ring_wait_request(struct intel_engine_cs *ring, > + struct intel_ringbuffer *ringbuf, > + struct intel_context *ctx, > + int bytes) > +{ > + struct drm_i915_gem_request *request; > + u32 seqno = 0; > + int ret; > + > + if (ringbuf->last_retired_head != -1) { > + ringbuf->head = ringbuf->last_retired_head; > + ringbuf->last_retired_head = -1; > + > + ringbuf->space = intel_ring_space(ringbuf); > + if (ringbuf->space >= bytes) > + return 0; > + } > + > + list_for_each_entry(request, &ring->request_list, list) { > + if (__intel_ring_space(request->tail, ringbuf->tail, > + ringbuf->size) >= bytes) { > + seqno = request->seqno; > + break; > + } > + } > + > + if (seqno == 0) > + return -ENOSPC; > + > + ret = i915_wait_seqno(ring, seqno); > + if (ret) > + return ret; > + > + /* TODO: make sure we update the right ringbuffer's last_retired_head > + * when retiring requests */ > + i915_gem_retire_requests_ring(ring); > + ringbuf->head = ringbuf->last_retired_head; > + ringbuf->last_retired_head = -1; > + > + ringbuf->space = intel_ring_space(ringbuf); > + return 0; > +} > + > +static int logical_ring_wait_for_space(struct intel_engine_cs *ring, > + struct intel_ringbuffer *ringbuf, > + struct intel_context *ctx, > + int bytes) > +{ > + struct drm_device *dev = ring->dev; > + struct drm_i915_private *dev_priv = dev->dev_private; > + unsigned long end; > + int ret; > + > + ret = logical_ring_wait_request(ring, ringbuf, ctx, bytes); > + if (ret != -ENOSPC) > + return ret; > + > + /* Force the context submission in case we have been skipping it */ > + intel_logical_ring_advance_and_submit(ring, ctx); > + > + /* With GEM the hangcheck timer should kick us out of the loop, > + * leaving it early runs the risk of corrupting GEM state (due > + * to running on almost untested codepaths). But on resume > + * timers don't work yet, so prevent a complete hang in that > + * case by choosing an insanely large timeout. */ > + end = jiffies + 60 * HZ; > + In the legacy ringbuffer version, there are tracepoints around the do loop. Should we keep those? Or add lrc specific equivalents? > + do { > + ringbuf->head = I915_READ_HEAD(ring); > + ringbuf->space = intel_ring_space(ringbuf); > + if (ringbuf->space >= bytes) { > + ret = 0; > + break; > + } > + > + if (!drm_core_check_feature(dev, DRIVER_MODESET) && > + dev->primary->master) { > + struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv; > + if (master_priv->sarea_priv) > + master_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT; > + } > + > + msleep(1); > + > + if (dev_priv->mm.interruptible && signal_pending(current)) { > + ret = -ERESTARTSYS; > + break; > + } > + > + ret = i915_gem_check_wedge(&dev_priv->gpu_error, > + dev_priv->mm.interruptible); > + if (ret) > + break; > + > + if (time_after(jiffies, end)) { > + ret = -EBUSY; > + break; > + } > + } while (1); > + > + return ret; > +} > + > +static int logical_ring_wrap_buffer(struct intel_engine_cs *ring, > + struct intel_ringbuffer *ringbuf, > + struct intel_context *ctx) > +{ > + uint32_t __iomem *virt; > + int rem = ringbuf->size - ringbuf->tail; > + > + if (ringbuf->space < rem) { > + int ret = logical_ring_wait_for_space(ring, ringbuf, ctx, rem); > + if (ret) > + return ret; > + } > + > + virt = ringbuf->virtual_start + ringbuf->tail; > + rem /= 4; > + while (rem--) > + iowrite32(MI_NOOP, virt++); > + > + ringbuf->tail = 0; > + ringbuf->space = intel_ring_space(ringbuf); > + > + return 0; > +} > + > +static int logical_ring_prepare(struct intel_engine_cs *ring, > + struct intel_ringbuffer *ringbuf, > + struct intel_context *ctx, > + int bytes) > +{ > + int ret; > + > + if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) { > + ret = logical_ring_wrap_buffer(ring, ringbuf, ctx); > + if (unlikely(ret)) > + return ret; > + } > + > + if (unlikely(ringbuf->space < bytes)) { > + ret = logical_ring_wait_for_space(ring, ringbuf, ctx, bytes); > + if (unlikely(ret)) > + return ret; > + } > + > + return 0; > +} > + > +int intel_logical_ring_begin(struct intel_engine_cs *ring, > + struct intel_context *ctx, > + int num_dwords) > +{ > + struct drm_i915_private *dev_priv = ring->dev->dev_private; > + struct intel_ringbuffer *ringbuf = logical_ringbuf_get(ring, ctx); > + int ret; > + > + ret = i915_gem_check_wedge(&dev_priv->gpu_error, > + dev_priv->mm.interruptible); > + if (ret) > + return ret; > + > + ret = logical_ring_prepare(ring, ringbuf, ctx, > + num_dwords * sizeof(uint32_t)); > + if (ret) > + return ret; > + > + /* Preallocate the olr before touching the ring */ > + ret = logical_ring_alloc_seqno(ring, ctx); > + if (ret) > + return ret; > + > + ringbuf->space -= num_dwords * sizeof(uint32_t); > + return 0; > +} > + > static int gen8_init_common_ring(struct intel_engine_cs *ring) > { > struct drm_device *dev = ring->dev; > diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h > index 26b0949..686ebf5 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.h > +++ b/drivers/gpu/drm/i915/intel_lrc.h > @@ -5,6 +5,24 @@ > void intel_logical_ring_cleanup(struct intel_engine_cs *ring); > int intel_logical_rings_init(struct drm_device *dev); > > +void intel_logical_ring_advance_and_submit(struct intel_engine_cs *ring, > + struct intel_context *ctx); > + > +static inline void intel_logical_ring_advance(struct intel_ringbuffer *ringbuf) > +{ > + ringbuf->tail &= ringbuf->size - 1; > +} > + > +static inline void intel_logical_ring_emit(struct intel_ringbuffer *ringbuf, u32 data) > +{ > + iowrite32(data, ringbuf->virtual_start + ringbuf->tail); > + ringbuf->tail += 4; > +} > + > +int intel_logical_ring_begin(struct intel_engine_cs *ring, > + struct intel_context *ctx, > + int num_dwords); > + I think all of these are only used in intel_lrc.c, so don't need to be in the header and could all be static. Right? Brad > /* Logical Ring Contexts */ > void intel_lr_context_free(struct intel_context *ctx); > int intel_lr_context_deferred_create(struct intel_context *ctx, > -- > 1.9.0 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > http://lists.freedesktop.org/mailman/listinfo/intel-gfx _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx