On Thu, Jul 24, 2014 at 05:04:26PM +0100, Thomas Daniel wrote:
> From: Oscar Mateo <oscar.mateo@xxxxxxxxx>
> 
> Well, new-ish: if all this code looks familiar, that's because it's
> a clone of the existing submission mechanism (with some modifications
> here and there to adapt it to LRCs and Execlists).
> 
> And why did we do this instead of reusing code, one might wonder?
> Well, there are some fears that the differences are big enough that
> they will end up breaking all platforms.
> 
> Also, Execlists offer several advantages, like control over when the
> GPU is done with a given workload, that can help simplify the
> submission mechanism, no doubt. I am interested in getting Execlists
> to work first and foremost, but in the future this parallel submission
> mechanism will help us to fine tune the mechanism without affecting
> old gens.
> 
> v2: Pass the ringbuffer only (whenever possible).
> 
> Signed-off-by: Oscar Mateo <oscar.mateo@xxxxxxxxx>
> ---
>  drivers/gpu/drm/i915/intel_lrc.c        |  193 +++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/intel_lrc.h        |   12 ++
>  drivers/gpu/drm/i915/intel_ringbuffer.c |   20 ++--
>  drivers/gpu/drm/i915/intel_ringbuffer.h |    3 +
>  4 files changed, 218 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index f171fd5..bd37d51 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -106,6 +106,199 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring)
>  	/* TODO */
>  }
>  
> +void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf)
> +{
> +	intel_logical_ring_advance(ringbuf);
> +
> +	if (intel_ring_stopped(ringbuf->ring))
> +		return;
> +
> +	/* TODO: how to submit a context to the ELSP is not here yet */
> +}
> +
> +static int logical_ring_alloc_seqno(struct intel_engine_cs *ring)
> +{
> +	if (ring->outstanding_lazy_seqno)
> +		return 0;
> +
> +	if (ring->preallocated_lazy_request == NULL) {
> +		struct drm_i915_gem_request *request;
> +
> +		request = kmalloc(sizeof(*request), GFP_KERNEL);
> +		if (request == NULL)
> +			return -ENOMEM;
> +
> +		ring->preallocated_lazy_request = request;
> +	}
> +
> +	return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno);
> +}
> +
> +static int logical_ring_wait_request(struct intel_ringbuffer *ringbuf, int bytes)
> +{
> +	struct intel_engine_cs *ring = ringbuf->ring;
> +	struct drm_i915_gem_request *request;
> +	u32 seqno = 0;
> +	int ret;
> +
> +	if (ringbuf->last_retired_head != -1) {
> +		ringbuf->head = ringbuf->last_retired_head;
> +		ringbuf->last_retired_head = -1;
> +
> +		ringbuf->space = intel_ring_space(ringbuf);
> +		if (ringbuf->space >= bytes)
> +			return 0;
> +	}
> +
> +	list_for_each_entry(request, &ring->request_list, list) {
> +		if (__intel_ring_space(request->tail, ringbuf->tail,
> +				       ringbuf->size) >= bytes) {
> +			seqno = request->seqno;
> +			break;
> +		}
> +	}
> +
> +	if (seqno == 0)
> +		return -ENOSPC;
> +
> +	ret = i915_wait_seqno(ring, seqno);
> +	if (ret)
> +		return ret;
> +
> +	/* TODO: make sure we update the right ringbuffer's last_retired_head
> +	 * when retiring requests */
> +	i915_gem_retire_requests_ring(ring);
> +	ringbuf->head = ringbuf->last_retired_head;
> +	ringbuf->last_retired_head = -1;
> +
> +	ringbuf->space = intel_ring_space(ringbuf);
> +	return 0;
> +}
> +
> +static int logical_ring_wait_for_space(struct intel_ringbuffer *ringbuf, int bytes)
> +{
> +	struct intel_engine_cs *ring = ringbuf->ring;
> +	struct drm_device *dev = ring->dev;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	unsigned long end;
> +	int ret;
> +
> +	ret = logical_ring_wait_request(ringbuf, bytes);
> +	if (ret != -ENOSPC)
> +		return ret;
> +
> +	/* Force the context submission in case we have been skipping it */
> +	intel_logical_ring_advance_and_submit(ringbuf);
> +
> +	/* With GEM the hangcheck timer should kick us out of the loop,
> +	 * leaving it early runs the risk of corrupting GEM state (due
> +	 * to running on almost untested codepaths). But on resume
> +	 * timers don't work yet, so prevent a complete hang in that
> +	 * case by choosing an insanely large timeout. */
> +	end = jiffies + 60 * HZ;
> +
> +	do {
> +		ringbuf->head = I915_READ_HEAD(ring);
> +		ringbuf->space = intel_ring_space(ringbuf);
> +		if (ringbuf->space >= bytes) {
> +			ret = 0;
> +			break;
> +		}
> +
> +		if (!drm_core_check_feature(dev, DRIVER_MODESET) &&
> +		    dev->primary->master) {
> +			struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
> +			if (master_priv->sarea_priv)
> +				master_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
> +		}

sarea is legacy gunk. Really bad legacy gunk. The DRIVER_MODESET check
should have been a give-away. Also checkpatch.

Fixed while applying.
-Daniel
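For reference, with the legacy sarea block removed the body of
logical_ring_wait_for_space() boils down to roughly the following. This is
a sketch only, assuming the fix is simply dropping that block; it is not
necessarily the exact code that landed:

	/* Sketch: wait loop without the legacy !DRIVER_MODESET/sarea
	 * bookkeeping; everything else is unchanged from the patch above. */
	end = jiffies + 60 * HZ;

	do {
		/* Re-sample the hardware head and recompute free space */
		ringbuf->head = I915_READ_HEAD(ring);
		ringbuf->space = intel_ring_space(ringbuf);
		if (ringbuf->space >= bytes) {
			ret = 0;
			break;
		}

		msleep(1);

		/* Bail out on pending signals and on GPU hangs */
		if (dev_priv->mm.interruptible && signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		ret = i915_gem_check_wedge(&dev_priv->gpu_error,
					   dev_priv->mm.interruptible);
		if (ret)
			break;

		if (time_after(jiffies, end)) {
			ret = -EBUSY;
			break;
		}
	} while (1);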
> +
> +		msleep(1);
> +
> +		if (dev_priv->mm.interruptible && signal_pending(current)) {
> +			ret = -ERESTARTSYS;
> +			break;
> +		}
> +
> +		ret = i915_gem_check_wedge(&dev_priv->gpu_error,
> +					   dev_priv->mm.interruptible);
> +		if (ret)
> +			break;
> +
> +		if (time_after(jiffies, end)) {
> +			ret = -EBUSY;
> +			break;
> +		}
> +	} while (1);
> +
> +	return ret;
> +}
> +
> +static int logical_ring_wrap_buffer(struct intel_ringbuffer *ringbuf)
> +{
> +	uint32_t __iomem *virt;
> +	int rem = ringbuf->size - ringbuf->tail;
> +
> +	if (ringbuf->space < rem) {
> +		int ret = logical_ring_wait_for_space(ringbuf, rem);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	virt = ringbuf->virtual_start + ringbuf->tail;
> +	rem /= 4;
> +	while (rem--)
> +		iowrite32(MI_NOOP, virt++);
> +
> +	ringbuf->tail = 0;
> +	ringbuf->space = intel_ring_space(ringbuf);
> +
> +	return 0;
> +}
> +
> +static int logical_ring_prepare(struct intel_ringbuffer *ringbuf, int bytes)
> +{
> +	int ret;
> +
> +	if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
> +		ret = logical_ring_wrap_buffer(ringbuf);
> +		if (unlikely(ret))
> +			return ret;
> +	}
> +
> +	if (unlikely(ringbuf->space < bytes)) {
> +		ret = logical_ring_wait_for_space(ringbuf, bytes);
> +		if (unlikely(ret))
> +			return ret;
> +	}
> +
> +	return 0;
> +}
> +
> +int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords)
> +{
> +	struct intel_engine_cs *ring = ringbuf->ring;
> +	struct drm_device *dev = ring->dev;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	int ret;
> +
> +	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
> +				   dev_priv->mm.interruptible);
> +	if (ret)
> +		return ret;
> +
> +	ret = logical_ring_prepare(ringbuf, num_dwords * sizeof(uint32_t));
> +	if (ret)
> +		return ret;
> +
> +	/* Preallocate the olr before touching the ring */
> +	ret = logical_ring_alloc_seqno(ring);
> +	if (ret)
> +		return ret;
> +
> +	ringbuf->space -= num_dwords * sizeof(uint32_t);
> +	return 0;
> +}
> +
>  static int gen8_init_common_ring(struct intel_engine_cs *ring)
>  {
>  	struct drm_device *dev = ring->dev;
> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
> index bf0eff4..16798b6 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/intel_lrc.h
> @@ -29,6 +29,18 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring);
>  void intel_logical_ring_cleanup(struct intel_engine_cs *ring);
>  int intel_logical_rings_init(struct drm_device *dev);
>  
> +void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf);
> +static inline void intel_logical_ring_advance(struct intel_ringbuffer *ringbuf)
> +{
> +	ringbuf->tail &= ringbuf->size - 1;
> +}
> +static inline void intel_logical_ring_emit(struct intel_ringbuffer *ringbuf, u32 data)
> +{
> +	iowrite32(data, ringbuf->virtual_start + ringbuf->tail);
> +	ringbuf->tail += 4;
> +}
> +int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords);
> +
>  /* Logical Ring Contexts */
>  void intel_lr_context_free(struct intel_context *ctx);
>  int intel_lr_context_deferred_create(struct intel_context *ctx,
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index ca45c58..dc2a991 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -57,7 +57,7 @@ intel_ring_initialized(struct intel_engine_cs *ring)
>  	return ring->buffer && ring->buffer->obj;
>  }
>  
> -static inline int __ring_space(int head, int tail, int size)
> +int __intel_ring_space(int head, int tail, int size)
>  {
>  	int space = head - (tail + I915_RING_FREE_SPACE);
>  	if (space < 0)
> @@ -65,12 +65,12 @@ static inline int __ring_space(int head, int tail, int size)
>  	return space;
>  }
>  
> -static inline int ring_space(struct intel_ringbuffer *ringbuf)
> +int intel_ring_space(struct intel_ringbuffer *ringbuf)
>  {
> -	return __ring_space(ringbuf->head & HEAD_ADDR, ringbuf->tail, ringbuf->size);
> +	return __intel_ring_space(ringbuf->head & HEAD_ADDR, ringbuf->tail, ringbuf->size);
>  }
>  
> -static bool intel_ring_stopped(struct intel_engine_cs *ring)
> +bool intel_ring_stopped(struct intel_engine_cs *ring)
>  {
>  	struct drm_i915_private *dev_priv = ring->dev->dev_private;
>  	return dev_priv->gpu_error.stop_rings & intel_ring_flag(ring);
> @@ -561,7 +561,7 @@ static int init_ring_common(struct intel_engine_cs *ring)
>  	else {
>  		ringbuf->head = I915_READ_HEAD(ring);
>  		ringbuf->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
> -		ringbuf->space = ring_space(ringbuf);
> +		ringbuf->space = intel_ring_space(ringbuf);
>  		ringbuf->last_retired_head = -1;
>  	}
>  
> @@ -1679,13 +1679,13 @@ static int intel_ring_wait_request(struct intel_engine_cs *ring, int n)
>  		ringbuf->head = ringbuf->last_retired_head;
>  		ringbuf->last_retired_head = -1;
>  
> -		ringbuf->space = ring_space(ringbuf);
> +		ringbuf->space = intel_ring_space(ringbuf);
>  		if (ringbuf->space >= n)
>  			return 0;
>  	}
>  
>  	list_for_each_entry(request, &ring->request_list, list) {
> -		if (__ring_space(request->tail, ringbuf->tail, ringbuf->size) >= n) {
> +		if (__intel_ring_space(request->tail, ringbuf->tail, ringbuf->size) >= n) {
>  			seqno = request->seqno;
>  			break;
>  		}
> @@ -1702,7 +1702,7 @@ static int intel_ring_wait_request(struct intel_engine_cs *ring, int n)
>  	ringbuf->head = ringbuf->last_retired_head;
>  	ringbuf->last_retired_head = -1;
>  
> -	ringbuf->space = ring_space(ringbuf);
> +	ringbuf->space = intel_ring_space(ringbuf);
>  	return 0;
>  }
>  
> @@ -1731,7 +1731,7 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
>  	trace_i915_ring_wait_begin(ring);
>  	do {
>  		ringbuf->head = I915_READ_HEAD(ring);
> -		ringbuf->space = ring_space(ringbuf);
> +		ringbuf->space = intel_ring_space(ringbuf);
>  		if (ringbuf->space >= n) {
>  			ret = 0;
>  			break;
> @@ -1783,7 +1783,7 @@ static int intel_wrap_ring_buffer(struct intel_engine_cs *ring)
>  		iowrite32(MI_NOOP, virt++);
>  
>  	ringbuf->tail = 0;
> -	ringbuf->space = ring_space(ringbuf);
> +	ringbuf->space = intel_ring_space(ringbuf);
>  
>  	return 0;
>  }
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index c135334..c305df0 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -373,6 +373,9 @@ static inline void intel_ring_advance(struct intel_engine_cs *ring)
>  	struct intel_ringbuffer *ringbuf = ring->buffer;
>  	ringbuf->tail &= ringbuf->size - 1;
>  }
> +int __intel_ring_space(int head, int tail, int size);
> +int intel_ring_space(struct intel_ringbuffer *ringbuf);
> +bool intel_ring_stopped(struct intel_engine_cs *ring);
>  void __intel_ring_advance(struct intel_engine_cs *ring);
>  
>  int __must_check intel_ring_idle(struct intel_engine_cs *ring);
> -- 
> 1.7.9.5
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx