On Thu, Aug 09, 2012 at 10:58:30AM +0100, Chris Wilson wrote: > Avoid the forcewake overhead when simply retiring requests, as often the > last seen seqno is good enough to satisfy the retirment process and will > be promptly re-run in any case. Only ensure that we force the coherent > seqno read when we are explicitly waiting upon a completion event to be > sure that none go missing, and also for when we are reporting seqno > values in case of error or debugging. > > This greatly reduces the load for userspace using the busy-ioctl to > track active buffers, for instance halving the CPU used by X in pushing > the pixels from a software render (flash). The effect will be even more > magnified with userptr and so providing a zero-copy upload path in that > instance, or in similar instances where X is simply compositing DRI > buffers. > > v2: Reverse the polarity of the tachyon stream. Daniel suggested that > 'force' was too generic for the parameter name and that 'lazy_coherency' > better encapsulated the semantics of it being an optimization and its > purpose. Also notice that gen6_get_seqno() is only used by gen6/7 > chipsets and so the test for IS_GEN6 || IS_GEN7 is redundant in that > function. > > Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk> Yeah, I like the new color. Reviewed-by: Daniel Vetter <daniel.vetter at ffwll.ch> I'll muse over this some more before picking it up, just in case I'll notice a place this could blow up ... -Daniel > --- > drivers/gpu/drm/i915/i915_debugfs.c | 2 +- > drivers/gpu/drm/i915/i915_gem.c | 6 +++--- > drivers/gpu/drm/i915/i915_irq.c | 9 +++++---- > drivers/gpu/drm/i915/intel_ringbuffer.c | 10 ++++------ > drivers/gpu/drm/i915/intel_ringbuffer.h | 9 ++++++++- > 5 files changed, 21 insertions(+), 15 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c > index 86444fe..544abf5 100644 > --- a/drivers/gpu/drm/i915/i915_debugfs.c > +++ b/drivers/gpu/drm/i915/i915_debugfs.c > @@ -426,7 +426,7 @@ static void i915_ring_seqno_info(struct seq_file *m, > { > if (ring->get_seqno) { > seq_printf(m, "Current sequence (%s): %d\n", > - ring->name, ring->get_seqno(ring)); > + ring->name, ring->get_seqno(ring, false)); > } > } > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 153c533..0582e22 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -1888,7 +1888,7 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) > > WARN_ON(i915_verify_lists(ring->dev)); > > - seqno = ring->get_seqno(ring); > + seqno = ring->get_seqno(ring, true); > > for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++) > if (seqno >= ring->sync_seqno[i]) > @@ -2060,7 +2060,7 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno, > bool wait_forever = true; > int ret; > > - if (i915_seqno_passed(ring->get_seqno(ring), seqno)) > + if (i915_seqno_passed(ring->get_seqno(ring, true), seqno)) > return 0; > > trace_i915_gem_request_wait_begin(ring, seqno); > @@ -2079,7 +2079,7 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno, > getrawmonotonic(&before); > > #define EXIT_COND \ > - (i915_seqno_passed(ring->get_seqno(ring), seqno) || \ > + (i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \ > atomic_read(&dev_priv->mm.wedged)) > do { > if (interruptible) > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c > index 57e4f2b..0ba15e8 100644 > --- a/drivers/gpu/drm/i915/i915_irq.c > +++ b/drivers/gpu/drm/i915/i915_irq.c > @@ -335,7 +335,7 @@ static void notify_ring(struct drm_device *dev, > if (ring->obj == NULL) > return; > > - trace_i915_gem_request_complete(ring, ring->get_seqno(ring)); > + trace_i915_gem_request_complete(ring, ring->get_seqno(ring, false)); > > wake_up_all(&ring->irq_queue); > if (i915_enable_hangcheck) { > @@ -1052,7 +1052,7 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv, > if (!ring->get_seqno) > return NULL; > > - seqno = ring->get_seqno(ring); > + seqno = ring->get_seqno(ring, false); > list_for_each_entry(obj, &dev_priv->mm.active_list, mm_list) { > if (obj->ring != ring) > continue; > @@ -1106,7 +1106,7 @@ static void i915_record_ring_state(struct drm_device *dev, > > error->waiting[ring->id] = waitqueue_active(&ring->irq_queue); > error->instpm[ring->id] = I915_READ(RING_INSTPM(ring->mmio_base)); > - error->seqno[ring->id] = ring->get_seqno(ring); > + error->seqno[ring->id] = ring->get_seqno(ring, false); > error->acthd[ring->id] = intel_ring_get_active_head(ring); > error->head[ring->id] = I915_READ_HEAD(ring); > error->tail[ring->id] = I915_READ_TAIL(ring); > @@ -1603,7 +1603,8 @@ ring_last_seqno(struct intel_ring_buffer *ring) > static bool i915_hangcheck_ring_idle(struct intel_ring_buffer *ring, bool *err) > { > if (list_empty(&ring->request_list) || > - i915_seqno_passed(ring->get_seqno(ring), ring_last_seqno(ring))) { > + i915_seqno_passed(ring->get_seqno(ring, false), > + ring_last_seqno(ring))) { > /* Issue a wake-up to catch stuck h/w. */ > if (waitqueue_active(&ring->irq_queue)) { > DRM_ERROR("Hangcheck timer elapsed... %s idle\n", > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c > index 6c0f504..c77bcd8 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.c > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c > @@ -618,26 +618,24 @@ pc_render_add_request(struct intel_ring_buffer *ring, > } > > static u32 > -gen6_ring_get_seqno(struct intel_ring_buffer *ring) > +gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency) > { > - struct drm_device *dev = ring->dev; > - > /* Workaround to force correct ordering between irq and seqno writes on > * ivb (and maybe also on snb) by reading from a CS register (like > * ACTHD) before reading the status page. */ > - if (IS_GEN6(dev) || IS_GEN7(dev)) > + if (!lazy_coherency) > intel_ring_get_active_head(ring); > return intel_read_status_page(ring, I915_GEM_HWS_INDEX); > } > > static u32 > -ring_get_seqno(struct intel_ring_buffer *ring) > +ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency) > { > return intel_read_status_page(ring, I915_GEM_HWS_INDEX); > } > > static u32 > -pc_render_get_seqno(struct intel_ring_buffer *ring) > +pc_render_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency) > { > struct pipe_control *pc = ring->private; > return pc->cpu_page[0]; > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h > index 8b2b92e..2ea7a31 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h > @@ -72,7 +72,14 @@ struct intel_ring_buffer { > u32 flush_domains); > int (*add_request)(struct intel_ring_buffer *ring, > u32 *seqno); > - u32 (*get_seqno)(struct intel_ring_buffer *ring); > + /* Some chipsets are not quite as coherent as advertised and need > + * an expensive kick to force a true read of the up-to-date seqno. > + * However, the up-to-date seqno is not always required and the last > + * seen value is good enough. Note that the seqno will always be > + * monotonic, even if not coherent. > + */ > + u32 (*get_seqno)(struct intel_ring_buffer *ring, > + bool lazy_coherency); > int (*dispatch_execbuffer)(struct intel_ring_buffer *ring, > u32 offset, u32 length); > void (*cleanup)(struct intel_ring_buffer *ring); > -- > 1.7.10.4 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx at lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Daniel Vetter Mail: daniel at ffwll.ch Mobile: +41 (0)79 365 57 48