Avoid the forcewake overhead when simply retiring requests, as often the last seen seqno is good enough to satisfy the retirment process and will be promptly re-run in any case. Only ensure that we force the coherent seqno read when we are explicitly waiting upon a completion event to be sure that none go missing, and also for when we are reporting seqno values in case of error or debugging. This greatly reduces the load for userspace using the busy-ioctl to track active buffers, for instance halving the CPU used by X in pushing the pixels from a software render (flash). The effect will be even more magnified with userptr and so providing a zero-copy upload path in that instance, or in similar instances where X is simply compositing DRI buffers. Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk> --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/i915_gem.c | 6 +++--- drivers/gpu/drm/i915/i915_irq.c | 9 +++++---- drivers/gpu/drm/i915/intel_ringbuffer.c | 10 ++++------ drivers/gpu/drm/i915/intel_ringbuffer.h | 3 ++- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 86444fe..13dfec6 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -426,7 +426,7 @@ static void i915_ring_seqno_info(struct seq_file *m, { if (ring->get_seqno) { seq_printf(m, "Current sequence (%s): %d\n", - ring->name, ring->get_seqno(ring)); + ring->name, ring->get_seqno(ring, true)); } } diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 153c533..fde09eb 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1888,7 +1888,7 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) WARN_ON(i915_verify_lists(ring->dev)); - seqno = ring->get_seqno(ring); + seqno = ring->get_seqno(ring, false); for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++) if (seqno >= ring->sync_seqno[i]) @@ -2060,7 +2060,7 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno, bool wait_forever = true; int ret; - if (i915_seqno_passed(ring->get_seqno(ring), seqno)) + if (i915_seqno_passed(ring->get_seqno(ring, false), seqno)) return 0; trace_i915_gem_request_wait_begin(ring, seqno); @@ -2079,7 +2079,7 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno, getrawmonotonic(&before); #define EXIT_COND \ - (i915_seqno_passed(ring->get_seqno(ring), seqno) || \ + (i915_seqno_passed(ring->get_seqno(ring, true), seqno) || \ atomic_read(&dev_priv->mm.wedged)) do { if (interruptible) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 57e4f2b..13db040 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -335,7 +335,7 @@ static void notify_ring(struct drm_device *dev, if (ring->obj == NULL) return; - trace_i915_gem_request_complete(ring, ring->get_seqno(ring)); + trace_i915_gem_request_complete(ring, ring->get_seqno(ring, true)); wake_up_all(&ring->irq_queue); if (i915_enable_hangcheck) { @@ -1052,7 +1052,7 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv, if (!ring->get_seqno) return NULL; - seqno = ring->get_seqno(ring); + seqno = ring->get_seqno(ring, true); list_for_each_entry(obj, &dev_priv->mm.active_list, mm_list) { if (obj->ring != ring) continue; @@ -1106,7 +1106,7 @@ static void i915_record_ring_state(struct drm_device *dev, error->waiting[ring->id] = waitqueue_active(&ring->irq_queue); error->instpm[ring->id] = I915_READ(RING_INSTPM(ring->mmio_base)); - error->seqno[ring->id] = ring->get_seqno(ring); + error->seqno[ring->id] = ring->get_seqno(ring, true); error->acthd[ring->id] = intel_ring_get_active_head(ring); error->head[ring->id] = I915_READ_HEAD(ring); error->tail[ring->id] = I915_READ_TAIL(ring); @@ -1603,7 +1603,8 @@ ring_last_seqno(struct intel_ring_buffer *ring) static bool i915_hangcheck_ring_idle(struct intel_ring_buffer *ring, bool *err) { if (list_empty(&ring->request_list) || - i915_seqno_passed(ring->get_seqno(ring), ring_last_seqno(ring))) { + i915_seqno_passed(ring->get_seqno(ring, true), + ring_last_seqno(ring))) { /* Issue a wake-up to catch stuck h/w. */ if (waitqueue_active(&ring->irq_queue)) { DRM_ERROR("Hangcheck timer elapsed... %s idle\n", diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 6c0f504..edcb72d 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -618,26 +618,24 @@ pc_render_add_request(struct intel_ring_buffer *ring, } static u32 -gen6_ring_get_seqno(struct intel_ring_buffer *ring) +gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool force) { - struct drm_device *dev = ring->dev; - /* Workaround to force correct ordering between irq and seqno writes on * ivb (and maybe also on snb) by reading from a CS register (like * ACTHD) before reading the status page. */ - if (IS_GEN6(dev) || IS_GEN7(dev)) + if (force) intel_ring_get_active_head(ring); return intel_read_status_page(ring, I915_GEM_HWS_INDEX); } static u32 -ring_get_seqno(struct intel_ring_buffer *ring) +ring_get_seqno(struct intel_ring_buffer *ring, bool force) { return intel_read_status_page(ring, I915_GEM_HWS_INDEX); } static u32 -pc_render_get_seqno(struct intel_ring_buffer *ring) +pc_render_get_seqno(struct intel_ring_buffer *ring, bool force) { struct pipe_control *pc = ring->private; return pc->cpu_page[0]; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 8b2b92e..986f85f 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -72,7 +72,8 @@ struct intel_ring_buffer { u32 flush_domains); int (*add_request)(struct intel_ring_buffer *ring, u32 *seqno); - u32 (*get_seqno)(struct intel_ring_buffer *ring); + u32 (*get_seqno)(struct intel_ring_buffer *ring, + bool force); int (*dispatch_execbuffer)(struct intel_ring_buffer *ring, u32 offset, u32 length); void (*cleanup)(struct intel_ring_buffer *ring); -- 1.7.10.4