On Fri, Jul 13, 2012 at 02:14:07PM +0100, Chris Wilson wrote: > As we always flush the GPU cache prior to emitting the breadcrumb, we no > longer have to worry about the deferred flush causing the > pending_gpu_write to be delayed. So we can instead utilize the known > last_write_seqno to hopefully minimise the wait times. > > Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk> I like this, but I'd prefer if this would be at the end of the series as a neat improvement - I'd really prefer if we get together a somewhat minimal fix to take care of the flushing list and intel_begin_ring interruptible patch. The reason is that the merge window approaches fast, and if we're unlucky the current -next cycle won't make it into 3.6, so I'd have to send Dave a smaller pile of fixes. Which this doesn't really look like. Or do I again miss something? It's not really my brightest day ;-) Cheers, Daniel > --- > drivers/gpu/drm/i915/i915_debugfs.c | 9 ++-- > drivers/gpu/drm/i915/i915_drv.h | 12 ++--- > drivers/gpu/drm/i915/i915_gem.c | 66 ++++++++++++++++------------ > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +- > drivers/gpu/drm/i915/i915_irq.c | 5 ++- > 5 files changed, 51 insertions(+), 43 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c > index 359f6e8..a8b7db6 100644 > --- a/drivers/gpu/drm/i915/i915_debugfs.c > +++ b/drivers/gpu/drm/i915/i915_debugfs.c > @@ -121,14 +121,15 @@ static const char *cache_level_str(int type) > static void > describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) > { > - seq_printf(m, "%p: %s%s %8zdKiB %04x %04x %d %d%s%s%s", > + seq_printf(m, "%p: %s%s %8zdKiB %04x %04x %d %d %d%s%s%s", > &obj->base, > get_pin_flag(obj), > get_tiling_flag(obj), > obj->base.size / 1024, > obj->base.read_domains, > obj->base.write_domain, > - obj->last_rendering_seqno, > + obj->last_read_seqno, > + obj->last_write_seqno, > obj->last_fenced_seqno, > cache_level_str(obj->cache_level), > 
obj->dirty ? " dirty" : "", > @@ -630,12 +631,12 @@ static void print_error_buffers(struct seq_file *m, > seq_printf(m, "%s [%d]:\n", name, count); > > while (count--) { > - seq_printf(m, " %08x %8u %04x %04x %08x%s%s%s%s%s%s%s", > + seq_printf(m, " %08x %8u %04x %04x %x %x%s%s%s%s%s%s%s", > err->gtt_offset, > err->size, > err->read_domains, > err->write_domain, > - err->seqno, > + err->rseqno, err->wseqno, > pin_flag(err->pinned), > tiling_flag(err->tiling), > dirty_flag(err->dirty), > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 51e234e..b398b72 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -221,7 +221,7 @@ struct drm_i915_error_state { > struct drm_i915_error_buffer { > u32 size; > u32 name; > - u32 seqno; > + u32 rseqno, wseqno; > u32 gtt_offset; > u32 read_domains; > u32 write_domain; > @@ -895,12 +895,6 @@ struct drm_i915_gem_object { > unsigned int dirty:1; > > /** > - * This is set if the object has been written to since the last > - * GPU flush. > - */ > - unsigned int pending_gpu_write:1; > - > - /** > * Fence register bits (if any) for this object. Will be set > * as needed when mapped into the GTT. > * Protected by dev->struct_mutex. > @@ -992,7 +986,8 @@ struct drm_i915_gem_object { > struct intel_ring_buffer *ring; > > /** Breadcrumb of last rendering to the buffer. */ > - uint32_t last_rendering_seqno; > + uint32_t last_read_seqno; > + uint32_t last_write_seqno; > /** Breadcrumb of last fenced GPU access to the buffer. 
*/ > uint32_t last_fenced_seqno; > > @@ -1291,7 +1286,6 @@ void i915_gem_lastclose(struct drm_device *dev); > int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj, > gfp_t gfpmask); > int __must_check i915_mutex_lock_interruptible(struct drm_device *dev); > -int __must_check i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj); > int i915_gem_object_sync(struct drm_i915_gem_object *obj, > struct intel_ring_buffer *to); > void i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 1782369..49b8864 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -1441,7 +1441,7 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, > list_move_tail(&obj->mm_list, &dev_priv->mm.active_list); > list_move_tail(&obj->ring_list, &ring->active_list); > > - obj->last_rendering_seqno = seqno; > + obj->last_read_seqno = seqno; > > if (obj->fenced_gpu_access) { > obj->last_fenced_seqno = seqno; > @@ -1461,7 +1461,8 @@ static void > i915_gem_object_move_off_active(struct drm_i915_gem_object *obj) > { > list_del_init(&obj->ring_list); > - obj->last_rendering_seqno = 0; > + obj->last_read_seqno = 0; > + obj->last_write_seqno = 0; > obj->last_fenced_seqno = 0; > } > > @@ -1493,7 +1494,6 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj) > obj->fenced_gpu_access = false; > > obj->active = 0; > - obj->pending_gpu_write = false; > drm_gem_object_unreference(&obj->base); > > WARN_ON(i915_verify_lists(dev)); > @@ -1811,7 +1811,7 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) > struct drm_i915_gem_object, > ring_list); > > - if (!i915_seqno_passed(seqno, obj->last_rendering_seqno)) > + if (!i915_seqno_passed(seqno, obj->last_read_seqno)) > break; > > if (obj->base.write_domain != 0) > @@ -2034,9 +2034,11 @@ i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno) > * Ensures that all 
rendering to the object has completed and the object is > * safe to unbind from the GTT or access from the CPU. > */ > -int > -i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj) > +static __must_check int > +i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, > + bool readonly) > { > + u32 seqno; > int ret; > > /* This function only exists to support waiting for existing rendering, > @@ -2047,13 +2049,27 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj) > /* If there is rendering queued on the buffer being evicted, wait for > * it. > */ > - if (obj->active) { > - ret = i915_wait_seqno(obj->ring, obj->last_rendering_seqno); > - if (ret) > - return ret; > - i915_gem_retire_requests_ring(obj->ring); > + if (readonly) > + seqno = obj->last_write_seqno; > + else > + seqno = obj->last_read_seqno; > + if (seqno == 0) > + return 0; > + > + ret = i915_wait_seqno(obj->ring, seqno); > + if (ret) > + return ret; > + > + /* Manually manage the write flush as we may have not yet retired > + * the buffer. 
> + */ > + if (obj->last_write_seqno && > + i915_seqno_passed(seqno, obj->last_write_seqno)) { > + obj->last_write_seqno = 0; > + obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS; > } > > + i915_gem_retire_requests_ring(obj->ring); > return 0; > } > > @@ -2072,10 +2088,10 @@ i915_gem_object_flush_active(struct drm_i915_gem_object *obj) > if (ret) > return ret; > > - ret = i915_gem_check_olr(obj->ring, > - obj->last_rendering_seqno); > + ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno); > if (ret) > return ret; > + > i915_gem_retire_requests_ring(obj->ring); > } > > @@ -2135,7 +2151,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) > goto out; > > if (obj->active) { > - seqno = obj->last_rendering_seqno; > + seqno = obj->last_read_seqno; > ring = obj->ring; > } > > @@ -2190,11 +2206,11 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj, > return 0; > > if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev)) > - return i915_gem_object_wait_rendering(obj); > + return i915_gem_object_wait_rendering(obj, false); > > idx = intel_ring_sync_index(from, to); > > - seqno = obj->last_rendering_seqno; > + seqno = obj->last_read_seqno; > if (seqno <= from->sync_seqno[idx]) > return 0; > > @@ -2938,11 +2954,9 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) > if (ret) > return ret; > > - if (obj->pending_gpu_write || write) { > - ret = i915_gem_object_wait_rendering(obj); > - if (ret) > - return ret; > - } > + ret = i915_gem_object_wait_rendering(obj, !write); > + if (ret) > + return ret; > > i915_gem_object_flush_cpu_write_domain(obj); > > @@ -3113,7 +3127,7 @@ i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj) > return ret; > } > > - ret = i915_gem_object_wait_rendering(obj); > + ret = i915_gem_object_wait_rendering(obj, false); > if (ret) > return ret; > > @@ -3141,11 +3155,9 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) > if (ret) > return ret; > 
> - if (write || obj->pending_gpu_write) { > - ret = i915_gem_object_wait_rendering(obj); > - if (ret) > - return ret; > - } > + ret = i915_gem_object_wait_rendering(obj, !write); > + if (ret) > + return ret; > > i915_gem_object_flush_gtt_write_domain(obj); > > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > index 0c26692..50e83e5 100644 > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > @@ -949,7 +949,7 @@ i915_gem_execbuffer_move_to_active(struct list_head *objects, > i915_gem_object_move_to_active(obj, ring, seqno); > if (obj->base.write_domain) { > obj->dirty = 1; > - obj->pending_gpu_write = true; > + obj->last_write_seqno = seqno; > list_move_tail(&obj->gpu_write_list, > &ring->gpu_write_list); > if (obj->pin_count) /* check for potential scanout */ > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c > index 566f61b..41ed41d 100644 > --- a/drivers/gpu/drm/i915/i915_irq.c > +++ b/drivers/gpu/drm/i915/i915_irq.c > @@ -950,7 +950,8 @@ static void capture_bo(struct drm_i915_error_buffer *err, > { > err->size = obj->base.size; > err->name = obj->base.name; > - err->seqno = obj->last_rendering_seqno; > + err->rseqno = obj->last_read_seqno; > + err->wseqno = obj->last_write_seqno; > err->gtt_offset = obj->gtt_offset; > err->read_domains = obj->base.read_domains; > err->write_domain = obj->base.write_domain; > @@ -1045,7 +1046,7 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv, > if (obj->ring != ring) > continue; > > - if (i915_seqno_passed(seqno, obj->last_rendering_seqno)) > + if (i915_seqno_passed(seqno, obj->last_read_seqno)) > continue; > > if ((obj->base.read_domains & I915_GEM_DOMAIN_COMMAND) == 0) > -- > 1.7.10.4 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx at lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Daniel Vetter Mail: 
daniel at ffwll.ch Mobile: +41 (0)79 365 57 48