As we always flush the GPU cache prior to emitting the breadcrumb, we no longer have to worry about the deferred flush causing the pending_gpu_write to be delayed. So we can instead utilize the known last_write_seqno to hopefully minimise the wait times. Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk> --- drivers/gpu/drm/i915/i915_debugfs.c | 9 ++-- drivers/gpu/drm/i915/i915_drv.h | 12 ++--- drivers/gpu/drm/i915/i915_gem.c | 66 ++++++++++++++++------------ drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +- drivers/gpu/drm/i915/i915_irq.c | 5 ++- 5 files changed, 51 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 359f6e8..a8b7db6 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -121,14 +121,15 @@ static const char *cache_level_str(int type) static void describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) { - seq_printf(m, "%p: %s%s %8zdKiB %04x %04x %d %d%s%s%s", + seq_printf(m, "%p: %s%s %8zdKiB %04x %04x %d %d %d%s%s%s", &obj->base, get_pin_flag(obj), get_tiling_flag(obj), obj->base.size / 1024, obj->base.read_domains, obj->base.write_domain, - obj->last_rendering_seqno, + obj->last_read_seqno, + obj->last_write_seqno, obj->last_fenced_seqno, cache_level_str(obj->cache_level), obj->dirty ? " dirty" : "", @@ -630,12 +631,12 @@ static void print_error_buffers(struct seq_file *m, seq_printf(m, "%s [%d]:\n", name, count); while (count--) { - seq_printf(m, " %08x %8u %04x %04x %08x%s%s%s%s%s%s%s", + seq_printf(m, " %08x %8u %04x %04x %x %x%s%s%s%s%s%s%s", err->gtt_offset, err->size, err->read_domains, err->write_domain, - err->seqno, + err->rseqno, err->wseqno, pin_flag(err->pinned), tiling_flag(err->tiling), dirty_flag(err->dirty), diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1f5f5ff..49a532e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -221,7 +221,7 @@ struct drm_i915_error_state { struct drm_i915_error_buffer { u32 size; u32 name; - u32 seqno; + u32 rseqno, wseqno; u32 gtt_offset; u32 read_domains; u32 write_domain; @@ -895,12 +895,6 @@ struct drm_i915_gem_object { unsigned int dirty:1; /** - * This is set if the object has been written to since the last - * GPU flush. - */ - unsigned int pending_gpu_write:1; - - /** * Fence register bits (if any) for this object. Will be set * as needed when mapped into the GTT. * Protected by dev->struct_mutex. @@ -992,7 +986,8 @@ struct drm_i915_gem_object { struct intel_ring_buffer *ring; /** Breadcrumb of last rendering to the buffer. */ - uint32_t last_rendering_seqno; + uint32_t last_read_seqno; + uint32_t last_write_seqno; /** Breadcrumb of last fenced GPU access to the buffer. */ uint32_t last_fenced_seqno; @@ -1291,7 +1286,6 @@ void i915_gem_lastclose(struct drm_device *dev); int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj, gfp_t gfpmask); int __must_check i915_mutex_lock_interruptible(struct drm_device *dev); -int __must_check i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj); int i915_gem_object_sync(struct drm_i915_gem_object *obj, struct intel_ring_buffer *to); void i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 4c52098..f9680b7 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1441,7 +1441,7 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, list_move_tail(&obj->mm_list, &dev_priv->mm.active_list); list_move_tail(&obj->ring_list, &ring->active_list); - obj->last_rendering_seqno = seqno; + obj->last_read_seqno = seqno; if (obj->fenced_gpu_access) { obj->last_fenced_seqno = seqno; @@ -1461,7 +1461,8 @@ static void i915_gem_object_move_off_active(struct drm_i915_gem_object *obj) { list_del_init(&obj->ring_list); - obj->last_rendering_seqno = 0; + obj->last_read_seqno = 0; + obj->last_write_seqno = 0; obj->last_fenced_seqno = 0; } @@ -1493,7 +1494,6 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj) obj->fenced_gpu_access = false; obj->active = 0; - obj->pending_gpu_write = false; drm_gem_object_unreference(&obj->base); WARN_ON(i915_verify_lists(dev)); @@ -1812,7 +1812,7 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) struct drm_i915_gem_object, ring_list); - if (!i915_seqno_passed(seqno, obj->last_rendering_seqno)) + if (!i915_seqno_passed(seqno, obj->last_read_seqno)) break; if (obj->base.write_domain != 0) @@ -2036,9 +2036,11 @@ i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno) * Ensures that all rendering to the object has completed and the object is * safe to unbind from the GTT or access from the CPU. */ -int -i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj) +static __must_check int +i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, + bool readonly) { + u32 seqno; int ret; /* This function only exists to support waiting for existing rendering, @@ -2049,13 +2051,27 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj) /* If there is rendering queued on the buffer being evicted, wait for * it. */ - if (obj->active) { - ret = i915_wait_seqno(obj->ring, obj->last_rendering_seqno); - if (ret) - return ret; - i915_gem_retire_requests_ring(obj->ring); + if (readonly) + seqno = obj->last_write_seqno; + else + seqno = obj->last_read_seqno; + if (seqno == 0) + return 0; + + ret = i915_wait_seqno(obj->ring, seqno); + if (ret) + return ret; + + /* Manually manage the write flush as we may have not yet retired + * the buffer. + */ + if (obj->last_write_seqno && + i915_seqno_passed(seqno, obj->last_write_seqno)) { + obj->last_write_seqno = 0; + obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS; } + i915_gem_retire_requests_ring(obj->ring); return 0; } @@ -2074,10 +2090,10 @@ i915_gem_object_flush_active(struct drm_i915_gem_object *obj) if (ret) return ret; - ret = i915_gem_check_olr(obj->ring, - obj->last_rendering_seqno); + ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno); if (ret) return ret; + i915_gem_retire_requests_ring(obj->ring); } @@ -2137,7 +2153,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) goto out; if (obj->active) { - seqno = obj->last_rendering_seqno; + seqno = obj->last_read_seqno; ring = obj->ring; } @@ -2192,11 +2208,11 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj, return 0; if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev)) - return i915_gem_object_wait_rendering(obj); + return i915_gem_object_wait_rendering(obj, false); idx = intel_ring_sync_index(from, to); - seqno = obj->last_rendering_seqno; + seqno = obj->last_read_seqno; if (seqno <= from->sync_seqno[idx]) return 0; @@ -2940,11 +2956,9 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) if (ret) return ret; - if (obj->pending_gpu_write || write) { - ret = i915_gem_object_wait_rendering(obj); - if (ret) - return ret; - } + ret = i915_gem_object_wait_rendering(obj, !write); + if (ret) + return ret; i915_gem_object_flush_cpu_write_domain(obj); @@ -3115,7 +3129,7 @@ i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj) return ret; } - ret = i915_gem_object_wait_rendering(obj); + ret = i915_gem_object_wait_rendering(obj, false); if (ret) return ret; @@ -3143,11 +3157,9 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) if (ret) return ret; - if (write || obj->pending_gpu_write) { - ret = i915_gem_object_wait_rendering(obj); - if (ret) - return ret; - } + ret = i915_gem_object_wait_rendering(obj, !write); + if (ret) + return ret; i915_gem_object_flush_gtt_write_domain(obj); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index c8957fb..c08229f 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -954,7 +954,7 @@ i915_gem_execbuffer_move_to_active(struct list_head *objects, i915_gem_object_move_to_active(obj, ring, seqno); if (obj->base.write_domain) { obj->dirty = 1; - obj->pending_gpu_write = true; + obj->last_write_seqno = seqno; list_move_tail(&obj->gpu_write_list, &ring->gpu_write_list); if (obj->pin_count) /* check for potential scanout */ diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 566f61b..41ed41d 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -950,7 +950,8 @@ static void capture_bo(struct drm_i915_error_buffer *err, { err->size = obj->base.size; err->name = obj->base.name; - err->seqno = obj->last_rendering_seqno; + err->rseqno = obj->last_read_seqno; + err->wseqno = obj->last_write_seqno; err->gtt_offset = obj->gtt_offset; err->read_domains = obj->base.read_domains; err->write_domain = obj->base.write_domain; @@ -1045,7 +1046,7 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv, if (obj->ring != ring) continue; - if (i915_seqno_passed(seqno, obj->last_rendering_seqno)) + if (i915_seqno_passed(seqno, obj->last_read_seqno)) continue; if ((obj->base.read_domains & I915_GEM_DOMAIN_COMMAND) == 0) -- 1.7.10.4