Since we don't differentiate between the different GPU read domains, it
should be safe to allow back-to-back reads to occur without issuing a
wait (or a flush in the non-semaphore case).

This has the unfortunate side effect that we need to keep track of all
the outstanding buffer reads so that we can synchronize a later write to
another ring (since we don't know which read finishes first). In other
words, the code is quite simple for two rings, but gets trickier for
> 2 rings.

Here is a picture of the solution to the above problem:

Ring 0           Ring 1           Ring 2
batch 0          batch 1          batch 2
read buffer A    read buffer A    wait batch 0
                                  wait batch 1
                                  write buffer A

This code is really untested. I'm hoping for feedback on whether this is
worth cleaning up and testing more thoroughly.

Cc: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
Signed-off-by: Ben Widawsky <ben at bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_drv.h            |    1 +
 drivers/gpu/drm/i915/i915_gem.c            |    1 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   32 ++++++++++++++++++++++-----
 3 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4a9c1b9..d59bf20 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -852,6 +852,7 @@ struct drm_i915_gem_object {
 
 	/** Breadcrumb of last rendering to the buffer. */
 	uint32_t last_rendering_seqno;
+	uint32_t lrs_ro[I915_NUM_RINGS];
 	struct intel_ring_buffer *ring;
 
 	/** Breadcrumb of last fenced GPU access to the buffer. */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ed0b68f..64cc804 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1561,6 +1561,7 @@ i915_gem_object_move_off_active(struct drm_i915_gem_object *obj)
 {
 	list_del_init(&obj->ring_list);
 	obj->last_rendering_seqno = 0;
+	memset(obj->lrs_ro, 0, sizeof(obj->lrs_ro));
 }
 
 static void
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 42a6529..78090a3 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -752,21 +752,20 @@ i915_gem_execbuffer_sync_rings(struct drm_i915_gem_object *obj,
 {
 	struct intel_ring_buffer *from = obj->ring;
 	u32 seqno;
-	int ret, idx;
+	int ret, idx, i;
 
 	if (from == NULL || to == from)
 		return 0;
 
-	/* If the object isn't being written to, and the object isn't moving
-	 * into a GPU write domain, it doesn't need to sync.
-	 */
 	if (obj->pending_gpu_write == 0 &&
-	    obj->base.pending_write_domain == 0)
+	    obj->base.pending_write_domain == 0) {
+		obj->lrs_ro[ffs(to->id) - 1] = obj->last_rendering_seqno;
 		return 0;
+	}
 
 	/* XXX gpu semaphores are implicated in various hard hangs on SNB */
 	if (INTEL_INFO(obj->base.dev)->gen < 6 || !i915_semaphores)
-		return i915_gem_object_wait_rendering(obj);
+		ret = i915_gem_object_wait_rendering(obj);
 
 	idx = intel_ring_sync_index(from, to);
 
@@ -792,6 +791,27 @@ i915_gem_execbuffer_sync_rings(struct drm_i915_gem_object *obj,
 
 	from->sync_seqno[idx] = seqno;
 
+	/* We must be doing a write to an object which may be in queue to be
+	 * read by some other command. If we skipped synchronizing because of
+	 * a read-only request, we must check and add a sync point now. The
+	 * reason we must sync on the write is that we have more than 2
+	 * rings: if two rings have outstanding reads (without sync), we
+	 * can't let the write occur until both others have completed.
+	 */
+	for (i = 0; i < I915_NUM_RINGS; i++) {
+		uint32_t ro_seqno = obj->lrs_ro[i];
+		if (ro_seqno != 0) {
+			obj->lrs_ro[i] = 0;
+
+			if (i915_seqno_passed(from->get_seqno(from), ro_seqno))
+				continue;
+
+			ret = to->sync_to(to, from, ro_seqno - 1);
+			if (ret)
+				return ret;
+		}
+	}
+
 	return to->sync_to(to, from, seqno - 1);
 }
 
-- 
1.7.8
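
For context outside the driver, below is a minimal, self-contained user-space
sketch of the bookkeeping described in the commit message: read-only uses only
record a per-ring seqno, and a later cross-ring write waits on every ring that
still has an unretired read. All names in the sketch (NUM_RINGS, struct bo,
record_read, sync_before_write, the completed[] array) are illustrative
stand-ins, not i915 API.

/*
 * Illustrative sketch only -- none of these names exist in the driver.
 * Each buffer object remembers the last read-only seqno seen per ring;
 * a write from another ring must wait on every ring whose read has not
 * yet retired, mirroring the Ring 0/1/2 picture in the commit message.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define NUM_RINGS 3

struct bo {
	uint32_t last_read_seqno[NUM_RINGS];	/* 0 == no outstanding read */
};

/* A read-only use on 'ring' just records its seqno; no wait is issued. */
static void record_read(struct bo *bo, int ring, uint32_t seqno)
{
	bo->last_read_seqno[ring] = seqno;
}

/*
 * Before a write on ring 'to', emit one wait per other ring that still
 * has an unretired read, then clear the bookkeeping.
 */
static void sync_before_write(struct bo *bo, int to,
			      const uint32_t completed[NUM_RINGS])
{
	int i;

	for (i = 0; i < NUM_RINGS; i++) {
		uint32_t seqno = bo->last_read_seqno[i];

		if (i == to || seqno == 0)
			continue;

		bo->last_read_seqno[i] = 0;

		if (completed[i] >= seqno)	/* read already retired */
			continue;

		printf("ring %d: wait for ring %d seqno %" PRIu32 "\n",
		       to, i, seqno);
	}
}

int main(void)
{
	struct bo bo;
	uint32_t completed[NUM_RINGS] = { 0, 0, 0 };

	memset(&bo, 0, sizeof(bo));

	/* Matches the picture: rings 0 and 1 read, ring 2 then writes. */
	record_read(&bo, 0, 1);			/* batch 0 reads buffer A */
	record_read(&bo, 1, 2);			/* batch 1 reads buffer A */
	sync_before_write(&bo, 2, completed);	/* batch 2 writes buffer A */

	return 0;
}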