From: John Harrison <John.C.Harrison@xxxxxxxxx> There is a sync framework to allow work for multiple independent systems to be synchronised with each other but without stalling the CPU whether in the application or the driver. This patch adds support for this framework to the GPU scheduler. Batch buffers can now have sync framework fence objects associated with them. The scheduler will look at this fence when deciding what to submit next to the hardware. If the fence is outstanding then that batch buffer will be passed over in preference of one that is ready to run. If no other batches are ready then the scheduler will queue an asynchronous callback to be woken up when the fence has been signalled. The callback will wake the scheduler and submit the now ready batch buffer. For: VIZ-1587 Signed-off-by: John Harrison <John.C.Harrison@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_scheduler.c | 163 ++++++++++++++++++++++++++++++++-- drivers/gpu/drm/i915/i915_scheduler.h | 6 ++ 3 files changed, 165 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 12b4986..b568432 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1703,6 +1703,7 @@ struct i915_execbuffer_params { uint32_t batch_obj_vm_offset; struct intel_engine_cs *ring; struct drm_i915_gem_object *batch_obj; + struct sync_fence *fence_wait; struct drm_clip_rect *cliprects; uint32_t instp_mask; int instp_mode; diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index c7139f8..19577c9 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -25,6 +25,7 @@ #include "i915_drv.h" #include "intel_drv.h" #include "i915_scheduler.h" +#include <../drivers/staging/android/sync.h> static int i915_scheduler_fly_node(struct i915_scheduler_queue_entry *node); static int i915_scheduler_remove_dependent(struct i915_scheduler *scheduler, @@ -100,6 +101,9 @@ int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe) qe->scheduler_index = scheduler->index++; + WARN_ON(qe->params.fence_wait && + (atomic_read(&qe->params.fence_wait->status) == 0)); + intel_ring_reserved_space_cancel(qe->params.request->ringbuf); scheduler->flags[qe->params.ring->id] |= i915_sf_submitting; @@ -134,6 +138,11 @@ int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe) if (qe->params.dispatch_flags & I915_DISPATCH_SECURE) i915_gem_execbuff_release_batch_obj(qe->params.batch_obj); +#ifdef CONFIG_SYNC + if (qe->params.fence_wait) + sync_fence_put(qe->params.fence_wait); +#endif + return 0; } @@ -625,6 +634,11 @@ static int i915_scheduler_remove(struct intel_engine_cs *ring) node = list_first_entry(&remove, typeof(*node), link); list_del(&node->link); +#ifdef CONFIG_SYNC + if (node->params.fence_wait) + sync_fence_put(node->params.fence_wait); +#endif + /* Free up all the DRM object references */ i915_gem_scheduler_clean_node(node); @@ -845,17 +859,100 @@ static int i915_scheduler_submit_max_priority(struct intel_engine_cs *ring, return count; } +#ifdef CONFIG_SYNC +/* Use a private structure in order to pass the 'dev' pointer through */ +struct i915_sync_fence_waiter { + struct sync_fence_waiter sfw; + struct drm_device *dev; + struct i915_scheduler_queue_entry *node; +}; + +static void i915_scheduler_wait_fence_signaled(struct sync_fence *fence, + struct sync_fence_waiter *waiter) +{ + struct i915_sync_fence_waiter *i915_waiter; + struct drm_i915_private *dev_priv = NULL; + + i915_waiter = container_of(waiter, struct i915_sync_fence_waiter, sfw); + dev_priv = (i915_waiter && i915_waiter->dev) ? + i915_waiter->dev->dev_private : NULL; + + /* + * NB: The callback is executed at interrupt time, thus it can not + * call _submit() directly. It must go via the delayed work handler. + */ + if (dev_priv) { + struct i915_scheduler *scheduler; + unsigned long flags; + + scheduler = dev_priv->scheduler; + + spin_lock_irqsave(&scheduler->lock, flags); + i915_waiter->node->flags &= ~i915_qef_fence_waiting; + spin_unlock_irqrestore(&scheduler->lock, flags); + + queue_work(dev_priv->wq, &dev_priv->mm.scheduler_work); + } + + kfree(waiter); +} + +static bool i915_scheduler_async_fence_wait(struct drm_device *dev, + struct i915_scheduler_queue_entry *node) +{ + struct i915_sync_fence_waiter *fence_waiter; + struct sync_fence *fence = node->params.fence_wait; + int signaled; + bool success = true; + + if ((node->flags & i915_qef_fence_waiting) == 0) + node->flags |= i915_qef_fence_waiting; + else + return true; + + if (fence == NULL) + return false; + + signaled = atomic_read(&fence->status); + if (!signaled) { + fence_waiter = kmalloc(sizeof(*fence_waiter), GFP_KERNEL); + if (!fence_waiter) { + success = false; + goto end; + } + + sync_fence_waiter_init(&fence_waiter->sfw, + i915_scheduler_wait_fence_signaled); + fence_waiter->node = node; + fence_waiter->dev = dev; + + if (sync_fence_wait_async(fence, &fence_waiter->sfw)) { + /* an error occurred, usually this is because the + * fence was signaled already */ + signaled = atomic_read(&fence->status); + if (!signaled) { + success = false; + goto end; + } + } + } +end: + return success; +} +#endif + static int i915_scheduler_pop_from_queue_locked(struct intel_engine_cs *ring, struct i915_scheduler_queue_entry **pop_node, unsigned long *flags) { struct drm_i915_private *dev_priv = ring->dev->dev_private; struct i915_scheduler *scheduler = dev_priv->scheduler; + struct i915_scheduler_queue_entry *best_wait, *fence_wait = NULL; struct i915_scheduler_queue_entry *best; struct i915_scheduler_queue_entry *node; int ret; int i; - bool any_queued; + bool signalled, any_queued; bool has_local, has_remote, only_remote; *pop_node = NULL; @@ -863,13 +960,25 @@ static int i915_scheduler_pop_from_queue_locked(struct intel_engine_cs *ring, any_queued = false; only_remote = false; + best_wait = NULL; best = NULL; +#ifndef CONFIG_SYNC + signalled = true; +#endif + list_for_each_entry(node, &scheduler->node_queue[ring->id], link) { if (!I915_SQS_IS_QUEUED(node)) continue; any_queued = true; +#ifdef CONFIG_SYNC + if (node->params.fence_wait) + signalled = atomic_read(&node->params.fence_wait->status) != 0; + else + signalled = true; +#endif // CONFIG_SYNC + has_local = false; has_remote = false; for (i = 0; i < node->num_deps; i++) { @@ -886,9 +995,15 @@ static int i915_scheduler_pop_from_queue_locked(struct intel_engine_cs *ring, only_remote = true; if (!has_local && !has_remote) { - if (!best || - (node->priority > best->priority)) - best = node; + if (signalled) { + if (!best || + (node->priority > best->priority)) + best = node; + } else { + if (!best_wait || + (node->priority > best_wait->priority)) + best_wait = node; + } } } @@ -904,8 +1019,34 @@ static int i915_scheduler_pop_from_queue_locked(struct intel_engine_cs *ring, * (a) there are no buffers in the queue * (b) all queued buffers are dependent on other buffers * e.g. on a buffer that is in flight on a different ring + * (c) all independent buffers are waiting on fences */ - if (only_remote) { + if (best_wait) { + /* Need to wait for something to be signalled. + * + * NB: do not really want to wait on one specific fd + * because there is no guarantee in the order that + * blocked buffers will be signalled. Need to wait on + * 'anything' and then rescan for best available, if + * still nothing then wait again... + * + * NB 2: The wait must also wake up if someone attempts + * to submit a new buffer. The new buffer might be + * independent of all others and thus could jump the + * queue and start running immediately. + * + * NB 3: Lastly, must not wait with the spinlock held! + * + * So rather than wait here, need to queue a deferred + * wait thread and just return 'nothing to do'. + * + * NB 4: Can't actually do the wait here because the + * spinlock is still held and the wait requires doing + * a memory allocation. + */ + fence_wait = best_wait; + ret = -EAGAIN; + } else if (only_remote) { /* The only dependent buffers are on another ring. */ ret = -EAGAIN; } else if (any_queued) { @@ -917,6 +1058,18 @@ static int i915_scheduler_pop_from_queue_locked(struct intel_engine_cs *ring, /* i915_scheduler_dump_queue_pop(ring, best); */ + if (fence_wait) { +#ifdef CONFIG_SYNC + /* It should be safe to sleep now... */ + /* NB: Need to release and reacquire the spinlock though */ + spin_unlock_irqrestore(&scheduler->lock, *flags); + i915_scheduler_async_fence_wait(ring->dev, fence_wait); + spin_lock_irqsave(&scheduler->lock, *flags); +#else + BUG_ON(true); +#endif + } + *pop_node = best; return ret; } diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index ce94b0b..8ca4b4b 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -56,6 +56,11 @@ struct i915_scheduler_obj_entry { struct drm_i915_gem_object *obj; }; +enum i915_scheduler_queue_entry_flags { + /* Fence is being waited on */ + i915_qef_fence_waiting = (1 << 0), +}; + struct i915_scheduler_queue_entry { struct i915_execbuffer_params params; uint32_t priority; @@ -68,6 +73,7 @@ struct i915_scheduler_queue_entry { struct timespec stamp; struct list_head link; uint32_t scheduler_index; + uint32_t flags; }; struct i915_scheduler { -- 1.9.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx