From: John Harrison <John.C.Harrison@xxxxxxxxx> There is a sync framework to allow work for multiple independent systems to be synchronised with each other but without stalling the CPU whether in the application or the driver. This patch adds support for this framework to the GPU scheduler. Batch buffers can now have sync framework fence objects associated with them. The scheduler will look at this fence when deciding what to submit next to the hardware. If the fence is outstanding then that batch buffer will be passed over in preference of one that is ready to run. If no other batches are ready then the scheduler will queue an asynchronous callback to be woken up when the fence has been signalled. The callback will wake the scheduler and submit the now ready batch buffer. v2: Updated to use the new sync_fence_is_signaled() API rather than having to know about the internals of a fence object. Also removed the spin lock from the fence wait callback function. This was used to clear the 'waiting' flag. However, it causes a problem with the TDR code. Specifically, when cancelling work packets due to a TDR there is a code path where the fence can be signalled while the spinlock is already held. It is not really necessary to clear the flag anyway as it's purpose is solely to prevent multiple waits being issued. Once the fence has been signalled, no further waits will be attempted so it doesn't matter whether the fence is marked as having an outstanding wait or not. For: VIZ-1587 Signed-off-by: John Harrison <John.C.Harrison@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_scheduler.c | 144 ++++++++++++++++++++++++++++++++-- drivers/gpu/drm/i915/i915_scheduler.h | 6 ++ 3 files changed, 146 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 15dee41..4187e75 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1698,6 +1698,7 @@ struct i915_execbuffer_params { uint64_t batch_obj_vm_offset; struct intel_engine_cs *ring; struct drm_i915_gem_object *batch_obj; + struct sync_fence *fence_wait; struct drm_clip_rect *cliprects; uint32_t instp_mask; int instp_mode; diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index d4d2466..939dc2b 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -25,6 +25,7 @@ #include "i915_drv.h" #include "intel_drv.h" #include "i915_scheduler.h" +#include <../drivers/android/sync.h> static int i915_scheduler_fly_node(struct i915_scheduler_queue_entry *node); static int i915_scheduler_remove_dependent(struct i915_scheduler *scheduler, @@ -96,6 +97,9 @@ int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe) if (i915.scheduler_override & i915_so_direct_submit) { int ret; + WARN_ON(qe->params.fence_wait && + (!sync_fence_is_signaled(qe->params.fence_wait))); + intel_ring_reserved_space_cancel(qe->params.request->ringbuf); scheduler->flags[qe->params.ring->id] |= i915_sf_submitting; @@ -130,6 +134,9 @@ int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe) if (qe->params.dispatch_flags & I915_DISPATCH_SECURE) i915_gem_execbuff_release_batch_obj(qe->params.batch_obj); + if (qe->params.fence_wait) + sync_fence_put(qe->params.fence_wait); + return 0; } @@ -531,6 +538,9 @@ static int i915_scheduler_remove(struct intel_engine_cs *ring) node = list_first_entry(&remove, typeof(*node), link); list_del(&node->link); + if (node->params.fence_wait) + sync_fence_put(node->params.fence_wait); + /* Free up all the DRM object references */ i915_gem_scheduler_clean_node(node); @@ -796,17 +806,94 @@ static int i915_scheduler_submit_max_priority(struct intel_engine_cs *ring, return count; } +/* Use a private structure in order to pass the 'dev' pointer through */ +struct i915_sync_fence_waiter { + struct sync_fence_waiter sfw; + struct drm_device *dev; + struct i915_scheduler_queue_entry *node; +}; + +/* + * NB: This callback can be executed at interrupt time. Further, it can be + * called from within the TDR reset sequence during a scheduler 'kill_all' + * and thus be called while the scheduler spinlock is already held. Thus + * it can grab neither the driver mutex nor the scheduler spinlock. + */ +static void i915_scheduler_wait_fence_signaled(struct sync_fence *fence, + struct sync_fence_waiter *waiter) +{ + struct i915_sync_fence_waiter *i915_waiter; + struct drm_i915_private *dev_priv = NULL; + + i915_waiter = container_of(waiter, struct i915_sync_fence_waiter, sfw); + dev_priv = (i915_waiter && i915_waiter->dev) ? + i915_waiter->dev->dev_private : NULL; + + /* + * NB: The callback is executed at interrupt time, thus it can not + * call _submit() directly. It must go via the delayed work handler. + */ + if (dev_priv) + queue_work(dev_priv->wq, &dev_priv->mm.scheduler_work); + + kfree(waiter); +} + +static bool i915_scheduler_async_fence_wait(struct drm_device *dev, + struct i915_scheduler_queue_entry *node) +{ + struct i915_sync_fence_waiter *fence_waiter; + struct sync_fence *fence = node->params.fence_wait; + int signaled; + bool success = true; + + if ((node->flags & i915_qef_fence_waiting) == 0) + node->flags |= i915_qef_fence_waiting; + else + return true; + + if (fence == NULL) + return false; + + signaled = sync_fence_is_signaled(fence); + if (!signaled) { + fence_waiter = kmalloc(sizeof(*fence_waiter), GFP_KERNEL); + if (!fence_waiter) { + success = false; + goto end; + } + + sync_fence_waiter_init(&fence_waiter->sfw, + i915_scheduler_wait_fence_signaled); + fence_waiter->node = node; + fence_waiter->dev = dev; + + if (sync_fence_wait_async(fence, &fence_waiter->sfw)) { + /* an error occurred, usually this is because the + * fence was signaled already */ + signaled = sync_fence_is_signaled(fence); + if (!signaled) { + success = false; + goto end; + } + } + } +end: + return success; +} + static int i915_scheduler_pop_from_queue_locked(struct intel_engine_cs *ring, struct i915_scheduler_queue_entry **pop_node, unsigned long *flags) { struct drm_i915_private *dev_priv = ring->dev->dev_private; struct i915_scheduler *scheduler = dev_priv->scheduler; + struct i915_scheduler_queue_entry *best_wait, *fence_wait = NULL; struct i915_scheduler_queue_entry *best; struct i915_scheduler_queue_entry *node; int ret; int i; - bool any_queued; + bool signalled, any_queued; bool has_local, has_remote, only_remote; *pop_node = NULL; @@ -814,13 +901,20 @@ static int i915_scheduler_pop_from_queue_locked(struct intel_engine_cs *ring, any_queued = false; only_remote = false; + best_wait = NULL; best = NULL; + signalled = true; list_for_each_entry(node, &scheduler->node_queue[ring->id], link) { if (!I915_SQS_IS_QUEUED(node)) continue; any_queued = true; + if (node->params.fence_wait) + signalled = sync_fence_is_signaled(node->params.fence_wait); + else + signalled = true; + has_local = false; has_remote = false; for (i = 0; i < node->num_deps; i++) { @@ -837,9 +931,15 @@ static int i915_scheduler_pop_from_queue_locked(struct intel_engine_cs *ring, only_remote = true; if (!has_local && !has_remote) { - if (!best || - (node->priority > best->priority)) - best = node; + if (signalled) { + if (!best || + (node->priority > best->priority)) + best = node; + } else { + if (!best_wait || + (node->priority > best_wait->priority)) + best_wait = node; + } } } @@ -855,8 +955,34 @@ static int i915_scheduler_pop_from_queue_locked(struct intel_engine_cs *ring, * (a) there are no buffers in the queue * (b) all queued buffers are dependent on other buffers * e.g. on a buffer that is in flight on a different ring + * (c) all independent buffers are waiting on fences */ - if (only_remote) { + if (best_wait) { + /* Need to wait for something to be signalled. + * + * NB: do not really want to wait on one specific fd + * because there is no guarantee in the order that + * blocked buffers will be signalled. Need to wait on + * 'anything' and then rescan for best available, if + * still nothing then wait again... + * + * NB 2: The wait must also wake up if someone attempts + * to submit a new buffer. The new buffer might be + * independent of all others and thus could jump the + * queue and start running immediately. + * + * NB 3: Lastly, must not wait with the spinlock held! + * + * So rather than wait here, need to queue a deferred + * wait thread and just return 'nothing to do'. + * + * NB 4: Can't actually do the wait here because the + * spinlock is still held and the wait requires doing + * a memory allocation. + */ + fence_wait = best_wait; + ret = -EAGAIN; + } else if (only_remote) { /* The only dependent buffers are on another ring. */ ret = -EAGAIN; } else if (any_queued) { @@ -868,6 +994,14 @@ static int i915_scheduler_pop_from_queue_locked(struct intel_engine_cs *ring, /* i915_scheduler_dump_queue_pop(ring, best); */ + if (fence_wait) { + /* It should be safe to sleep now... */ + /* NB: Need to release and reacquire the spinlock though */ + spin_unlock_irqrestore(&scheduler->lock, *flags); + i915_scheduler_async_fence_wait(ring->dev, fence_wait); + spin_lock_irqsave(&scheduler->lock, *flags); + } + *pop_node = best; return ret; } diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index fc58d49..adfa1e0 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -51,6 +51,11 @@ struct i915_scheduler_obj_entry { struct drm_i915_gem_object *obj; }; +enum i915_scheduler_queue_entry_flags { + /* Fence is being waited on */ + i915_qef_fence_waiting = (1 << 0), +}; + struct i915_scheduler_queue_entry { struct i915_execbuffer_params params; /* -1023 = lowest priority, 0 = default, 1023 = highest */ @@ -63,6 +68,7 @@ struct i915_scheduler_queue_entry { enum i915_scheduler_queue_status status; unsigned long stamp; struct list_head link; + uint32_t flags; }; struct i915_scheduler { -- 1.9.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx