From: John Harrison <John.C.Harrison@xxxxxxxxx> The intended usage model for struct fence is that the signalled status should be set on demand rather than polled. That is, there should not be a need for a 'signaled' function to be called everytime the status is queried. Instead, 'something' should be done to enable a signal callback from the hardware which will update the state directly. In the case of requests, this is the seqno update interrupt. The idea is that this callback will only be enabled on demand when something actually tries to wait on the fence. This change removes the polling test and replaces it with the callback scheme. Each fence is added to a 'please poke me' list at the start of i915_add_request(). The interrupt handler then scans through the 'poke me' list when a new seqno pops out and signals any matching fence/request. The fence is then removed from the list so the entire request stack does not need to be scanned every time. Note that the fence is added to the list before the commands to generate the seqno interrupt are added to the ring. Thus the sequence is guaranteed to be race free if the interrupt is already enabled. One complication here is that the 'poke me' system requires holding a reference count on the request to guarantee that it won't be freed prematurely. Unfortunately, it is unsafe to decrement the reference count from the interrupt handler because if that is the last reference, the clean up code gets run and the clean up code is not IRQ friendly. Hence, the request is added to a 'please clean me' list that gets processed at retire time. Any request in this list simply has its count decremented and is then removed from that list. Note that the interrupt is only enabled on demand (i.e. when __wait_request() is called). Thus there is still a potential race when enabling the interrupt as the request may already have completed. However, this is simply solved by calling the interrupt processing code immediately after enabling the interrupt and thereby checking for already completed requests. Lastly, the ring clean up code has the possibility to cancel outstanding requests (e.g. because TDR has reset the ring). These requests will never get signalled and so must be removed from the signal list manually. This is done by setting a 'cancelled' flag and then calling the regular notify/retire code path rather than attempting to duplicate the list manipulatation and clean up code in multiple places. This also avoid any race condition where the cancellation request might occur after/during the completion interrupt actually arriving. For: VIZ-5190 Signed-off-by: John Harrison <John.C.Harrison@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_drv.h | 8 ++ drivers/gpu/drm/i915/i915_gem.c | 136 +++++++++++++++++++++++++++++--- drivers/gpu/drm/i915/i915_irq.c | 2 + drivers/gpu/drm/i915/intel_lrc.c | 2 + drivers/gpu/drm/i915/intel_ringbuffer.c | 2 + drivers/gpu/drm/i915/intel_ringbuffer.h | 2 + 6 files changed, 143 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 2e6c151..c1f69cc 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2146,6 +2146,10 @@ struct drm_i915_gem_request { * holding the driver mutex lock. */ struct fence fence; + struct list_head signal_list; + struct list_head unsignal_list; + bool cancelled; + bool irq_enabled; /** On Which ring this request was generated */ struct drm_i915_private *i915; @@ -2223,6 +2227,10 @@ int i915_gem_request_alloc(struct intel_engine_cs *ring, struct drm_i915_gem_request **req_out); void i915_gem_request_cancel(struct drm_i915_gem_request *req); +void i915_gem_request_submit(struct drm_i915_gem_request *req); +void i915_gem_request_enable_interrupt(struct drm_i915_gem_request *req); +void i915_gem_request_notify(struct intel_engine_cs *ring); + static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req) { return fence_is_signaled(&req->fence); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 0ae76b4..8aec326 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1231,6 +1231,11 @@ int __i915_wait_request(struct drm_i915_gem_request *req, if (list_empty(&req->list)) return 0; + /* + * Enable interrupt completion of the request. + */ + i915_gem_request_enable_interrupt(req); + if (i915_gem_request_completed(req)) return 0; @@ -1391,6 +1396,10 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) list_del_init(&request->list); i915_gem_request_remove_from_client(request); + /* In case the request is still in the signal pending list */ + if (!list_empty(&request->signal_list)) + request->cancelled = true; + i915_gem_request_unreference(request); } @@ -2529,6 +2538,12 @@ void __i915_add_request(struct drm_i915_gem_request *request, */ request->postfix = intel_ring_get_tail(ringbuf); + /* + * Add the fence to the pending list before emitting the commands to + * generate a seqno notification interrupt. + */ + i915_gem_request_submit(request); + if (i915.enable_execlists) ret = ring->emit_request(request); else { @@ -2630,6 +2645,9 @@ static void i915_gem_request_free(struct fence *req_fence) i915_gem_context_unreference(ctx); } + if (req->irq_enabled) + req->ring->irq_put(req->ring); + kmem_cache_free(req->i915->requests, req); } @@ -2645,31 +2663,100 @@ static const char *i915_gem_request_get_timeline_name(struct fence *req_fence) return req->ring->name; } -static bool i915_gem_request_enable_signaling(struct fence *req_fence) +/* + * The request has been submitted to the hardware so add the fence to the + * list of signalable fences. + * + * NB: This does not enable interrupts yet. That only occurs on demand when + * the request is actually waited on. However, adding it to the list early + * ensures that there is no race condition where the interrupt could pop + * out prematurely and thus be completely lost. The race is merely that the + * interrupt must be manually checked for after being enabled. + */ +void i915_gem_request_submit(struct drm_i915_gem_request *req) { - /* Interrupt driven fences are not implemented yet.*/ - WARN(true, "This should not be called!"); - return true; + fence_enable_sw_signaling(&req->fence); +} + +/* + * The request is being actively waited on, so enable interrupt based + * completion signalling. + */ +void i915_gem_request_enable_interrupt(struct drm_i915_gem_request *req) +{ + if (req->irq_enabled) + return; + + WARN_ON(!req->ring->irq_get(req->ring)); + req->irq_enabled = true; + + /* + * Because the interrupt is only enabled on demand, there is a race + * where the interrupt can fire before anyone is looking for it. So + * do an explicit check for missed interrupts. + */ + i915_gem_request_notify(req->ring); } -static bool i915_gem_request_is_completed(struct fence *req_fence) +static bool i915_gem_request_enable_signaling(struct fence *req_fence) { struct drm_i915_gem_request *req = container_of(req_fence, typeof(*req), fence); + + i915_gem_request_reference(req); + WARN_ON(!list_empty(&req->signal_list)); + list_add_tail(&req->signal_list, &req->ring->fence_signal_list); + + /* + * Note that signalling is always enabled for every request before + * that request is submitted to the hardware. Therefore there is + * no race condition whereby the signal could pop out before the + * request has been added to the list. Hence no need to check + * for completion, undo the list add and return false. + * + * NB: Interrupts are only enabled on demand. Thus there is still a + * race where the request could complete before the interrupt has + * been enabled. Thus care must be taken at that point. + */ + + return true; +} + +void i915_gem_request_notify(struct intel_engine_cs *ring) +{ + struct drm_i915_gem_request *req, *req_next; + unsigned long flags; u32 seqno; - BUG_ON(req == NULL); + if (list_empty(&ring->fence_signal_list)) + return; + + seqno = ring->get_seqno(ring, false); + + spin_lock_irqsave(&ring->fence_lock, flags); + list_for_each_entry_safe(req, req_next, &ring->fence_signal_list, signal_list) { + if (!req->cancelled) { + if (!i915_seqno_passed(seqno, req->seqno)) + continue; - seqno = req->ring->get_seqno(req->ring, false/*lazy_coherency*/); + fence_signal_locked(&req->fence); + } + + list_del_init(&req->signal_list); + if (req->irq_enabled) { + req->ring->irq_put(req->ring); + req->irq_enabled = false; + } - return i915_seqno_passed(seqno, req->seqno); + list_add_tail(&req->unsignal_list, &req->ring->fence_unsignal_list); + } + spin_unlock_irqrestore(&ring->fence_lock, flags); } static const struct fence_ops i915_gem_request_fops = { .get_driver_name = i915_gem_request_get_driver_name, .get_timeline_name = i915_gem_request_get_timeline_name, .enable_signaling = i915_gem_request_enable_signaling, - .signaled = i915_gem_request_is_completed, .wait = fence_default_wait, .release = i915_gem_request_free, }; @@ -2709,6 +2796,7 @@ int i915_gem_request_alloc(struct intel_engine_cs *ring, goto err; } + INIT_LIST_HEAD(&req->signal_list); fence_init(&req->fence, &i915_gem_request_fops, &ring->fence_lock, ring->fence_context, req->seqno); /* @@ -2829,6 +2917,13 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv, i915_gem_request_retire(request); } + + /* + * Make sure any requests that were on the signal pending list get + * cleaned up. + */ + i915_gem_request_notify(ring); + i915_gem_retire_requests_ring(ring); } void i915_gem_restore_fences(struct drm_device *dev) @@ -2884,6 +2979,13 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring) { WARN_ON(i915_verify_lists(ring->dev)); + /* + * If no-one has waited on a request recently then interrupts will + * not have been enabled and thus no requests will ever be marked as + * completed. So do an interrupt check now. + */ + i915_gem_request_notify(ring); + /* Retire requests first as we use it above for the early return. * If we retire requests last, we may use a later seqno and so clear * the requests lists without clearing the active list, leading to @@ -2925,6 +3027,20 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring) i915_gem_request_assign(&ring->trace_irq_req, NULL); } + while (!list_empty(&ring->fence_unsignal_list)) { + struct drm_i915_gem_request *request; + unsigned long flags; + + spin_lock_irqsave(&ring->fence_lock, flags); + request = list_first_entry(&ring->fence_unsignal_list, + struct drm_i915_gem_request, + unsignal_list); + list_del(&request->unsignal_list); + spin_unlock_irqrestore(&ring->fence_lock, flags); + + i915_gem_request_unreference(request); + } + WARN_ON(i915_verify_lists(ring->dev)); } @@ -5256,6 +5372,8 @@ init_ring_lists(struct intel_engine_cs *ring) { INIT_LIST_HEAD(&ring->active_list); INIT_LIST_HEAD(&ring->request_list); + INIT_LIST_HEAD(&ring->fence_signal_list); + INIT_LIST_HEAD(&ring->fence_unsignal_list); } void i915_init_vm(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index bab2ca2..3390943 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -853,6 +853,8 @@ static void notify_ring(struct intel_engine_cs *ring) trace_i915_gem_request_notify(ring); + i915_gem_request_notify(ring); + wake_up_all(&ring->irq_queue); } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 3dc7e38..b4e45c5 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1651,6 +1651,8 @@ static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin ring->dev = dev; INIT_LIST_HEAD(&ring->active_list); INIT_LIST_HEAD(&ring->request_list); + INIT_LIST_HEAD(&ring->fence_signal_list); + INIT_LIST_HEAD(&ring->fence_unsignal_list); spin_lock_init(&ring->fence_lock); i915_gem_batch_pool_init(dev, &ring->batch_pool); init_waitqueue_head(&ring->irq_queue); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index cb63e2f..4b2b669 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2030,6 +2030,8 @@ static int intel_init_ring_buffer(struct drm_device *dev, INIT_LIST_HEAD(&ring->active_list); INIT_LIST_HEAD(&ring->request_list); INIT_LIST_HEAD(&ring->execlist_queue); + INIT_LIST_HEAD(&ring->fence_signal_list); + INIT_LIST_HEAD(&ring->fence_unsignal_list); spin_lock_init(&ring->fence_lock); i915_gem_batch_pool_init(dev, &ring->batch_pool); ringbuf->size = 32 * PAGE_SIZE; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 8ad25b0..3491b48 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -341,6 +341,8 @@ struct intel_engine_cs { unsigned fence_context; spinlock_t fence_lock; + struct list_head fence_signal_list; + struct list_head fence_unsignal_list; }; bool intel_ring_initialized(struct intel_engine_cs *ring); -- 1.9.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx