Drive final request submission from a callback from the fence. This way the request is queued until all dependencies are resolved, at which point it is handed to the backend for queueing to hardware. At this point, no dependencies are set on the request, so the callback is immediate. A side-effect of imposing a heavier-irqsafe spinlock for execlist submission is that we lose the softirq enabling after scheduling the execlists tasklet. To compensate, we manually kickstart the softirq by disabling and enabling the bh around the fence signaling. Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/i915_gem.c | 3 +++ drivers/gpu/drm/i915/i915_gem_request.c | 20 +++++++++++++++++++- drivers/gpu/drm/i915/i915_gem_request.h | 3 +++ drivers/gpu/drm/i915/intel_breadcrumbs.c | 3 +++ drivers/gpu/drm/i915/intel_lrc.c | 5 +++-- 5 files changed, 31 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e5a5a7695cf2..4f5d12e93682 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2548,6 +2548,9 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) if (i915_gem_request_completed(request)) continue; + if (!i915_sw_fence_done(&request->submit)) + break; + return request; } diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index f31246cf4f6e..839510c29130 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -316,6 +316,19 @@ static int i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno) return 0; } +static int __i915_sw_fence_call submit_notify(struct i915_sw_fence *fence) +{ + struct drm_i915_gem_request *request = + container_of(fence, typeof(*request), submit); + + if (i915_sw_fence_done(fence)) + i915_gem_request_put(request); + else + request->engine->submit_request(request); + + return NOTIFY_DONE; +} + /** * i915_gem_request_alloc - allocate a request structure * @@ -413,6 +426,8 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, */ req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST; + i915_sw_fence_init(&i915_gem_request_get(req)->submit, submit_notify); + if (i915.enable_execlists) ret = intel_logical_ring_alloc_request_extras(req); else @@ -528,7 +543,10 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) reserved_tail, ret); i915_gem_mark_busy(engine); - engine->submit_request(request); + + local_bh_disable(); + i915_sw_fence_commit(&request->submit); + local_bh_enable(); /* Kick the execlists tasklet if just scheduled */ } static unsigned long local_clock_us(unsigned int *cpu) diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 49c3c006ec17..50eac3174e63 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -28,6 +28,7 @@ #include <linux/fence.h> #include "i915_gem.h" +#include "i915_sw_fence.h" struct intel_wait { struct rb_node node; @@ -82,6 +83,8 @@ struct drm_i915_gem_request { struct intel_ring *ring; struct intel_signal_node signaling; + struct i915_sw_fence submit; + /** GEM sequence number associated with the previous request, * when the HWS breadcrumb is equal to this the GPU is processing * this request. diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 2491e4c1eaf0..9bad14d22c95 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -462,7 +462,10 @@ static int intel_breadcrumbs_signaler(void *arg) */ intel_engine_remove_wait(engine, &request->signaling.wait); + + local_bh_disable(); fence_signal(&request->fence); + local_bh_enable(); /* kick start the tasklets */ /* Find the next oldest signal. Note that as we have * not been holding the lock, another client may diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index de515d2c13ef..09712d7d707a 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -590,14 +590,15 @@ static void intel_lrc_irq_handler(unsigned long data) static void execlists_submit_request(struct drm_i915_gem_request *request) { struct intel_engine_cs *engine = request->engine; + unsigned long flags; - spin_lock_bh(&engine->execlist_lock); + spin_lock_irqsave(&engine->execlist_lock, flags); list_add_tail(&request->execlist_link, &engine->execlist_queue); if (execlists_elsp_idle(engine)) tasklet_hi_schedule(&engine->irq_tasklet); - spin_unlock_bh(&engine->execlist_lock); + spin_unlock_irqrestore(&engine->execlist_lock, flags); } int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request) -- 2.9.3 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx