On 22/02/2017 11:46, Chris Wilson wrote:
After the request is cancelled, we then need to remove it from the global execution timeline and return it to the context timeline, the inverse of submit_request(). Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/i915_gem_request.c | 58 +++++++++++++++++++++- drivers/gpu/drm/i915/i915_gem_request.h | 3 ++ drivers/gpu/drm/i915/intel_breadcrumbs.c | 19 ++++++- drivers/gpu/drm/i915/intel_ringbuffer.h | 6 --- drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c | 6 +++ 5 files changed, 83 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index d18f450977e0..97116e492d01 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -441,6 +441,55 @@ void i915_gem_request_submit(struct drm_i915_gem_request *request) spin_unlock_irqrestore(&engine->timeline->lock, flags); } +void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request) +{ + struct intel_engine_cs *engine = request->engine; + struct intel_timeline *timeline; + + assert_spin_locked(&engine->timeline->lock); + + /* Only unwind in reverse order, required so that the per-context list + * is kept in seqno/ring order. + */ + GEM_BUG_ON(request->global_seqno != engine->timeline->seqno); + engine->timeline->seqno--; + + /* We may be recursing from the signal callback of another i915 fence */
Copy-paste of the comment of there will really be preemption triggered from the signal callback?
+ spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); + request->global_seqno = 0; + if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) + intel_engine_cancel_signaling(request); + spin_unlock(&request->lock); + + /* Transfer back from the global per-engine timeline to per-context */ + timeline = request->timeline; + GEM_BUG_ON(timeline == engine->timeline); + + spin_lock(&timeline->lock); + list_move(&request->link, &timeline->requests); + spin_unlock(&timeline->lock); + + /* We don't need to wake_up any waiters on request->execute, they + * will get woken by any other event or us re-adding this request + * to the engine timeline (__i915_gem_request_submit()). The waiters + * should be quite adapt at finding that the request now has a new + * global_seqno to the one they went to sleep on. + */ +} + +void i915_gem_request_unsubmit(struct drm_i915_gem_request *request) +{ + struct intel_engine_cs *engine = request->engine; + unsigned long flags; + + /* Will be called from irq-context when using foreign fences. */ + spin_lock_irqsave(&engine->timeline->lock, flags); + + __i915_gem_request_unsubmit(request); + + spin_unlock_irqrestore(&engine->timeline->lock, flags); +} + static int __i915_sw_fence_call submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) { @@ -1034,9 +1083,11 @@ long i915_wait_request(struct drm_i915_gem_request *req, if (flags & I915_WAIT_LOCKED) add_wait_queue(errq, &reset); - intel_wait_init(&wait, i915_gem_request_global_seqno(req)); + wait.tsk = current; +restart: reset_wait_queue(&req->execute, &exec); + wait.seqno = i915_gem_request_global_seqno(req);
Not sure if it is worth dropping intel_wait_init, I presume to avoid assigning the task twice? It will still be the same task so just moving the intel_wait_init here would be clearer.
if (!wait.seqno) { do { set_current_state(state); @@ -1135,6 +1186,11 @@ long i915_wait_request(struct drm_i915_gem_request *req, /* Only spin if we know the GPU is processing this request */ if (i915_spin_request(req, state, 2)) break; + + if (i915_gem_request_global_seqno(req) != wait.seqno) { + intel_engine_remove_wait(req->engine, &wait); + goto restart; + } } intel_engine_remove_wait(req->engine, &wait); diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index b81f6709905c..5f73d8c0a38a 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -274,6 +274,9 @@ void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches); void __i915_gem_request_submit(struct drm_i915_gem_request *request); void i915_gem_request_submit(struct drm_i915_gem_request *request); +void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request); +void i915_gem_request_unsubmit(struct drm_i915_gem_request *request); + struct intel_rps_client; #define NO_WAITBOOST ERR_PTR(-1) #define IS_RPS_CLIENT(p) (!IS_ERR(p)) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 882e601ebb09..5bcad7872c08 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -453,7 +453,14 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine, spin_unlock_irq(&b->lock); } -static bool signal_complete(struct drm_i915_gem_request *request) +static bool signal_valid(const struct drm_i915_gem_request *request) +{ + u32 seqno = READ_ONCE(request->global_seqno); + + return seqno == request->signaling.wait.seqno; +} + +static bool signal_complete(const struct drm_i915_gem_request *request) { if (!request) return false; @@ -462,7 +469,7 @@ static bool signal_complete(struct drm_i915_gem_request *request) * signalled that this wait is already completed. */ if (intel_wait_complete(&request->signaling.wait)) - return true; + return signal_valid(request); /* Carefully check if the request is complete, giving time for the * seqno to be visible or if the GPU hung. @@ -542,13 +549,21 @@ static int intel_breadcrumbs_signaler(void *arg) i915_gem_request_put(request); } else { + DEFINE_WAIT(exec); + if (kthread_should_stop()) { GEM_BUG_ON(request); break; } + if (request) + add_wait_queue(&request->execute, &exec); + schedule(); + if (request) + remove_wait_queue(&request->execute, &exec); +
Not directly related but made me think why we are using TASK_INTERRUPTIBLE in the signallers? Shouldn't it be TASK_UNINTERRUPTIBLE and io_schedule? Sounds a bit deja vu though, maybe we have talked about it before..
if (kthread_should_park()) kthread_parkme(); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 45d2c2fa946e..97fde79167a6 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -582,12 +582,6 @@ static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); -static inline void intel_wait_init(struct intel_wait *wait, u32 seqno) -{ - wait->tsk = current; - wait->seqno = seqno; -} - static inline bool intel_wait_complete(const struct intel_wait *wait) { return RB_EMPTY_NODE(&wait->node); diff --git a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c index 6426acc9fdca..62c020c7ea80 100644 --- a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c @@ -28,6 +28,12 @@ #include "mock_gem_device.h" #include "mock_engine.h" +static inline void intel_wait_init(struct intel_wait *wait, u32 seqno) +{ + wait->tsk = current; + wait->seqno = seqno; +} + static int check_rbtree(struct intel_engine_cs *engine, const unsigned long *bitmap, const struct intel_wait *waiters,
Regards, Tvrtko _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx