Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
---
drivers/gpu/drm/i915/i915_drv.h | 16 ++++++--
drivers/gpu/drm/i915/i915_gem.c | 16 +++++---
drivers/gpu/drm/i915/i915_gem_request.c | 46 ++++++++++++++--------
drivers/gpu/drm/i915/i915_gem_request.h | 66 +++++++++++++++++++++++++-------
drivers/gpu/drm/i915/intel_breadcrumbs.c | 11 ++++--
5 files changed, 114 insertions(+), 41 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 994929660027..3e4feeaeef60 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -4022,14 +4022,24 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms)
}
static inline bool
-__i915_request_irq_complete(struct drm_i915_gem_request *req)
+__i915_request_irq_complete(const struct drm_i915_gem_request *req)
{
struct intel_engine_cs *engine = req->engine;
+ u32 seqno = i915_gem_request_global_seqno(req);
+
+ /* The request was dequeued before we were awoken. We check after
+ * inspecting the hw to confirm that this was the same request
+ * that generated the HWS update. The memory barriers within
+ * the request execution are sufficient to ensure that a check
+ * after reading the value from hw matches this request.
+ */
+ if (!seqno)
+ return false;
/* Before we do the heavier coherent read of the seqno,
* check the value (hopefully) in the CPU cacheline.
*/
- if (__i915_gem_request_completed(req))
+ if (__i915_gem_request_completed(req, seqno))
return true;
/* Ensure our read of the seqno is coherent so that we
@@ -4080,7 +4090,7 @@ __i915_request_irq_complete(struct drm_i915_gem_request *req)
wake_up_process(tsk);
rcu_read_unlock();
- if (__i915_gem_request_completed(req))
+ if (__i915_gem_request_completed(req, seqno))
return true;
}
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index fce2cec8e665..f950cedb6516 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -397,7 +397,7 @@ i915_gem_object_wait_fence(struct dma_fence *fence,
if (flags & I915_WAIT_LOCKED && i915_gem_request_completed(rq))
i915_gem_request_retire_upto(rq);
- if (rps && rq->global_seqno == intel_engine_last_submit(rq->engine)) {
+ if (rps && i915_gem_request_global_seqno(rq) == intel_engine_last_submit(rq->engine)) {
/* The GPU is now idle and this client has stalled.
* Since no other client has submitted a request in the
* meantime, assume that this client is the only one
@@ -2609,7 +2609,8 @@ static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx)
struct drm_i915_gem_request *
i915_gem_find_active_request(struct intel_engine_cs *engine)
{
- struct drm_i915_gem_request *request;
+ struct drm_i915_gem_request *request, *active = NULL;
+ unsigned long flags;
/* We are called by the error capture and reset at a random
* point in time. In particular, note that neither is crucially
@@ -2619,17 +2620,22 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
* extra delay for a recent interrupt is pointless. Hence, we do
* not need an engine->irq_seqno_barrier() before the seqno reads.
*/
+ spin_lock_irqsave(&engine->timeline->lock, flags);
list_for_each_entry(request, &engine->timeline->requests, link) {
- if (__i915_gem_request_completed(request))
+ if (__i915_gem_request_completed(request,
+ request->global_seqno))
continue;
GEM_BUG_ON(request->engine != engine);
GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
&request->fence.flags));
- return request;
+
+ active = request;
+ break;
}
+ spin_unlock_irqrestore(&engine->timeline->lock, flags);
- return NULL;
+ return active;
}
static bool engine_stalled(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 477e8fc125ce..d18f450977e0 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -418,7 +418,6 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request)
intel_engine_enable_signaling(request);
spin_unlock(&request->lock);
- GEM_BUG_ON(!request->global_seqno);
engine->emit_breadcrumb(request,
request->ring->vaddr + request->postfix);
@@ -505,7 +504,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
/* Move the oldest request to the slab-cache (if not in use!) */
req = list_first_entry_or_null(&engine->timeline->requests,
typeof(*req), link);
- if (req && __i915_gem_request_completed(req))
+ if (req && i915_gem_request_completed(req))
i915_gem_request_retire(req);
/* Beware: Dragons be flying overhead.
@@ -611,6 +610,7 @@ static int
i915_gem_request_await_request(struct drm_i915_gem_request *to,
struct drm_i915_gem_request *from)
{
+ u32 seqno;
int ret;
GEM_BUG_ON(to == from);
@@ -633,14 +633,15 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
return ret < 0 ? ret : 0;
}
- if (!from->global_seqno) {
+ seqno = i915_gem_request_global_seqno(from);
+ if (!seqno) {
ret = i915_sw_fence_await_dma_fence(&to->submit,
&from->fence, 0,
GFP_KERNEL);
return ret < 0 ? ret : 0;
}
- if (from->global_seqno <= to->timeline->sync_seqno[from->engine->id])
+ if (seqno <= to->timeline->sync_seqno[from->engine->id])
return 0;
trace_i915_gem_ring_sync_to(to, from);
@@ -658,7 +659,7 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
return ret;
}
- to->timeline->sync_seqno[from->engine->id] = from->global_seqno;
+ to->timeline->sync_seqno[from->engine->id] = seqno;
return 0;
}
@@ -939,7 +940,7 @@ static bool busywait_stop(unsigned long timeout, unsigned int cpu)
}
bool __i915_spin_request(const struct drm_i915_gem_request *req,
- int state, unsigned long timeout_us)
+ u32 seqno, int state, unsigned long timeout_us)
{
struct intel_engine_cs *engine = req->engine;
unsigned int irq, cpu;
@@ -957,7 +958,11 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req,
irq = atomic_read(&engine->irq_count);
timeout_us += local_clock_us(&cpu);
do {
- if (__i915_gem_request_completed(req))
+ if (seqno != i915_gem_request_global_seqno(req))
+ break;
+
+ if (i915_seqno_passed(intel_engine_get_seqno(req->engine),
+ seqno))
return true;
/* Seqno are meant to be ordered *before* the interrupt. If
@@ -1029,11 +1034,15 @@ long i915_wait_request(struct drm_i915_gem_request *req,
if (flags & I915_WAIT_LOCKED)
add_wait_queue(errq, &reset);
+ intel_wait_init(&wait, i915_gem_request_global_seqno(req));
+
reset_wait_queue(&req->execute, &exec);
- if (!req->global_seqno) {
+ if (!wait.seqno) {
do {
set_current_state(state);
- if (req->global_seqno)
+
+ wait.seqno = i915_gem_request_global_seqno(req);
+ if (wait.seqno)
break;
if (flags & I915_WAIT_LOCKED &&
@@ -1061,7 +1070,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
if (timeout < 0)
goto complete;
- GEM_BUG_ON(!req->global_seqno);
+ GEM_BUG_ON(!wait.seqno);
}
GEM_BUG_ON(!i915_sw_fence_signaled(&req->submit));
@@ -1070,7 +1079,6 @@ long i915_wait_request(struct drm_i915_gem_request *req,
goto complete;
set_current_state(state);
- intel_wait_init(&wait, req->global_seqno);
if (intel_engine_add_wait(req->engine, &wait))
/* In order to check that we haven't missed the interrupt
* as we enabled it, we need to kick ourselves to do a
@@ -1091,7 +1099,8 @@ long i915_wait_request(struct drm_i915_gem_request *req,
timeout = io_schedule_timeout(timeout);
- if (intel_wait_complete(&wait))
+ if (intel_wait_complete(&wait) &&
+ i915_gem_request_global_seqno(req) == wait.seqno)
break;
set_current_state(state);
@@ -1142,14 +1151,21 @@ long i915_wait_request(struct drm_i915_gem_request *req,
static void engine_retire_requests(struct intel_engine_cs *engine)
{
struct drm_i915_gem_request *request, *next;
+ u32 seqno = intel_engine_get_seqno(engine);
+ LIST_HEAD(retire);
+ spin_lock_irq(&engine->timeline->lock);
list_for_each_entry_safe(request, next,
&engine->timeline->requests, link) {
- if (!__i915_gem_request_completed(request))
- return;
+ if (!i915_seqno_passed(seqno, request->global_seqno))
+ break;
- i915_gem_request_retire(request);
+ list_move_tail(&request->link, &retire);
}
+ spin_unlock_irq(&engine->timeline->lock);
+
+ list_for_each_entry_safe(request, next, &retire, link)
+ i915_gem_request_retire(request);
}
void i915_gem_retire_requests(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 467d3e13fce0..b81f6709905c 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -135,6 +135,11 @@ struct drm_i915_gem_request {
struct i915_priotree priotree;
struct i915_dependency dep;
+ /** GEM sequence number associated with this request on the
+ * global execution timeline. It is zero when the request is not
+ * on the HW queue (i.e. not on the engine timeline list).
+ * Its value is guarded by the timeline spinlock.
+ */
u32 global_seqno;
/** Position in the ring of the start of the request */
@@ -229,6 +234,30 @@ static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
*pdst = src;
}
+/**
+ * i915_gem_request_global_seqno - report the current global seqno
+ * @request - the request
+ *
+ * A request is assigned a global seqno only when it is on the hardware
+ * execution queue. The global seqno can be used to maintain a list of
+ * requests on the same engine in retirement order, for example for
+ * constructing a priority queue for waiting. Prior to its execution, or
+ * if it is subsequently removed in the event of preemption, its global
+ * seqno is zero. As both insertion and removal from the execution queue
+ * may operate in IRQ context, it is not guarded by the usual struct_mutex
+ * BKL. Instead those relying on the global seqno must be prepared for its
+ * value to change between reads. Only when the request is complete can
+ * the global seqno be stable (due to the memory barriers on submitting
+ * the commands to the hardware to write the breadcrumb, if the HWS shows
+ * that it has passed the global seqno and the global seqno is unchanged
+ * after the read, it is indeed complete).
+ */
+static u32
+i915_gem_request_global_seqno(const struct drm_i915_gem_request *request)
+{
+ return READ_ONCE(request->global_seqno);
+}
+
int
i915_gem_request_await_object(struct drm_i915_gem_request *to,
struct drm_i915_gem_object *obj,
@@ -269,46 +298,55 @@ static inline bool i915_seqno_passed(u32 seq1, u32 seq2)
}
static inline bool
-__i915_gem_request_started(const struct drm_i915_gem_request *req)
+__i915_gem_request_started(const struct drm_i915_gem_request *req, u32 seqno)
{
- GEM_BUG_ON(!req->global_seqno);
+ GEM_BUG_ON(!seqno);
return i915_seqno_passed(intel_engine_get_seqno(req->engine),
- req->global_seqno - 1);
+ seqno - 1);
}
static inline bool
i915_gem_request_started(const struct drm_i915_gem_request *req)
{
- if (!req->global_seqno)
+ u32 seqno;
+
+ seqno = i915_gem_request_global_seqno(req);
+ if (!seqno)
return false;
- return __i915_gem_request_started(req);
+ return __i915_gem_request_started(req, seqno);
}
static inline bool
-__i915_gem_request_completed(const struct drm_i915_gem_request *req)
+__i915_gem_request_completed(const struct drm_i915_gem_request *req, u32 seqno)
{
- GEM_BUG_ON(!req->global_seqno);
- return i915_seqno_passed(intel_engine_get_seqno(req->engine),
- req->global_seqno);
+ GEM_BUG_ON(!seqno);
+ return i915_seqno_passed(intel_engine_get_seqno(req->engine), seqno) &&
+ seqno == i915_gem_request_global_seqno(req);
}
static inline bool
i915_gem_request_completed(const struct drm_i915_gem_request *req)
{
- if (!req->global_seqno)
+ u32 seqno;
+
+ seqno = i915_gem_request_global_seqno(req);
+ if (!seqno)
return false;
- return __i915_gem_request_completed(req);
+ return __i915_gem_request_completed(req, seqno);
}
bool __i915_spin_request(const struct drm_i915_gem_request *request,
- int state, unsigned long timeout_us);
+ u32 seqno, int state, unsigned long timeout_us);
static inline bool i915_spin_request(const struct drm_i915_gem_request *request,
int state, unsigned long timeout_us)
{
- return (__i915_gem_request_started(request) &&
- __i915_spin_request(request, state, timeout_us));
+ u32 seqno;
+
+ seqno = i915_gem_request_global_seqno(request);
+ return (__i915_gem_request_started(request, seqno) &&
+ __i915_spin_request(request, seqno, state, timeout_us));
}
/* We treat requests as fences. This is not be to confused with our
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 4f4e703d1b14..d5bf4c0b2deb 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -545,6 +545,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
struct intel_breadcrumbs *b = &engine->breadcrumbs;
struct rb_node *parent, **p;
bool first, wakeup;
+ u32 seqno;
/* Note that we may be called from an interrupt handler on another
* device (e.g. nouveau signaling a fence completion causing us
@@ -555,11 +556,13 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
/* locked by dma_fence_enable_sw_signaling() (irqsafe fence->lock) */
assert_spin_locked(&request->lock);
- if (!request->global_seqno)
+
+ seqno = i915_gem_request_global_seqno(request);
+ if (!seqno)
return;
request->signaling.wait.tsk = b->signaler;
- request->signaling.wait.seqno = request->global_seqno;
+ request->signaling.wait.seqno = seqno;
i915_gem_request_get(request);
spin_lock(&b->lock);
@@ -583,8 +586,8 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
p = &b->signals.rb_node;
while (*p) {
parent = *p;
- if (i915_seqno_passed(request->global_seqno,
- to_signaler(parent)->global_seqno)) {
+ if (i915_seqno_passed(seqno,
+ to_signaler(parent)->signaling.wait.seqno)) {
p = &parent->rb_right;
first = false;
} else {