On 07/08/2018 08:29, Chris Wilson wrote:
Taken from an idea used for FQ_CODEL, we give the first request of a
new request flows a small priority boost. These flows are likely to
correspond with short, interactive tasks and so be more latency sensitive
than the longer free running queues. As soon as the client has more than
one request in the queue, further requests are not boosted and it settles
down into ordinary steady state behaviour. Such small kicks dramatically
help combat the starvation issue, by allowing each client the opportunity
to run even when the system is under heavy throughput load (within the
constraints of the user selected priority).
v2: Mark the preempted request as the start of a new flow, to prevent a
single client being continually gazumped by its peers.
Testcase: igt/benchmarks/rrul
Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx>
---
drivers/gpu/drm/i915/i915_request.c | 16 ++++++++++++++--
drivers/gpu/drm/i915/i915_scheduler.h | 4 +++-
drivers/gpu/drm/i915/intel_lrc.c | 23 ++++++++++++++++++-----
3 files changed, 35 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index fdcd48651973..334a56daefad 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1125,8 +1125,20 @@ void i915_request_add(struct i915_request *request)
*/
local_bh_disable();
rcu_read_lock(); /* RCU serialisation for set-wedged protection */
- if (engine->schedule)
- engine->schedule(request, &request->gem_context->sched);
+ if (engine->schedule) {
+ struct i915_sched_attr attr = request->gem_context->sched;
+
+ /*
+ * Boost priorities to new clients (new request flows).
+ *
+ * Allow interactive/synchronous clients to jump ahead of
+ * the bulk clients. (FQ_CODEL)
+ */
+ if (!prev || i915_request_completed(prev))
+ attr.priority |= I915_PRIORITY_NEWCLIENT;
+
+ engine->schedule(request, &attr);
+ }
rcu_read_unlock();
i915_sw_fence_commit(&request->submit);
local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 7edfad0abfd7..e9fb6c1d5e42 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -19,12 +19,14 @@ enum {
I915_PRIORITY_INVALID = INT_MIN
};
-#define I915_USER_PRIORITY_SHIFT 0
+#define I915_USER_PRIORITY_SHIFT 1
#define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
#define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
#define I915_PRIORITY_MASK (-I915_PRIORITY_COUNT)
+#define I915_PRIORITY_NEWCLIENT BIT(0)
+
struct i915_sched_attr {
/**
* @priority: execution and service priority
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 1558214eb502..6dec59872282 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -363,9 +363,9 @@ static void unwind_wa_tail(struct i915_request *rq)
static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
{
- struct i915_request *rq, *rn;
+ struct i915_request *rq, *rn, *active = NULL;
struct list_head *uninitialized_var(pl);
- int last_prio = I915_PRIORITY_INVALID;
+ int prio = I915_PRIORITY_INVALID;
lockdep_assert_held(&engine->timeline.lock);
@@ -379,12 +379,25 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
unwind_wa_tail(rq);
GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
- if (rq_prio(rq) != last_prio) {
- last_prio = rq_prio(rq);
- pl = lookup_priolist(engine, last_prio);
+ if (rq_prio(rq) != prio) {
+ prio = rq_prio(rq);
+ pl = lookup_priolist(engine, prio);
}
list_add(&rq->sched.link, pl);
+
+ active = rq;
+ }
+
+ /*
+ * The active request is now effectively the start of a new client
+ * stream, so give it the equivalent small priority bump to prevent
+ * it being gazumped a second time by another peer.
+ */
+ if (!(prio & I915_PRIORITY_NEWCLIENT)) {
+ list_move_tail(&active->sched.link,
+ lookup_priolist(engine,
+ prio | I915_PRIORITY_NEWCLIENT));
}
}
This sounds fair, I think I'm okay with it. Does it still work well for
mixed media workloads?
Regards,
Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx