On 18/05/2020 09:14, Chris Wilson wrote:
Rather than going back and forth between the rb_node entry and the
virtual_engine type, store the ve locally and reuse it. As the
container_of conversion from rb_node to virtual_engine requires a
variable offset, performing that conversion just once shaves off a bit
of code.
Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
---
drivers/gpu/drm/i915/gt/intel_lrc.c | 93 +++++++++++++++--------------
1 file changed, 47 insertions(+), 46 deletions(-)
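For reference, the conversion in question is the rb_entry() at each call
site, which is container_of() with an offset that depends on the runtime
engine->id, since each virtual engine keeps one rb_node per physical engine
in its nodes[] array. A rough self-contained sketch of that pattern (the
rb_to_ve() helper name and the array bound are made up here; the real
structs live in intel_lrc.c):

  #include <linux/kernel.h>       /* container_of() */
  #include <linux/rbtree.h>       /* struct rb_node */

  struct i915_request;

  struct ve_node {
          struct rb_node rb;
  };

  struct virtual_engine {
          struct i915_request *request;
          struct ve_node nodes[8];        /* made-up bound for the sketch */
  };

  /* name made up; the driver open-codes this via rb_entry() */
  static struct virtual_engine *
  rb_to_ve(struct rb_node *rb, unsigned int id)
  {
          /*
           * The member offset is offsetof(nodes[0].rb) plus
           * id * sizeof(struct ve_node), i.e. it has to be computed at
           * run time, so every conversion costs a little address
           * arithmetic; doing it once per dequeue and carrying the ve
           * pointer around avoids repeating it.
           */
          return container_of(rb, struct virtual_engine, nodes[id].rb);
  }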
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 120efb3eaf96..7a5ac3375225 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -451,7 +451,7 @@ static int queue_prio(const struct intel_engine_execlists *execlists)
static inline bool need_preempt(const struct intel_engine_cs *engine,
const struct i915_request *rq,
- struct rb_node *rb)
+ struct virtual_engine *ve)
{
int last_prio;
@@ -488,9 +488,7 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
rq_prio(list_next_entry(rq, sched.link)) > last_prio)
return true;
- if (rb) {
- struct virtual_engine *ve =
- rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+ if (ve) {
bool preempt = false;
if (engine == ve->siblings[0]) { /* only preempt one sibling */
@@ -1812,6 +1810,35 @@ static bool virtual_matches(const struct virtual_engine *ve,
return true;
}
+static struct virtual_engine *
+first_virtual_engine(struct intel_engine_cs *engine)
+{
+ struct intel_engine_execlists *el = &engine->execlists;
+ struct rb_node *rb = rb_first_cached(&el->virtual);
+
+ while (rb) {
+ struct virtual_engine *ve =
+ rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+ struct i915_request *rq = READ_ONCE(ve->request);
+
+ if (!rq) { /* lazily cleanup after another engine handled rq */
+ rb_erase_cached(rb, &el->virtual);
+ RB_CLEAR_NODE(rb);
+ rb = rb_first_cached(&el->virtual);
+ continue;
+ }
+
+ if (!virtual_matches(ve, rq, engine)) {
+ rb = rb_next(rb);
+ continue;
+ }
+
+ return ve;
+ }
+
+ return NULL;
+}
+
static void virtual_xfer_breadcrumbs(struct virtual_engine *ve)
{
/*
@@ -1896,7 +1923,7 @@ static void defer_active(struct intel_engine_cs *engine)
static bool
need_timeslice(const struct intel_engine_cs *engine,
const struct i915_request *rq,
- const struct rb_node *rb)
+ struct virtual_engine *ve)
{
int hint;
@@ -1905,9 +1932,7 @@ need_timeslice(const struct intel_engine_cs *engine,
hint = engine->execlists.queue_priority_hint;
- if (rb) {
- const struct virtual_engine *ve =
- rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+ if (ve) {
const struct intel_engine_cs *inflight =
intel_context_inflight(&ve->context);
@@ -2057,8 +2082,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
struct intel_engine_execlists * const execlists = &engine->execlists;
struct i915_request **port = execlists->pending;
struct i915_request ** const last_port = port + execlists->port_mask;
- struct i915_request * const *active;
+ struct i915_request * const *active = READ_ONCE(execlists->active);
struct i915_request *last;
+ struct virtual_engine *ve;
struct rb_node *rb;
bool submit = false;
@@ -2084,26 +2110,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* and context switches) submission.
*/
- for (rb = rb_first_cached(&execlists->virtual); rb; ) {
- struct virtual_engine *ve =
- rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
- struct i915_request *rq = READ_ONCE(ve->request);
-
- if (!rq) { /* lazily cleanup after another engine handled rq */
- rb_erase_cached(rb, &execlists->virtual);
- RB_CLEAR_NODE(rb);
- rb = rb_first_cached(&execlists->virtual);
- continue;
- }
-
- if (!virtual_matches(ve, rq, engine)) {
- rb = rb_next(rb);
- continue;
- }
-
- break;
- }
-
/*
* If the queue is higher priority than the last
* request in the currently active context, submit afresh.
@@ -2111,10 +2117,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* the active context to interject the preemption request,
* i.e. we will retrigger preemption following the ack in case
* of trouble.
- */
- active = READ_ONCE(execlists->active);
-
- /*
+ *
* In theory we can skip over completed contexts that have not
* yet been processed by events (as those events are in flight):
*
@@ -2125,9 +2128,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* find itself trying to jump back into a context it has just
* completed and barf.
*/
-
if ((last = *active)) {
- if (need_preempt(engine, last, rb)) {
+ ve = first_virtual_engine(engine);
If you left this outside the if..
+
+ if (need_preempt(engine, last, ve)) {
if (i915_request_completed(last)) {
tasklet_hi_schedule(&execlists->tasklet);
return;
@@ -2158,7 +2162,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
__unwind_incomplete_requests(engine);
last = NULL;
- } else if (need_timeslice(engine, last, rb) &&
+ } else if (need_timeslice(engine, last, ve) &&
timeslice_expired(execlists, last)) {
if (i915_request_completed(last)) {
tasklet_hi_schedule(&execlists->tasklet);
@@ -2212,9 +2216,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
}
}
- while (rb) { /* XXX virtual is always taking precedence */
- struct virtual_engine *ve =
- rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+ /* XXX virtual is always taking precedence */
+ while ((ve = first_virtual_engine(engine))) {
... then here you wouldn't have to do a re-lookup of the same node, right?
struct i915_request *rq;
spin_lock(&ve->base.active.lock);
@@ -2222,9 +2225,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
rq = ve->request;
if (unlikely(!rq)) { /* lost the race to a sibling */
spin_unlock(&ve->base.active.lock);
+
+ rb = &ve->nodes[engine->id].rb;
rb_erase_cached(rb, &execlists->virtual);
RB_CLEAR_NODE(rb);
- rb = rb_first_cached(&execlists->virtual);
continue;
}
@@ -2233,11 +2237,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
GEM_BUG_ON(rq->context != &ve->context);
if (rq_prio(rq) >= queue_prio(execlists)) {
- if (!virtual_matches(ve, rq, engine)) {
- spin_unlock(&ve->base.active.lock);
- rb = rb_next(rb);
- continue;
- }
+ GEM_BUG_ON(!virtual_matches(ve, rq, engine));
But you'd have to keep the virtual_matches logic. Don't know, no strong
feelings either way.
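For clarity, roughly what the above would amount to, purely as an untested
sketch against this patch (all names as in the patch):

  ve = first_virtual_engine(engine);      /* hoisted above if (last) */

  if ((last = *active)) {
          if (need_preempt(engine, last, ve)) {
                  /* ... as in the patch ... */
          } else if (need_timeslice(engine, last, ve) &&
                     timeslice_expired(execlists, last)) {
                  /* ... as in the patch ... */
          }
  }

  /* XXX virtual is always taking precedence */
  while (ve) {
          /*
           * The first pass reuses the ve found above instead of
           * re-walking the tree for the same node. Since that ve is a
           * little staler by now, virtual_matches() stays as a real
           * check rather than a GEM_BUG_ON, and every path that loops
           * back refreshes ve below.
           */
          /* ... loop body as in the patch ... */

          ve = first_virtual_engine(engine);
  }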
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
Regards,
Tvrtko
if (last && !can_merge_rq(last, rq)) {
spin_unlock(&ve->base.active.lock);
@@ -2257,6 +2257,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
WRITE_ONCE(ve->request, NULL);
WRITE_ONCE(ve->base.execlists.queue_priority_hint,
INT_MIN);
+
+ rb = &ve->nodes[engine->id].rb;
rb_erase_cached(rb, &execlists->virtual);
RB_CLEAR_NODE(rb);
@@ -2309,7 +2311,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
*/
if (!submit) {
spin_unlock(&ve->base.active.lock);
- rb = rb_first_cached(&execlists->virtual);
continue;
}
}