To continue the onslaught of removing the assumption of a global execution ordering, another casualty is the engine->timeline. Without an actual timeline to track, it is overkill and we can replace it with a much less grand plain list. We still need a list of requests inflight, for the simple purpose of finding inflight requests (for retiring, resetting, preemption etc). Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/gt/intel_engine.h | 6 ++ drivers/gpu/drm/i915/gt/intel_engine_cs.c | 62 +++++++------- drivers/gpu/drm/i915/gt/intel_engine_types.h | 6 +- drivers/gpu/drm/i915/gt/intel_lrc.c | 84 +++++++++---------- drivers/gpu/drm/i915/gt/intel_reset.c | 10 +-- drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 15 ++-- drivers/gpu/drm/i915/gt/mock_engine.c | 18 ++-- drivers/gpu/drm/i915/gt/selftest_lrc.c | 4 +- drivers/gpu/drm/i915/i915_gpu_error.c | 5 +- drivers/gpu/drm/i915/i915_request.c | 43 +++------- drivers/gpu/drm/i915/i915_request.h | 2 +- drivers/gpu/drm/i915/i915_scheduler.c | 37 ++++---- drivers/gpu/drm/i915/i915_timeline.c | 1 - drivers/gpu/drm/i915/i915_timeline.h | 19 ----- drivers/gpu/drm/i915/i915_timeline_types.h | 4 - drivers/gpu/drm/i915/intel_guc_submission.c | 16 ++-- .../gpu/drm/i915/selftests/mock_timeline.c | 1 - 17 files changed, 145 insertions(+), 188 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 8e1140a0c612..fc9cef0c293b 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -573,4 +573,10 @@ intel_engine_get_hangcheck_seqno(struct intel_engine_cs *engine) return intel_read_status_page(engine, I915_GEM_HWS_HANGCHECK); } +void intel_engine_init_active(struct intel_engine_cs *engine, + unsigned int subclass); +#define ENGINE_PHYSICAL 0 +#define ENGINE_MOCK 1 +#define ENGINE_VIRTUAL 2 + #endif /* _INTEL_RINGBUFFER_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index e268cb4b88d8..8d768f817fad 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -552,14 +552,7 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine) if (err) return err; - err = i915_timeline_init(engine->i915, - &engine->timeline, - engine->status_page.vma); - if (err) - goto err_hwsp; - - i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE); - + intel_engine_init_active(engine, ENGINE_PHYSICAL); intel_engine_init_breadcrumbs(engine); intel_engine_init_execlists(engine); intel_engine_init_hangcheck(engine); @@ -572,10 +565,6 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine) intel_device_default_sseu(&RUNTIME_INFO(engine->i915)->sseu); return 0; - -err_hwsp: - cleanup_status_page(engine); - return err; } /** @@ -734,6 +723,27 @@ static int pin_context(struct i915_gem_context *ctx, return 0; } +void +intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass) +{ + INIT_LIST_HEAD(&engine->active.requests); + + spin_lock_init(&engine->active.lock); + lockdep_set_subclass(&engine->active.lock, subclass); + + /* + * Due to an interesting quirk in lockdep's internal debug tracking, + * after setting a subclass we must ensure the lock is used. Otherwise, + * nr_unused_locks is incremented once too often. + */ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + local_irq_disable(); + lock_map_acquire(&engine->active.lock.dep_map); + lock_map_release(&engine->active.lock.dep_map); + local_irq_enable(); +#endif +} + /** * intel_engines_init_common - initialize cengine state which might require hw access * @engine: Engine to initialize. @@ -797,6 +807,8 @@ int intel_engine_init_common(struct intel_engine_cs *engine) */ void intel_engine_cleanup_common(struct intel_engine_cs *engine) { + GEM_BUG_ON(!list_empty(&engine->active.requests)); + cleanup_status_page(engine); intel_engine_fini_breadcrumbs(engine); @@ -811,8 +823,6 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) intel_context_unpin(engine->kernel_context); GEM_BUG_ON(!llist_empty(&engine->barrier_tasks)); - i915_timeline_fini(&engine->timeline); - intel_wa_list_free(&engine->ctx_wa_list); intel_wa_list_free(&engine->wa_list); intel_wa_list_free(&engine->whitelist); @@ -1412,16 +1422,6 @@ void intel_engine_dump(struct intel_engine_cs *engine, drm_printf(m, "\tRequests:\n"); - rq = list_first_entry(&engine->timeline.requests, - struct i915_request, link); - if (&rq->link != &engine->timeline.requests) - print_request(m, rq, "\t\tfirst "); - - rq = list_last_entry(&engine->timeline.requests, - struct i915_request, link); - if (&rq->link != &engine->timeline.requests) - print_request(m, rq, "\t\tlast "); - rq = intel_engine_find_active_request(engine); if (rq) { print_request(m, rq, "\t\tactive "); @@ -1502,7 +1502,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine) if (!intel_engine_supports_stats(engine)) return -ENODEV; - spin_lock_irqsave(&engine->timeline.lock, flags); + spin_lock_irqsave(&engine->active.lock, flags); write_seqlock(&engine->stats.lock); if (unlikely(engine->stats.enabled == ~0)) { @@ -1528,7 +1528,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine) unlock: write_sequnlock(&engine->stats.lock); - spin_unlock_irqrestore(&engine->timeline.lock, flags); + spin_unlock_irqrestore(&engine->active.lock, flags); return err; } @@ -1613,22 +1613,22 @@ intel_engine_find_active_request(struct intel_engine_cs *engine) * At all other times, we must assume the GPU is still running, but * we only care about the snapshot of this moment. */ - spin_lock_irqsave(&engine->timeline.lock, flags); - list_for_each_entry(request, &engine->timeline.requests, link) { + spin_lock_irqsave(&engine->active.lock, flags); + list_for_each_entry(request, &engine->active.requests, sched.link) { if (i915_request_completed(request)) continue; if (!i915_request_started(request)) - break; + continue; /* More than one preemptible request may match! */ if (!match_ring(request)) - break; + continue; active = request; break; } - spin_unlock_irqrestore(&engine->timeline.lock, flags); + spin_unlock_irqrestore(&engine->active.lock, flags); return active; } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 1f0c4dfbff0e..4f4f796bc0b8 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -286,7 +286,11 @@ struct intel_engine_cs { struct intel_ring *buffer; - struct i915_timeline timeline; + struct { + spinlock_t lock; + struct list_head requests; + } active; + struct llist_head barrier_tasks; struct intel_context *kernel_context; /* pinned */ diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 091567eca605..7274eda59ad8 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -310,8 +310,8 @@ static inline bool need_preempt(const struct intel_engine_cs *engine, * Check against the first request in ELSP[1], it will, thanks to the * power of PI, be the highest priority of that context. */ - if (!list_is_last(&rq->link, &engine->timeline.requests) && - rq_prio(list_next_entry(rq, link)) > last_prio) + if (!list_is_last(&rq->sched.link, &engine->active.requests) && + rq_prio(list_next_entry(rq, sched.link)) > last_prio) return true; if (rb) { /* XXX virtual precedence */ @@ -320,10 +320,10 @@ static inline bool need_preempt(const struct intel_engine_cs *engine, bool preempt = false; if (engine == ve->siblings[0]) { /* only preempt one sibling */ - spin_lock(&ve->base.timeline.lock); + spin_lock(&ve->base.active.lock); if (ve->request) preempt = rq_prio(ve->request) > last_prio; - spin_unlock(&ve->base.timeline.lock); + spin_unlock(&ve->base.active.lock); } if (preempt) @@ -443,11 +443,11 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) struct list_head *uninitialized_var(pl); int prio = I915_PRIORITY_INVALID | ACTIVE_PRIORITY; - lockdep_assert_held(&engine->timeline.lock); + lockdep_assert_held(&engine->active.lock); list_for_each_entry_safe_reverse(rq, rn, - &engine->timeline.requests, - link) { + &engine->active.requests, + sched.link) { struct intel_engine_cs *owner; if (i915_request_completed(rq)) @@ -474,8 +474,9 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) } GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); - list_add(&rq->sched.link, pl); + list_move(&rq->sched.link, pl); } else { + list_del_init(&rq->sched.link); if (__i915_request_has_started(rq)) rq->sched.attr.priority |= ACTIVE_PRIORITY; @@ -942,11 +943,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine) rb_entry(rb, typeof(*ve), nodes[engine->id].rb); struct i915_request *rq; - spin_lock(&ve->base.timeline.lock); + spin_lock(&ve->base.active.lock); rq = ve->request; if (unlikely(!rq)) { /* lost the race to a sibling */ - spin_unlock(&ve->base.timeline.lock); + spin_unlock(&ve->base.active.lock); rb_erase_cached(rb, &execlists->virtual); RB_CLEAR_NODE(rb); rb = rb_first_cached(&execlists->virtual); @@ -959,13 +960,13 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (rq_prio(rq) >= queue_prio(execlists)) { if (!virtual_matches(ve, rq, engine)) { - spin_unlock(&ve->base.timeline.lock); + spin_unlock(&ve->base.active.lock); rb = rb_next(rb); continue; } if (last && !can_merge_rq(last, rq)) { - spin_unlock(&ve->base.timeline.lock); + spin_unlock(&ve->base.active.lock); return; /* leave this rq for another engine */ } @@ -1017,7 +1018,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) last = rq; } - spin_unlock(&ve->base.timeline.lock); + spin_unlock(&ve->base.active.lock); break; } @@ -1074,8 +1075,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine) GEM_BUG_ON(port_isset(port)); } - list_del_init(&rq->sched.link); - __i915_request_submit(rq); trace_i915_request_in(rq, port_index(port, execlists)); @@ -1176,7 +1175,7 @@ static void process_csb(struct intel_engine_cs *engine) const u8 num_entries = execlists->csb_size; u8 head, tail; - lockdep_assert_held(&engine->timeline.lock); + lockdep_assert_held(&engine->active.lock); /* * Note that csb_write, csb_status may be either in HWSP or mmio. @@ -1336,7 +1335,7 @@ static void process_csb(struct intel_engine_cs *engine) static void __execlists_submission_tasklet(struct intel_engine_cs *const engine) { - lockdep_assert_held(&engine->timeline.lock); + lockdep_assert_held(&engine->active.lock); process_csb(engine); if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT)) @@ -1357,9 +1356,9 @@ static void execlists_submission_tasklet(unsigned long data) !!engine->i915->gt.awake, engine->execlists.active); - spin_lock_irqsave(&engine->timeline.lock, flags); + spin_lock_irqsave(&engine->active.lock, flags); __execlists_submission_tasklet(engine); - spin_unlock_irqrestore(&engine->timeline.lock, flags); + spin_unlock_irqrestore(&engine->active.lock, flags); } static void queue_request(struct intel_engine_cs *engine, @@ -1396,7 +1395,7 @@ static void execlists_submit_request(struct i915_request *request) unsigned long flags; /* Will be called from irq-context when using foreign fences. */ - spin_lock_irqsave(&engine->timeline.lock, flags); + spin_lock_irqsave(&engine->active.lock, flags); queue_request(engine, &request->sched, rq_prio(request)); @@ -1405,7 +1404,7 @@ static void execlists_submit_request(struct i915_request *request) submit_queue(engine, rq_prio(request)); - spin_unlock_irqrestore(&engine->timeline.lock, flags); + spin_unlock_irqrestore(&engine->active.lock, flags); } static void __execlists_context_fini(struct intel_context *ce) @@ -2056,8 +2055,8 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine) intel_engine_stop_cs(engine); /* And flush any current direct submission. */ - spin_lock_irqsave(&engine->timeline.lock, flags); - spin_unlock_irqrestore(&engine->timeline.lock, flags); + spin_lock_irqsave(&engine->active.lock, flags); + spin_unlock_irqrestore(&engine->active.lock, flags); } static bool lrc_regs_ok(const struct i915_request *rq) @@ -2203,11 +2202,11 @@ static void execlists_reset(struct intel_engine_cs *engine, bool stalled) GEM_TRACE("%s\n", engine->name); - spin_lock_irqsave(&engine->timeline.lock, flags); + spin_lock_irqsave(&engine->active.lock, flags); __execlists_reset(engine, stalled); - spin_unlock_irqrestore(&engine->timeline.lock, flags); + spin_unlock_irqrestore(&engine->active.lock, flags); } static void nop_submission_tasklet(unsigned long data) @@ -2238,12 +2237,12 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) * submission's irq state, we also wish to remind ourselves that * it is irq state.) */ - spin_lock_irqsave(&engine->timeline.lock, flags); + spin_lock_irqsave(&engine->active.lock, flags); __execlists_reset(engine, true); /* Mark all executing requests as skipped. */ - list_for_each_entry(rq, &engine->timeline.requests, link) { + list_for_each_entry(rq, &engine->active.requests, link) { if (!i915_request_signaled(rq)) dma_fence_set_error(&rq->fence, -EIO); @@ -2274,7 +2273,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) rb_erase_cached(rb, &execlists->virtual); RB_CLEAR_NODE(rb); - spin_lock(&ve->base.timeline.lock); + spin_lock(&ve->base.active.lock); if (ve->request) { ve->request->engine = engine; __i915_request_submit(ve->request); @@ -2283,7 +2282,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) ve->base.execlists.queue_priority_hint = INT_MIN; ve->request = NULL; } - spin_unlock(&ve->base.timeline.lock); + spin_unlock(&ve->base.active.lock); } /* Remaining _unready_ requests will be nop'ed when submitted */ @@ -2295,7 +2294,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet)); execlists->tasklet.func = nop_submission_tasklet; - spin_unlock_irqrestore(&engine->timeline.lock, flags); + spin_unlock_irqrestore(&engine->active.lock, flags); } static void execlists_reset_finish(struct intel_engine_cs *engine) @@ -3038,12 +3037,12 @@ static void virtual_context_destroy(struct kref *kref) if (RB_EMPTY_NODE(node)) continue; - spin_lock_irq(&sibling->timeline.lock); + spin_lock_irq(&sibling->active.lock); if (!RB_EMPTY_NODE(node)) rb_erase_cached(node, &sibling->execlists.virtual); - spin_unlock_irq(&sibling->timeline.lock); + spin_unlock_irq(&sibling->active.lock); } GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet)); @@ -3051,8 +3050,6 @@ static void virtual_context_destroy(struct kref *kref) __execlists_context_fini(&ve->context); kfree(ve->bonds); - - i915_timeline_fini(&ve->base.timeline); kfree(ve); } @@ -3171,16 +3168,16 @@ static void virtual_submission_tasklet(unsigned long data) if (unlikely(!(mask & sibling->mask))) { if (!RB_EMPTY_NODE(&node->rb)) { - spin_lock(&sibling->timeline.lock); + spin_lock(&sibling->active.lock); rb_erase_cached(&node->rb, &sibling->execlists.virtual); RB_CLEAR_NODE(&node->rb); - spin_unlock(&sibling->timeline.lock); + spin_unlock(&sibling->active.lock); } continue; } - spin_lock(&sibling->timeline.lock); + spin_lock(&sibling->active.lock); if (!RB_EMPTY_NODE(&node->rb)) { /* @@ -3224,7 +3221,7 @@ static void virtual_submission_tasklet(unsigned long data) tasklet_hi_schedule(&sibling->execlists.tasklet); } - spin_unlock(&sibling->timeline.lock); + spin_unlock(&sibling->active.lock); } local_irq_enable(); } @@ -3307,10 +3304,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx, snprintf(ve->base.name, sizeof(ve->base.name), "virtual"); - err = i915_timeline_init(ctx->i915, &ve->base.timeline, NULL); - if (err) - goto err_put; - i915_timeline_set_subclass(&ve->base.timeline, TIMELINE_VIRTUAL); + intel_engine_init_active(&ve->base, ENGINE_VIRTUAL); ve->base.cops = &virtual_context_ops; ve->base.request_alloc = execlists_request_alloc; @@ -3473,11 +3467,11 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, unsigned int count; struct rb_node *rb; - spin_lock_irqsave(&engine->timeline.lock, flags); + spin_lock_irqsave(&engine->active.lock, flags); last = NULL; count = 0; - list_for_each_entry(rq, &engine->timeline.requests, link) { + list_for_each_entry(rq, &engine->active.requests, sched.link) { if (count++ < max - 1) show_request(m, rq, "\t\tE "); else @@ -3540,7 +3534,7 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, show_request(m, last, "\t\tV "); } - spin_unlock_irqrestore(&engine->timeline.lock, flags); + spin_unlock_irqrestore(&engine->active.lock, flags); } void intel_lr_context_reset(struct intel_engine_cs *engine, diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 1f2833b5bf97..dc75f3ba5ac7 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -47,12 +47,12 @@ static void engine_skip_context(struct i915_request *rq) struct intel_engine_cs *engine = rq->engine; struct i915_gem_context *hung_ctx = rq->gem_context; - lockdep_assert_held(&engine->timeline.lock); + lockdep_assert_held(&engine->active.lock); if (!i915_request_is_active(rq)) return; - list_for_each_entry_continue(rq, &engine->timeline.requests, link) + list_for_each_entry_continue(rq, &engine->active.requests, sched.link) if (rq->gem_context == hung_ctx) i915_request_skip(rq, -EIO); } @@ -128,7 +128,7 @@ void i915_reset_request(struct i915_request *rq, bool guilty) rq->fence.seqno, yesno(guilty)); - lockdep_assert_held(&rq->engine->timeline.lock); + lockdep_assert_held(&rq->engine->active.lock); GEM_BUG_ON(i915_request_completed(rq)); if (guilty) { @@ -770,10 +770,10 @@ static void nop_submit_request(struct i915_request *request) engine->name, request->fence.context, request->fence.seqno); dma_fence_set_error(&request->fence, -EIO); - spin_lock_irqsave(&engine->timeline.lock, flags); + spin_lock_irqsave(&engine->active.lock, flags); __i915_request_submit(request); i915_request_mark_complete(request); - spin_unlock_irqrestore(&engine->timeline.lock, flags); + spin_unlock_irqrestore(&engine->active.lock, flags); intel_engine_queue_breadcrumbs(engine); } diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c index 3bbd9b622695..14ddcd7391b1 100644 --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c @@ -750,14 +750,13 @@ static void reset_prepare(struct intel_engine_cs *engine) static void reset_ring(struct intel_engine_cs *engine, bool stalled) { - struct i915_timeline *tl = &engine->timeline; struct i915_request *pos, *rq; unsigned long flags; u32 head; rq = NULL; - spin_lock_irqsave(&tl->lock, flags); - list_for_each_entry(pos, &tl->requests, link) { + spin_lock_irqsave(&engine->active.lock, flags); + list_for_each_entry(pos, &engine->active.requests, sched.link) { if (!i915_request_completed(pos)) { rq = pos; break; @@ -811,7 +810,7 @@ static void reset_ring(struct intel_engine_cs *engine, bool stalled) } engine->buffer->head = intel_ring_wrap(engine->buffer, head); - spin_unlock_irqrestore(&tl->lock, flags); + spin_unlock_irqrestore(&engine->active.lock, flags); } static void reset_finish(struct intel_engine_cs *engine) @@ -886,10 +885,10 @@ static void cancel_requests(struct intel_engine_cs *engine) struct i915_request *request; unsigned long flags; - spin_lock_irqsave(&engine->timeline.lock, flags); + spin_lock_irqsave(&engine->active.lock, flags); /* Mark all submitted requests as skipped. */ - list_for_each_entry(request, &engine->timeline.requests, link) { + list_for_each_entry(request, &engine->active.requests, sched.link) { if (!i915_request_signaled(request)) dma_fence_set_error(&request->fence, -EIO); @@ -898,7 +897,7 @@ static void cancel_requests(struct intel_engine_cs *engine) /* Remaining _unready_ requests will be nop'ed when submitted */ - spin_unlock_irqrestore(&engine->timeline.lock, flags); + spin_unlock_irqrestore(&engine->active.lock, flags); } static void i9xx_submit_request(struct i915_request *request) @@ -1284,8 +1283,6 @@ intel_engine_create_ring(struct intel_engine_cs *engine, GEM_BUG_ON(!is_power_of_2(size)); GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES); - GEM_BUG_ON(timeline == &engine->timeline); - lockdep_assert_held(&engine->i915->drm.struct_mutex); ring = kzalloc(sizeof(*ring), GFP_KERNEL); if (!ring) diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index b7675ef18523..00c666d3e652 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -229,17 +229,17 @@ static void mock_cancel_requests(struct intel_engine_cs *engine) struct i915_request *request; unsigned long flags; - spin_lock_irqsave(&engine->timeline.lock, flags); + spin_lock_irqsave(&engine->active.lock, flags); /* Mark all submitted requests as skipped. */ - list_for_each_entry(request, &engine->timeline.requests, sched.link) { + list_for_each_entry(request, &engine->active.requests, sched.link) { if (!i915_request_signaled(request)) dma_fence_set_error(&request->fence, -EIO); i915_request_mark_complete(request); } - spin_unlock_irqrestore(&engine->timeline.lock, flags); + spin_unlock_irqrestore(&engine->active.lock, flags); } struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, @@ -285,28 +285,23 @@ int mock_engine_init(struct intel_engine_cs *engine) struct drm_i915_private *i915 = engine->i915; int err; + intel_engine_init_active(engine, ENGINE_MOCK); intel_engine_init_breadcrumbs(engine); intel_engine_init_execlists(engine); intel_engine_init__pm(engine); - if (i915_timeline_init(i915, &engine->timeline, NULL)) - goto err_breadcrumbs; - i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE); - engine->kernel_context = i915_gem_context_get_engine(i915->kernel_context, engine->id); if (IS_ERR(engine->kernel_context)) - goto err_timeline; + goto err_breadcrumbs; err = intel_context_pin(engine->kernel_context); intel_context_put(engine->kernel_context); if (err) - goto err_timeline; + goto err_breadcrumbs; return 0; -err_timeline: - i915_timeline_fini(&engine->timeline); err_breadcrumbs: intel_engine_fini_breadcrumbs(engine); return -ENOMEM; @@ -340,7 +335,6 @@ void mock_engine_free(struct intel_engine_cs *engine) intel_context_unpin(engine->kernel_context); intel_engine_fini_breadcrumbs(engine); - i915_timeline_fini(&engine->timeline); kfree(engine); } diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index fc6b9a53bf02..3e9bbcb45fa2 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -642,13 +642,15 @@ static struct i915_request *dummy_request(struct intel_engine_cs *engine) GEM_BUG_ON(i915_request_completed(rq)); i915_sw_fence_init(&rq->submit, dummy_notify); - i915_sw_fence_commit(&rq->submit); + set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); return rq; } static void dummy_request_free(struct i915_request *dummy) { + i915_sw_fence_commit(&dummy->submit); + i915_request_mark_complete(dummy); i915_sched_node_fini(&dummy->sched); i915_sw_fence_fini(&dummy->submit); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 144e51ce36bc..679882046527 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1266,7 +1266,7 @@ static void engine_record_requests(struct intel_engine_cs *engine, count = 0; request = first; - list_for_each_entry_from(request, &engine->timeline.requests, link) + list_for_each_entry_from(request, &engine->active.requests, sched.link) count++; if (!count) return; @@ -1279,7 +1279,8 @@ static void engine_record_requests(struct intel_engine_cs *engine, count = 0; request = first; - list_for_each_entry_from(request, &engine->timeline.requests, link) { + list_for_each_entry_from(request, + &engine->active.requests, sched.link) { if (count >= ee->num_requests) { /* * If the ring request list was changed in diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 3f3a6c0c8363..34c597372c1e 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -232,9 +232,9 @@ static bool i915_request_retire(struct i915_request *rq) local_irq_disable(); - spin_lock(&rq->engine->timeline.lock); - list_del(&rq->link); - spin_unlock(&rq->engine->timeline.lock); + spin_lock(&rq->engine->active.lock); + list_del(&rq->sched.link); + spin_unlock(&rq->engine->active.lock); spin_lock(&rq->lock); i915_request_mark_complete(rq); @@ -254,6 +254,7 @@ static bool i915_request_retire(struct i915_request *rq) intel_context_unpin(rq->hw_context); i915_request_remove_from_client(rq); + list_del(&rq->link); free_capture_list(rq); i915_sched_node_fini(&rq->sched); @@ -373,28 +374,17 @@ __i915_request_await_execution(struct i915_request *rq, return 0; } -static void move_to_timeline(struct i915_request *request, - struct i915_timeline *timeline) -{ - GEM_BUG_ON(request->timeline == &request->engine->timeline); - lockdep_assert_held(&request->engine->timeline.lock); - - spin_lock(&request->timeline->lock); - list_move_tail(&request->link, &timeline->requests); - spin_unlock(&request->timeline->lock); -} - void __i915_request_submit(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; - GEM_TRACE("%s fence %llx:%lld -> current %d\n", + GEM_TRACE("%s fence %llx:%lld, current %d\n", engine->name, request->fence.context, request->fence.seqno, hwsp_seqno(request)); GEM_BUG_ON(!irqs_disabled()); - lockdep_assert_held(&engine->timeline.lock); + lockdep_assert_held(&engine->active.lock); if (i915_gem_context_is_banned(request->gem_context)) i915_request_skip(request, -EIO); @@ -402,6 +392,8 @@ void __i915_request_submit(struct i915_request *request) /* We may be recursing from the signal callback of another i915 fence */ spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); + list_move_tail(&request->sched.link, &engine->active.requests); + GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)); set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags); @@ -416,9 +408,6 @@ void __i915_request_submit(struct i915_request *request) engine->emit_fini_breadcrumb(request, request->ring->vaddr + request->postfix); - /* Transfer from per-context onto the global per-engine timeline */ - move_to_timeline(request, &engine->timeline); - engine->serial++; trace_i915_request_execute(request); @@ -430,11 +419,11 @@ void i915_request_submit(struct i915_request *request) unsigned long flags; /* Will be called from irq-context when using foreign fences. */ - spin_lock_irqsave(&engine->timeline.lock, flags); + spin_lock_irqsave(&engine->active.lock, flags); __i915_request_submit(request); - spin_unlock_irqrestore(&engine->timeline.lock, flags); + spin_unlock_irqrestore(&engine->active.lock, flags); } void __i915_request_unsubmit(struct i915_request *request) @@ -447,7 +436,7 @@ void __i915_request_unsubmit(struct i915_request *request) hwsp_seqno(request)); GEM_BUG_ON(!irqs_disabled()); - lockdep_assert_held(&engine->timeline.lock); + lockdep_assert_held(&engine->active.lock); /* * Only unwind in reverse order, required so that the per-context list @@ -474,9 +463,6 @@ void __i915_request_unsubmit(struct i915_request *request) spin_unlock(&request->lock); - /* Transfer back from the global per-engine timeline to per-context */ - move_to_timeline(request, request->timeline); - /* * We don't need to wake_up any waiters on request->execute, they * will get woken by any other event or us re-adding this request @@ -492,11 +478,11 @@ void i915_request_unsubmit(struct i915_request *request) unsigned long flags; /* Will be called from irq-context when using foreign fences. */ - spin_lock_irqsave(&engine->timeline.lock, flags); + spin_lock_irqsave(&engine->active.lock, flags); __i915_request_unsubmit(request); - spin_unlock_irqrestore(&engine->timeline.lock, flags); + spin_unlock_irqrestore(&engine->active.lock, flags); } static int __i915_sw_fence_call @@ -662,7 +648,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) rq->engine = ce->engine; rq->ring = ce->ring; rq->timeline = tl; - GEM_BUG_ON(rq->timeline == &ce->engine->timeline); rq->hwsp_seqno = tl->hwsp_seqno; rq->hwsp_cacheline = tl->hwsp_cacheline; rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */ @@ -1086,9 +1071,7 @@ __i915_request_add_to_timeline(struct i915_request *rq) 0); } - spin_lock_irq(&timeline->lock); list_add_tail(&rq->link, &timeline->requests); - spin_unlock_irq(&timeline->lock); /* * Make sure that no request gazumped us - if it was allocated after diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index c9f7d07991c8..edbbdfec24ab 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -217,7 +217,7 @@ struct i915_request { bool waitboost; - /** engine->request_list entry for this request */ + /** timeline->request entry for this request */ struct list_head link; /** ring->request_list entry for this request */ diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index b58d9c23a876..946b5eb443bf 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -106,8 +106,6 @@ void i915_sched_node_fini(struct i915_sched_node *node) { struct i915_dependency *dep, *tmp; - GEM_BUG_ON(!list_empty(&node->link)); - spin_lock_irq(&schedule_lock); /* @@ -180,7 +178,7 @@ i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio) bool first = true; int idx, i; - lockdep_assert_held(&engine->timeline.lock); + lockdep_assert_held(&engine->active.lock); assert_priolists(execlists); /* buckets sorted from highest [in slot 0] to lowest priority */ @@ -260,9 +258,9 @@ sched_lock_engine(const struct i915_sched_node *node, * check that the rq still belongs to the newly locked engine. */ while (locked != (engine = READ_ONCE(rq->engine))) { - spin_unlock(&locked->timeline.lock); + spin_unlock(&locked->active.lock); memset(cache, 0, sizeof(*cache)); - spin_lock(&engine->timeline.lock); + spin_lock(&engine->active.lock); locked = engine; } @@ -365,7 +363,7 @@ static void __i915_schedule(struct i915_request *rq, memset(&cache, 0, sizeof(cache)); engine = rq->engine; - spin_lock(&engine->timeline.lock); + spin_lock(&engine->active.lock); /* Fifo and depth-first replacement ensure our deps execute before us */ list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) { @@ -374,7 +372,7 @@ static void __i915_schedule(struct i915_request *rq, INIT_LIST_HEAD(&dep->dfs_link); engine = sched_lock_engine(node, engine, &cache); - lockdep_assert_held(&engine->timeline.lock); + lockdep_assert_held(&engine->active.lock); /* Recheck after acquiring the engine->timeline.lock */ if (prio <= node->attr.priority || node_signaled(node)) @@ -383,14 +381,8 @@ static void __i915_schedule(struct i915_request *rq, GEM_BUG_ON(node_to_request(node)->engine != engine); node->attr.priority = prio; - if (!list_empty(&node->link)) { - GEM_BUG_ON(intel_engine_is_virtual(engine)); - if (!cache.priolist) - cache.priolist = - i915_sched_lookup_priolist(engine, - prio); - list_move_tail(&node->link, cache.priolist); - } else { + + if (!i915_sw_fence_done(&node_to_request(node)->submit)) { /* * If the request is not in the priolist queue because * it is not yet runnable, then it doesn't contribute @@ -399,8 +391,17 @@ static void __i915_schedule(struct i915_request *rq, * queue; but in that case we may still need to reorder * the inflight requests. */ - if (!i915_sw_fence_done(&node_to_request(node)->submit)) - continue; + continue; + } + + if (!list_empty(&node->link) && + !i915_request_is_active(node_to_request(node))) { + GEM_BUG_ON(intel_engine_is_virtual(engine)); + if (!cache.priolist) + cache.priolist = + i915_sched_lookup_priolist(engine, + prio); + list_move_tail(&node->link, cache.priolist); } if (prio <= engine->execlists.queue_priority_hint) @@ -419,7 +420,7 @@ static void __i915_schedule(struct i915_request *rq, tasklet_hi_schedule(&engine->execlists.tasklet); } - spin_unlock(&engine->timeline.lock); + spin_unlock(&engine->active.lock); } void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr) diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c index 000e1a9b6750..c311ce9c6f9d 100644 --- a/drivers/gpu/drm/i915/i915_timeline.c +++ b/drivers/gpu/drm/i915/i915_timeline.c @@ -251,7 +251,6 @@ int i915_timeline_init(struct drm_i915_private *i915, timeline->fence_context = dma_fence_context_alloc(1); - spin_lock_init(&timeline->lock); mutex_init(&timeline->mutex); INIT_ACTIVE_REQUEST(&timeline->last_request); diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h index 27668a1a69a3..36e5e5a65155 100644 --- a/drivers/gpu/drm/i915/i915_timeline.h +++ b/drivers/gpu/drm/i915/i915_timeline.h @@ -36,25 +36,6 @@ int i915_timeline_init(struct drm_i915_private *i915, struct i915_vma *hwsp); void i915_timeline_fini(struct i915_timeline *tl); -static inline void -i915_timeline_set_subclass(struct i915_timeline *timeline, - unsigned int subclass) -{ - lockdep_set_subclass(&timeline->lock, subclass); - - /* - * Due to an interesting quirk in lockdep's internal debug tracking, - * after setting a subclass we must ensure the lock is used. Otherwise, - * nr_unused_locks is incremented once too often. - */ -#ifdef CONFIG_DEBUG_LOCK_ALLOC - local_irq_disable(); - lock_map_acquire(&timeline->lock.dep_map); - lock_map_release(&timeline->lock.dep_map); - local_irq_enable(); -#endif -} - struct i915_timeline * i915_timeline_create(struct drm_i915_private *i915, struct i915_vma *global_hwsp); diff --git a/drivers/gpu/drm/i915/i915_timeline_types.h b/drivers/gpu/drm/i915/i915_timeline_types.h index 1688705f4a2b..fce5cb4f1090 100644 --- a/drivers/gpu/drm/i915/i915_timeline_types.h +++ b/drivers/gpu/drm/i915/i915_timeline_types.h @@ -23,10 +23,6 @@ struct i915_timeline { u64 fence_context; u32 seqno; - spinlock_t lock; -#define TIMELINE_CLIENT 0 /* default subclass */ -#define TIMELINE_ENGINE 1 -#define TIMELINE_VIRTUAL 2 struct mutex mutex; /* protects the flow of requests */ unsigned int pin_count; diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index cf6a2a4f8846..0f7809fa08a5 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -739,7 +739,7 @@ static bool __guc_dequeue(struct intel_engine_cs *engine) bool submit = false; struct rb_node *rb; - lockdep_assert_held(&engine->timeline.lock); + lockdep_assert_held(&engine->active.lock); if (port_isset(port)) { if (intel_engine_has_preemption(engine)) { @@ -820,7 +820,7 @@ static void guc_submission_tasklet(unsigned long data) struct i915_request *rq; unsigned long flags; - spin_lock_irqsave(&engine->timeline.lock, flags); + spin_lock_irqsave(&engine->active.lock, flags); rq = port_request(port); while (rq && i915_request_completed(rq)) { @@ -845,7 +845,7 @@ static void guc_submission_tasklet(unsigned long data) if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT)) guc_dequeue(engine); - spin_unlock_irqrestore(&engine->timeline.lock, flags); + spin_unlock_irqrestore(&engine->active.lock, flags); } static void guc_reset_prepare(struct intel_engine_cs *engine) @@ -882,7 +882,7 @@ static void guc_reset(struct intel_engine_cs *engine, bool stalled) struct i915_request *rq; unsigned long flags; - spin_lock_irqsave(&engine->timeline.lock, flags); + spin_lock_irqsave(&engine->active.lock, flags); execlists_cancel_port_requests(execlists); @@ -898,7 +898,7 @@ static void guc_reset(struct intel_engine_cs *engine, bool stalled) intel_lr_context_reset(engine, rq->hw_context, rq->head, stalled); out_unlock: - spin_unlock_irqrestore(&engine->timeline.lock, flags); + spin_unlock_irqrestore(&engine->active.lock, flags); } static void guc_cancel_requests(struct intel_engine_cs *engine) @@ -924,13 +924,13 @@ static void guc_cancel_requests(struct intel_engine_cs *engine) * submission's irq state, we also wish to remind ourselves that * it is irq state.) */ - spin_lock_irqsave(&engine->timeline.lock, flags); + spin_lock_irqsave(&engine->active.lock, flags); /* Cancel the requests on the HW and clear the ELSP tracker. */ execlists_cancel_port_requests(execlists); /* Mark all executing requests as skipped. */ - list_for_each_entry(rq, &engine->timeline.requests, link) { + list_for_each_entry(rq, &engine->active.requests, link) { if (!i915_request_signaled(rq)) dma_fence_set_error(&rq->fence, -EIO); @@ -959,7 +959,7 @@ static void guc_cancel_requests(struct intel_engine_cs *engine) execlists->queue = RB_ROOT_CACHED; GEM_BUG_ON(port_isset(execlists->port)); - spin_unlock_irqrestore(&engine->timeline.lock, flags); + spin_unlock_irqrestore(&engine->active.lock, flags); } static void guc_reset_finish(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c index e084476469ef..65b52be23d42 100644 --- a/drivers/gpu/drm/i915/selftests/mock_timeline.c +++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c @@ -13,7 +13,6 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context) timeline->i915 = NULL; timeline->fence_context = context; - spin_lock_init(&timeline->lock); mutex_init(&timeline->mutex); INIT_ACTIVE_REQUEST(&timeline->last_request); -- 2.20.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx