For future GuC firmware, the intention is to submit a large number of contexts and their interdependencies and leave the execution order to the firmware. As such, we want to allow the firmware freedom to execute independent contexts in whatever order suits it and so must forgo the concept of a single execution timeline with a predefined global seqno. As we have transitioned to per-context timelines, we should be agnostic to the actual execution order, tracking execution along each timeline independently. Having made this transition to per-context timelines, we can remove the engine->timeline and request->global_seqno. Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/i915_debugfs.c | 1 - drivers/gpu/drm/i915/i915_gem.c | 21 +++- drivers/gpu/drm/i915/i915_gem_context.c | 2 - drivers/gpu/drm/i915/i915_gpu_error.c | 47 ++------ drivers/gpu/drm/i915/i915_gpu_error.h | 6 - drivers/gpu/drm/i915/i915_request.c | 106 +++++------------- drivers/gpu/drm/i915/i915_request.h | 32 ------ drivers/gpu/drm/i915/i915_reset.c | 18 ++- drivers/gpu/drm/i915/i915_scheduler.c | 12 +- drivers/gpu/drm/i915/i915_timeline.h | 2 - drivers/gpu/drm/i915/i915_trace.h | 25 ++--- drivers/gpu/drm/i915/intel_engine_cs.c | 54 +++------ drivers/gpu/drm/i915/intel_guc_submission.c | 8 +- drivers/gpu/drm/i915/intel_lrc.c | 92 +++++---------- drivers/gpu/drm/i915/intel_ringbuffer.c | 72 +++--------- drivers/gpu/drm/i915/intel_ringbuffer.h | 49 +------- drivers/gpu/drm/i915/selftests/i915_request.c | 3 +- .../gpu/drm/i915/selftests/intel_hangcheck.c | 5 +- drivers/gpu/drm/i915/selftests/mock_engine.c | 15 +-- 19 files changed, 139 insertions(+), 431 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 16a9384de478..c308c2c589f0 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1322,7 +1322,6 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) engine->hangcheck.next_seqno, jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp)); - seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n", (long long)engine->hangcheck.acthd, (long long)acthd[id]); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 96b33f8ba9a9..9a78e1c9b323 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2879,6 +2879,14 @@ i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj, return 0; } +static bool match_ring(struct i915_request *rq) +{ + struct drm_i915_private *dev_priv = rq->i915; + u32 ring = I915_READ(RING_START(rq->engine->mmio_base)); + + return ring == i915_ggtt_offset(rq->ring->vma); +} + struct i915_request * i915_gem_find_active_request(struct intel_engine_cs *engine) { @@ -2896,15 +2904,22 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) * At all other times, we must assume the GPU is still running, but * we only care about the snapshot of this moment. */ - spin_lock_irqsave(&engine->timeline.lock, flags); - list_for_each_entry(request, &engine->timeline.requests, link) { + spin_lock_irqsave(&engine->execution_lock, flags); + list_for_each_entry(request, &engine->requests, link) { if (i915_request_completed(request)) continue; + if (!i915_request_started(request)) + continue; + + /* More than one preemptible request may match! */ + if (!match_ring(request)) + continue; + active = request; break; } - spin_unlock_irqrestore(&engine->timeline.lock, flags); + spin_unlock_irqrestore(&engine->execution_lock, flags); return active; } diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index ecba3ee204a6..e8334c4bc130 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -635,8 +635,6 @@ last_request_on_engine(struct i915_timeline *timeline, { struct i915_request *rq; - GEM_BUG_ON(timeline == &engine->timeline); - rq = i915_gem_active_raw(&timeline->last_request, &engine->i915->drm.struct_mutex); if (rq && rq->engine == engine) { diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 7d26c25ee785..ecab20b6d26e 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -380,19 +380,16 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m, err_printf(m, "%s [%d]:\n", name, count); while (count--) { - err_printf(m, " %08x_%08x %8u %02x %02x %02x", + err_printf(m, " %08x_%08x %8u %02x %02x", upper_32_bits(err->gtt_offset), lower_32_bits(err->gtt_offset), err->size, err->read_domains, - err->write_domain, - err->wseqno); + err->write_domain); err_puts(m, tiling_flag(err->tiling)); err_puts(m, dirty_flag(err->dirty)); err_puts(m, purgeable_flag(err->purgeable)); err_puts(m, err->userptr ? " userptr" : ""); - err_puts(m, err->engine != -1 ? " " : ""); - err_puts(m, engine_name(m->i915, err->engine)); err_puts(m, i915_cache_level_str(m->i915, err->cache_level)); if (err->name) @@ -444,12 +441,11 @@ static void error_print_request(struct drm_i915_error_state_buf *m, const struct drm_i915_error_request *erq, const unsigned long epoch) { - if (!erq->seqno) + if (!erq->context) return; - err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, prio %d, emitted %dms, start %08x, head %08x, tail %08x\n", - prefix, erq->pid, erq->ban_score, - erq->context, erq->seqno, erq->sched_attr.priority, + err_printf(m, "%s pid %d, ban score %d, prio %d, emitted %dms, start %08x, head %08x, tail %08x\n", + prefix, erq->pid, erq->ban_score, erq->sched_attr.priority, jiffies_to_msecs(erq->jiffies - epoch), erq->start, erq->head, erq->tail); } @@ -528,8 +524,6 @@ static void error_print_engine(struct drm_i915_error_state_buf *m, ee->vm_info.pp_dir_base); } } - err_printf(m, " seqno: 0x%08x\n", ee->seqno); - err_printf(m, " last_seqno: 0x%08x\n", ee->last_seqno); err_printf(m, " ring->head: 0x%08x\n", ee->cpu_ring_head); err_printf(m, " ring->tail: 0x%08x\n", ee->cpu_ring_tail); err_printf(m, " hangcheck timestamp: %dms (%lu%s)\n", @@ -1056,27 +1050,6 @@ i915_error_object_create(struct drm_i915_private *i915, return dst; } -/* The error capture is special as tries to run underneath the normal - * locking rules - so we use the raw version of the i915_gem_active lookup. - */ -static inline u32 -__active_get_seqno(struct i915_gem_active *active) -{ - struct i915_request *request; - - request = __i915_gem_active_peek(active); - return request ? request->global_seqno : 0; -} - -static inline int -__active_get_engine_id(struct i915_gem_active *active) -{ - struct i915_request *request; - - request = __i915_gem_active_peek(active); - return request ? request->engine->id : -1; -} - static void capture_bo(struct drm_i915_error_buffer *err, struct i915_vma *vma) { @@ -1085,9 +1058,6 @@ static void capture_bo(struct drm_i915_error_buffer *err, err->size = obj->base.size; err->name = obj->base.name; - err->wseqno = __active_get_seqno(&obj->frontbuffer_write); - err->engine = __active_get_engine_id(&obj->frontbuffer_write); - err->gtt_offset = vma->node.start; err->read_domains = obj->read_domains; err->write_domain = obj->write_domain; @@ -1218,8 +1188,6 @@ static void error_record_engine_registers(struct i915_gpu_state *error, ee->instpm = I915_READ(RING_INSTPM(engine->mmio_base)); ee->acthd = intel_engine_get_active_head(engine); - ee->seqno = intel_engine_get_seqno(engine); - ee->last_seqno = intel_engine_last_submit(engine); ee->start = I915_READ_START(engine); ee->head = I915_READ_HEAD(engine); ee->tail = I915_READ_TAIL(engine); @@ -1292,7 +1260,6 @@ static void record_request(struct i915_request *request, erq->context = ctx->hw_id; erq->sched_attr = request->sched.attr; erq->ban_score = atomic_read(&ctx->ban_score); - erq->seqno = request->global_seqno; erq->jiffies = request->emitted_jiffies; erq->start = i915_ggtt_offset(request->ring->vma); erq->head = request->head; @@ -1312,7 +1279,7 @@ static void engine_record_requests(struct intel_engine_cs *engine, count = 0; request = first; - list_for_each_entry_from(request, &engine->timeline.requests, link) + list_for_each_entry_from(request, &engine->requests, link) count++; if (!count) return; @@ -1325,7 +1292,7 @@ static void engine_record_requests(struct intel_engine_cs *engine, count = 0; request = first; - list_for_each_entry_from(request, &engine->timeline.requests, link) { + list_for_each_entry_from(request, &engine->requests, link) { if (count >= ee->num_requests) { /* * If the ring request list was changed in diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 99a53c0cd6da..59f5cf327edd 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -94,8 +94,6 @@ struct i915_gpu_state { u32 cpu_ring_head; u32 cpu_ring_tail; - u32 last_seqno; - /* Register state */ u32 start; u32 tail; @@ -108,7 +106,6 @@ struct i915_gpu_state { u32 bbstate; u32 instpm; u32 instps; - u32 seqno; u64 bbaddr; u64 acthd; u32 fault_reg; @@ -149,7 +146,6 @@ struct i915_gpu_state { pid_t pid; u32 context; int ban_score; - u32 seqno; u32 start; u32 head; u32 tail; @@ -169,7 +165,6 @@ struct i915_gpu_state { struct drm_i915_error_buffer { u32 size; u32 name; - u32 wseqno; u64 gtt_offset; u32 read_domains; u32 write_domain; @@ -178,7 +173,6 @@ struct i915_gpu_state { u32 dirty:1; u32 purgeable:1; u32 userptr:1; - s32 engine:4; u32 cache_level:3; } *active_bo[I915_NUM_ENGINES], *pinned_bo; u32 active_bo_count[I915_NUM_ENGINES], pinned_bo_count; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 6ff0c47c3a0d..7bccf578cd65 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -179,24 +179,15 @@ static void free_capture_list(struct i915_request *request) } } -static void __retire_engine_request(struct intel_engine_cs *engine, - struct i915_request *rq) +static void __retire_engine(struct i915_request *rq) { - GEM_TRACE("%s(%s) fence %llx:%lld, global=%d, current %d:%d\n", - __func__, engine->name, - rq->fence.context, rq->fence.seqno, - rq->global_seqno, - hwsp_seqno(rq), - intel_engine_get_seqno(engine)); - - GEM_BUG_ON(!i915_request_completed(rq)); + struct intel_engine_cs *engine = rq->engine; local_irq_disable(); - spin_lock(&engine->timeline.lock); - GEM_BUG_ON(!list_is_first(&rq->link, &engine->timeline.requests)); + spin_lock(&engine->execution_lock); list_del_init(&rq->link); - spin_unlock(&engine->timeline.lock); + spin_unlock(&engine->execution_lock); spin_lock(&rq->lock); if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags)) @@ -212,33 +203,14 @@ static void __retire_engine_request(struct intel_engine_cs *engine, local_irq_enable(); } -static void __retire_engine_upto(struct intel_engine_cs *engine, - struct i915_request *rq) -{ - struct i915_request *tmp; - - if (list_empty(&rq->link)) - return; - - do { - tmp = list_first_entry(&engine->timeline.requests, - typeof(*tmp), link); - - GEM_BUG_ON(tmp->engine != engine); - __retire_engine_request(engine, tmp); - } while (tmp != rq); -} - static void i915_request_retire(struct i915_request *request) { struct i915_gem_active *active, *next; - GEM_TRACE("%s fence %llx:%lld, global=%d, current %d:%d\n", + GEM_TRACE("%s fence %llx:%lld, current %d\n", request->engine->name, request->fence.context, request->fence.seqno, - request->global_seqno, - hwsp_seqno(request), - intel_engine_get_seqno(request->engine)); + hwsp_seqno(request)); lockdep_assert_held(&request->i915->drm.struct_mutex); GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit)); @@ -283,7 +255,7 @@ static void i915_request_retire(struct i915_request *request) /* Retirement decays the ban score as it is a sign of ctx progress */ atomic_dec_if_positive(&request->gem_context->ban_score); - __retire_engine_upto(request->engine, request); + __retire_engine(request); unreserve_gt(request->i915); @@ -296,12 +268,10 @@ void i915_request_retire_upto(struct i915_request *rq) struct intel_ring *ring = rq->ring; struct i915_request *tmp; - GEM_TRACE("%s fence %llx:%lld, global=%d, current %d:%d\n", + GEM_TRACE("%s fence %llx:%lld, current %d\n", rq->engine->name, rq->fence.context, rq->fence.seqno, - rq->global_seqno, - hwsp_seqno(rq), - intel_engine_get_seqno(rq->engine)); + hwsp_seqno(rq)); lockdep_assert_held(&rq->i915->drm.struct_mutex); GEM_BUG_ON(!i915_request_completed(rq)); @@ -324,41 +294,30 @@ static u32 timeline_get_seqno(struct i915_timeline *tl) } static void move_to_timeline(struct i915_request *request, - struct i915_timeline *timeline) + struct list_head *timeline) { - GEM_BUG_ON(request->timeline == &request->engine->timeline); - lockdep_assert_held(&request->engine->timeline.lock); + lockdep_assert_held(&request->engine->execution_lock); spin_lock(&request->timeline->lock); - list_move_tail(&request->link, &timeline->requests); + list_move_tail(&request->link, timeline); spin_unlock(&request->timeline->lock); } void __i915_request_submit(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; - u32 seqno; - GEM_TRACE("%s fence %llx:%lld -> global=%d, current %d:%d\n", + GEM_TRACE("%s fence %llx:%lld -> current %d\n", engine->name, request->fence.context, request->fence.seqno, - engine->timeline.seqno + 1, - hwsp_seqno(request), - intel_engine_get_seqno(engine)); + hwsp_seqno(request)); GEM_BUG_ON(!irqs_disabled()); - lockdep_assert_held(&engine->timeline.lock); - - GEM_BUG_ON(request->global_seqno); - - seqno = timeline_get_seqno(&engine->timeline); - GEM_BUG_ON(!seqno); - GEM_BUG_ON(intel_engine_signaled(engine, seqno)); + lockdep_assert_held(&engine->execution_lock); /* We may be recursing from the signal callback of another i915 fence */ spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags); - request->global_seqno = seqno; if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) && !intel_engine_enable_signaling(request)) intel_engine_queue_breadcrumbs(engine); @@ -368,7 +327,7 @@ void __i915_request_submit(struct i915_request *request) request->ring->vaddr + request->postfix); /* Transfer from per-context onto the global per-engine timeline */ - move_to_timeline(request, &engine->timeline); + move_to_timeline(request, &engine->requests); trace_i915_request_execute(request); } @@ -379,54 +338,41 @@ void i915_request_submit(struct i915_request *request) unsigned long flags; /* Will be called from irq-context when using foreign fences. */ - spin_lock_irqsave(&engine->timeline.lock, flags); + spin_lock_irqsave(&engine->execution_lock, flags); __i915_request_submit(request); - spin_unlock_irqrestore(&engine->timeline.lock, flags); + spin_unlock_irqrestore(&engine->execution_lock, flags); } void __i915_request_unsubmit(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; - GEM_TRACE("%s fence %llx:%lld <- global=%d, current %d:%d\n", + GEM_TRACE("%s fence %llx:%lld <- current %d\n", engine->name, request->fence.context, request->fence.seqno, - request->global_seqno, - hwsp_seqno(request), - intel_engine_get_seqno(engine)); + hwsp_seqno(request)); GEM_BUG_ON(!irqs_disabled()); - lockdep_assert_held(&engine->timeline.lock); + lockdep_assert_held(&engine->execution_lock); /* * Only unwind in reverse order, required so that the per-context list * is kept in seqno/ring order. */ - GEM_BUG_ON(!request->global_seqno); - GEM_BUG_ON(request->global_seqno != engine->timeline.seqno); - GEM_BUG_ON(intel_engine_has_completed(engine, request->global_seqno)); - engine->timeline.seqno--; /* We may be recursing from the signal callback of another i915 fence */ spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); - request->global_seqno = 0; if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) intel_engine_cancel_signaling(request); clear_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags); spin_unlock(&request->lock); /* Transfer back from the global per-engine timeline to per-context */ - move_to_timeline(request, request->timeline); + move_to_timeline(request, &request->timeline->requests); - /* - * We don't need to wake_up any waiters on request->execute, they - * will get woken by any other event or us re-adding this request - * to the engine timeline (__i915_request_submit()). The waiters - * should be quite adapt at finding that the request now has a new - * global_seqno to the one they went to sleep on. - */ + GEM_BUG_ON(i915_request_completed(request)); } void i915_request_unsubmit(struct i915_request *request) @@ -435,11 +381,11 @@ void i915_request_unsubmit(struct i915_request *request) unsigned long flags; /* Will be called from irq-context when using foreign fences. */ - spin_lock_irqsave(&engine->timeline.lock, flags); + spin_lock_irqsave(&engine->execution_lock, flags); __i915_request_unsubmit(request); - spin_unlock_irqrestore(&engine->timeline.lock, flags); + spin_unlock_irqrestore(&engine->execution_lock, flags); } static int __i915_sw_fence_call @@ -609,7 +555,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) rq->hw_context = ce; rq->ring = ce->ring; rq->timeline = ce->ring->timeline; - GEM_BUG_ON(rq->timeline == &engine->timeline); rq->hwsp_seqno = rq->timeline->hwsp_seqno; spin_lock_init(&rq->lock); @@ -625,7 +570,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) i915_sched_node_init(&rq->sched); /* No zalloc, must clear what we need by hand */ - rq->global_seqno = 0; rq->file_priv = NULL; rq->batch = NULL; rq->capture_list = NULL; diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index dd413d51cc61..679b4663f774 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -129,14 +129,6 @@ struct i915_request { */ const u32 *hwsp_seqno; - /** - * GEM sequence number associated with this request on the - * global execution timeline. It is zero when the request is not - * on the HW queue (i.e. not on the engine timeline list). - * Its value is guarded by the timeline spinlock. - */ - u32 global_seqno; - /** Position in the ring of the start of the request */ u32 head; @@ -229,30 +221,6 @@ i915_request_put(struct i915_request *rq) dma_fence_put(&rq->fence); } -/** - * i915_request_global_seqno - report the current global seqno - * @request - the request - * - * A request is assigned a global seqno only when it is on the hardware - * execution queue. The global seqno can be used to maintain a list of - * requests on the same engine in retirement order, for example for - * constructing a priority queue for waiting. Prior to its execution, or - * if it is subsequently removed in the event of preemption, its global - * seqno is zero. As both insertion and removal from the execution queue - * may operate in IRQ context, it is not guarded by the usual struct_mutex - * BKL. Instead those relying on the global seqno must be prepared for its - * value to change between reads. Only when the request is complete can - * the global seqno be stable (due to the memory barriers on submitting - * the commands to the hardware to write the breadcrumb, if the HWS shows - * that it has passed the global seqno and the global seqno is unchanged - * after the read, it is indeed complete). - */ -static inline u32 -i915_request_global_seqno(const struct i915_request *request) -{ - return READ_ONCE(request->global_seqno); -} - int i915_request_await_object(struct i915_request *to, struct drm_i915_gem_object *obj, bool write); diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c index d846f49401a6..e0d87e287b10 100644 --- a/drivers/gpu/drm/i915/i915_reset.c +++ b/drivers/gpu/drm/i915/i915_reset.c @@ -19,17 +19,13 @@ static void engine_skip_context(struct i915_request *rq) struct i915_gem_context *hung_ctx = rq->gem_context; struct i915_timeline *timeline = rq->timeline; - lockdep_assert_held(&engine->timeline.lock); - GEM_BUG_ON(timeline == &engine->timeline); + lockdep_assert_held(&engine->execution_lock); spin_lock(&timeline->lock); - if (rq->global_seqno) { - list_for_each_entry_continue(rq, - &engine->timeline.requests, link) - if (rq->gem_context == hung_ctx) - i915_request_skip(rq, -EIO); - } + list_for_each_entry(rq, &engine->requests, link) + if (!i915_request_completed(rq) && rq->gem_context == hung_ctx) + i915_request_skip(rq, -EIO); list_for_each_entry(rq, &timeline->requests, link) i915_request_skip(rq, -EIO); @@ -96,7 +92,7 @@ static void context_mark_innocent(struct i915_gem_context *ctx) void i915_reset_request(struct i915_request *rq, bool guilty) { - lockdep_assert_held(&rq->engine->timeline.lock); + lockdep_assert_held(&rq->engine->execution_lock); GEM_BUG_ON(i915_request_completed(rq)); if (guilty) { @@ -754,10 +750,10 @@ static void nop_submit_request(struct i915_request *request) engine->name, request->fence.context, request->fence.seqno); dma_fence_set_error(&request->fence, -EIO); - spin_lock_irqsave(&engine->timeline.lock, flags); + spin_lock_irqsave(&engine->execution_lock, flags); __i915_request_submit(request); i915_request_fake_complete(request); - spin_unlock_irqrestore(&engine->timeline.lock, flags); + spin_unlock_irqrestore(&engine->execution_lock, flags); intel_engine_queue_breadcrumbs(engine); } diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index e0b177687bec..53e76a91ad75 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -165,7 +165,7 @@ i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio) bool first = true; int idx, i; - lockdep_assert_held(&engine->timeline.lock); + lockdep_assert_held(&engine->execution_lock); assert_priolists(execlists, INT_MAX); /* buckets sorted from highest [in slot 0] to lowest priority */ @@ -232,8 +232,8 @@ sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked) GEM_BUG_ON(!locked); if (engine != locked) { - spin_unlock(&locked->timeline.lock); - spin_lock(&engine->timeline.lock); + spin_unlock(&locked->execution_lock); + spin_lock(&engine->execution_lock); } return engine; @@ -244,7 +244,7 @@ static bool inflight(const struct i915_request *rq, { const struct i915_request *active; - if (!rq->global_seqno) + if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) return false; active = port_request(engine->execlists.port); @@ -331,7 +331,7 @@ static void __i915_schedule(struct i915_request *rq, last = NULL; engine = rq->engine; - spin_lock_irq(&engine->timeline.lock); + spin_lock_irq(&engine->execution_lock); /* Fifo and depth-first replacement ensure our deps execute before us */ list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) { @@ -383,7 +383,7 @@ static void __i915_schedule(struct i915_request *rq, tasklet_hi_schedule(&engine->execlists.tasklet); } - spin_unlock_irq(&engine->timeline.lock); + spin_unlock_irq(&engine->execution_lock); } void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr) diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h index 421eb34568de..d06fa094a7be 100644 --- a/drivers/gpu/drm/i915/i915_timeline.h +++ b/drivers/gpu/drm/i915/i915_timeline.h @@ -39,8 +39,6 @@ struct i915_timeline { u32 seqno; spinlock_t lock; -#define TIMELINE_CLIENT 0 /* default subclass */ -#define TIMELINE_ENGINE 1 unsigned int pin_count; const u32 *hwsp_seqno; diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index cb5bc65d575d..d8fe328ada36 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -625,7 +625,6 @@ DECLARE_EVENT_CLASS(i915_request, __field(u16, class) __field(u16, instance) __field(u32, seqno) - __field(u32, global) ), TP_fast_assign( @@ -635,13 +634,11 @@ DECLARE_EVENT_CLASS(i915_request, __entry->instance = rq->engine->instance; __entry->ctx = rq->fence.context; __entry->seqno = rq->fence.seqno; - __entry->global = rq->global_seqno; ), - TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, global=%u", + TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u", __entry->dev, __entry->class, __entry->instance, - __entry->hw_id, __entry->ctx, __entry->seqno, - __entry->global) + __entry->hw_id, __entry->ctx, __entry->seqno) ); DEFINE_EVENT(i915_request, i915_request_add, @@ -671,7 +668,6 @@ TRACE_EVENT(i915_request_in, __field(u16, class) __field(u16, instance) __field(u32, seqno) - __field(u32, global_seqno) __field(u32, port) __field(u32, prio) ), @@ -683,15 +679,14 @@ TRACE_EVENT(i915_request_in, __entry->instance = rq->engine->instance; __entry->ctx = rq->fence.context; __entry->seqno = rq->fence.seqno; - __entry->global_seqno = rq->global_seqno; __entry->prio = rq->sched.attr.priority; __entry->port = port; ), - TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, prio=%u, global=%u, port=%u", + TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, prio=%u, port=%u", __entry->dev, __entry->class, __entry->instance, __entry->hw_id, __entry->ctx, __entry->seqno, - __entry->prio, __entry->global_seqno, __entry->port) + __entry->prio, __entry->port) ); TRACE_EVENT(i915_request_out, @@ -705,7 +700,6 @@ TRACE_EVENT(i915_request_out, __field(u16, class) __field(u16, instance) __field(u32, seqno) - __field(u32, global_seqno) __field(u32, completed) ), @@ -716,14 +710,13 @@ TRACE_EVENT(i915_request_out, __entry->instance = rq->engine->instance; __entry->ctx = rq->fence.context; __entry->seqno = rq->fence.seqno; - __entry->global_seqno = rq->global_seqno; __entry->completed = i915_request_completed(rq); ), - TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, global=%u, completed?=%u", + TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, completed?=%u", __entry->dev, __entry->class, __entry->instance, __entry->hw_id, __entry->ctx, __entry->seqno, - __entry->global_seqno, __entry->completed) + __entry->completed) ); #else @@ -766,7 +759,6 @@ TRACE_EVENT(i915_request_wait_begin, __field(u16, class) __field(u16, instance) __field(u32, seqno) - __field(u32, global) __field(unsigned int, flags) ), @@ -783,14 +775,13 @@ TRACE_EVENT(i915_request_wait_begin, __entry->instance = rq->engine->instance; __entry->ctx = rq->fence.context; __entry->seqno = rq->fence.seqno; - __entry->global = rq->global_seqno; __entry->flags = flags; ), - TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, global=%u, blocking=%u, flags=0x%x", + TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, blocking=%u, flags=0x%x", __entry->dev, __entry->class, __entry->instance, __entry->hw_id, __entry->ctx, __entry->seqno, - __entry->global, !!(__entry->flags & I915_WAIT_LOCKED), + !!(__entry->flags & I915_WAIT_LOCKED), __entry->flags) ); diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index b7f129504014..47f3cad6e861 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -455,12 +455,6 @@ int intel_engines_init(struct drm_i915_private *dev_priv) return err; } -void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno) -{ - intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); - GEM_BUG_ON(intel_engine_get_seqno(engine) != seqno); -} - static void intel_engine_init_batch_pool(struct intel_engine_cs *engine) { i915_gem_batch_pool_init(&engine->batch_pool, engine); @@ -588,29 +582,19 @@ int intel_engine_setup_common(struct intel_engine_cs *engine) { int err; + spin_lock_init(&engine->execution_lock); + INIT_LIST_HEAD(&engine->requests); + err = init_status_page(engine); if (err) return err; - err = i915_timeline_init(engine->i915, - &engine->timeline, - engine->name, - engine->status_page.vma); - if (err) - goto err_hwsp; - - i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE); - intel_engine_init_execlist(engine); intel_engine_init_hangcheck(engine); intel_engine_init_batch_pool(engine); intel_engine_init_cmd_parser(engine); return 0; - -err_hwsp: - cleanup_status_page(engine); - return err; } static void context_unpin(struct i915_gem_context *ctx, @@ -694,8 +678,6 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) context_unpin(i915->preempt_context, engine); context_unpin(i915->kernel_context, engine); - i915_timeline_fini(&engine->timeline); - intel_wa_list_free(&engine->ctx_wa_list); intel_wa_list_free(&engine->wa_list); intel_wa_list_free(&engine->whitelist); @@ -954,10 +936,6 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) if (i915_terminally_wedged(&dev_priv->gpu_error)) return true; - /* Any inflight/incomplete requests? */ - if (!intel_engine_signaled(engine, intel_engine_last_submit(engine))) - return false; - /* Waiting to drain ELSP? */ if (READ_ONCE(engine->execlists.active)) { struct tasklet_struct *t = &engine->execlists.tasklet; @@ -1178,15 +1156,14 @@ static void print_request(struct drm_printer *m, x = print_sched_attr(rq->i915, &rq->sched.attr, buf, x, sizeof(buf)); - drm_printf(m, "%s%x%s%s [%llx:%llx]%s @ %dms: %s\n", + drm_printf(m, "%s %llx:%llx%s%s %s @ %dms: %s\n", prefix, - rq->global_seqno, + rq->fence.context, rq->fence.seqno, i915_request_completed(rq) ? "!" : i915_request_started(rq) ? "*" : "", test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags) ? "+" : "", - rq->fence.context, rq->fence.seqno, buf, jiffies_to_msecs(jiffies - rq->emitted_jiffies), name); @@ -1392,12 +1369,11 @@ void intel_engine_dump(struct intel_engine_cs *engine, if (i915_terminally_wedged(&engine->i915->gpu_error)) drm_printf(m, "*** WEDGED ***\n"); - drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x/%x [%d ms]\n", - intel_engine_get_seqno(engine), - intel_engine_last_submit(engine), + drm_printf(m, "\tHangcheck %x/%x [%d ms]\n", engine->hangcheck.last_seqno, engine->hangcheck.next_seqno, - jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp)); + jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp +)); drm_printf(m, "\tReset count: %d (global %d)\n", i915_reset_engine_count(error, engine), i915_reset_count(error)); @@ -1406,14 +1382,12 @@ void intel_engine_dump(struct intel_engine_cs *engine, drm_printf(m, "\tRequests:\n"); - rq = list_first_entry(&engine->timeline.requests, - struct i915_request, link); - if (&rq->link != &engine->timeline.requests) + rq = list_first_entry(&engine->requests, struct i915_request, link); + if (&rq->link != &engine->requests) print_request(m, rq, "\t\tfirst "); - rq = list_last_entry(&engine->timeline.requests, - struct i915_request, link); - if (&rq->link != &engine->timeline.requests) + rq = list_last_entry(&engine->requests, struct i915_request, link); + if (&rq->link != &engine->requests) print_request(m, rq, "\t\tlast "); rq = i915_gem_find_active_request(engine); @@ -1494,7 +1468,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine) if (!intel_engine_supports_stats(engine)) return -ENODEV; - spin_lock_irqsave(&engine->timeline.lock, flags); + spin_lock_irqsave(&engine->execution_lock, flags); write_seqlock(&engine->stats.lock); if (unlikely(engine->stats.enabled == ~0)) { @@ -1520,7 +1494,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine) unlock: write_sequnlock(&engine->stats.lock); - spin_unlock_irqrestore(&engine->timeline.lock, flags); + spin_unlock_irqrestore(&engine->execution_lock, flags); return err; } diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index b044162a41d3..8d62d7fd51e8 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -534,7 +534,7 @@ static void guc_add_request(struct intel_guc *guc, struct i915_request *rq) spin_lock(&client->wq_lock); guc_wq_item_append(client, engine->guc_id, ctx_desc, - ring_tail, rq->global_seqno); + ring_tail, rq->fence.seqno); guc_ring_doorbell(client); client->submissions[engine->id] += 1; @@ -730,7 +730,7 @@ static bool __guc_dequeue(struct intel_engine_cs *engine) bool submit = false; struct rb_node *rb; - lockdep_assert_held(&engine->timeline.lock); + lockdep_assert_held(&engine->execution_lock); if (port_isset(port)) { if (intel_engine_has_preemption(engine)) { @@ -811,7 +811,7 @@ static void guc_submission_tasklet(unsigned long data) struct i915_request *rq; unsigned long flags; - spin_lock_irqsave(&engine->timeline.lock, flags); + spin_lock_irqsave(&engine->execution_lock, flags); rq = port_request(port); while (rq && i915_request_completed(rq)) { @@ -836,7 +836,7 @@ static void guc_submission_tasklet(unsigned long data) if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT)) guc_dequeue(engine); - spin_unlock_irqrestore(&engine->timeline.lock, flags); + spin_unlock_irqrestore(&engine->execution_lock, flags); } static void guc_reset_prepare(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 541e0e9ee781..323341e9bf2d 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -172,11 +172,6 @@ static void execlists_init_reg_state(u32 *reg_state, struct intel_engine_cs *engine, struct intel_ring *ring); -static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) -{ - return i915_ggtt_offset(engine->status_page.vma) + I915_GEM_HWS_INDEX_ADDR; -} - static inline u32 intel_hws_hangcheck_address(struct intel_engine_cs *engine) { return (i915_ggtt_offset(engine->status_page.vma) + @@ -285,11 +280,9 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) struct list_head *uninitialized_var(pl); int prio = I915_PRIORITY_INVALID | I915_PRIORITY_NEWCLIENT; - lockdep_assert_held(&engine->timeline.lock); + lockdep_assert_held(&engine->execution_lock); - list_for_each_entry_safe_reverse(rq, rn, - &engine->timeline.requests, - link) { + list_for_each_entry_safe_reverse(rq, rn, &engine->requests, link) { if (i915_request_completed(rq)) break; @@ -452,13 +445,11 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) desc = execlists_update_context(rq); GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc)); - GEM_TRACE("%s in[%d]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d:%d), prio=%d\n", + GEM_TRACE("%s in[%d]: ctx=%d.%d, fence=%llx:%lld, current=%d, prio=%d\n", engine->name, n, port[n].context_id, count, - rq->global_seqno, rq->fence.context, rq->fence.seqno, hwsp_seqno(rq), - intel_engine_get_seqno(engine), rq_prio(rq)); } else { GEM_BUG_ON(!n); @@ -754,13 +745,11 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) while (num_ports-- && port_isset(port)) { struct i915_request *rq = port_request(port); - GEM_TRACE("%s:port%u global=%d (fence %llx:%lld), (current %d:%d)\n", + GEM_TRACE("%s:port%u fence=%llx:%lld current=%d\n", rq->engine->name, (unsigned int)(port - execlists->port), - rq->global_seqno, rq->fence.context, rq->fence.seqno, - hwsp_seqno(rq), - intel_engine_get_seqno(rq->engine)); + hwsp_seqno(rq)); GEM_BUG_ON(!execlists->active); execlists_context_schedule_out(rq, @@ -816,9 +805,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) struct rb_node *rb; unsigned long flags; - GEM_TRACE("%s current %d\n", - engine->name, intel_engine_get_seqno(engine)); - /* * Before we call engine->cancel_requests(), we should have exclusive * access to the submission state. This is arranged for us by the @@ -833,19 +819,16 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) * submission's irq state, we also wish to remind ourselves that * it is irq state.) */ - spin_lock_irqsave(&engine->timeline.lock, flags); + spin_lock_irqsave(&engine->execution_lock, flags); /* Cancel the requests on the HW and clear the ELSP tracker. */ execlists_cancel_port_requests(execlists); execlists_user_end(execlists); /* Mark all executing requests as skipped. */ - list_for_each_entry(rq, &engine->timeline.requests, link) { - GEM_BUG_ON(!rq->global_seqno); - + list_for_each_entry(rq, &engine->requests, link) { if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags)) dma_fence_set_error(&rq->fence, -EIO); - i915_request_fake_complete(rq); } @@ -867,10 +850,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) kmem_cache_free(engine->i915->priorities, p); } - intel_write_status_page(engine, - I915_GEM_HWS_INDEX, - intel_engine_last_submit(engine)); - /* Remaining _unready_ requests will be nop'ed when submitted */ execlists->queue_priority = INT_MIN; @@ -880,7 +859,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet)); execlists->tasklet.func = nop_submission_tasklet; - spin_unlock_irqrestore(&engine->timeline.lock, flags); + spin_unlock_irqrestore(&engine->execution_lock, flags); } static inline bool @@ -984,14 +963,12 @@ static void process_csb(struct intel_engine_cs *engine) EXECLISTS_ACTIVE_USER)); rq = port_unpack(port, &count); - GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d:%d), prio=%d\n", + GEM_TRACE("%s out[0]: ctx=%d.%d, fence=%llx:%lld current=%d, prio=%d\n", engine->name, port->context_id, count, - rq ? rq->global_seqno : 0, rq ? rq->fence.context : 0, rq ? rq->fence.seqno : 0, rq ? hwsp_seqno(rq) : 0, - intel_engine_get_seqno(engine), rq ? rq_prio(rq) : 0); /* Check the context/desc id for this event matches */ @@ -1056,7 +1033,7 @@ static void process_csb(struct intel_engine_cs *engine) static void __execlists_submission_tasklet(struct intel_engine_cs *const engine) { - lockdep_assert_held(&engine->timeline.lock); + lockdep_assert_held(&engine->execution_lock); process_csb(engine); if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT)) @@ -1077,9 +1054,9 @@ static void execlists_submission_tasklet(unsigned long data) !!engine->i915->gt.awake, engine->execlists.active); - spin_lock_irqsave(&engine->timeline.lock, flags); + spin_lock_irqsave(&engine->execution_lock, flags); __execlists_submission_tasklet(engine); - spin_unlock_irqrestore(&engine->timeline.lock, flags); + spin_unlock_irqrestore(&engine->execution_lock, flags); } static void queue_request(struct intel_engine_cs *engine, @@ -1118,7 +1095,7 @@ static void execlists_submit_request(struct i915_request *request) unsigned long flags; /* Will be called from irq-context when using foreign fences. */ - spin_lock_irqsave(&engine->timeline.lock, flags); + spin_lock_irqsave(&engine->execution_lock, flags); queue_request(engine, &request->sched, rq_prio(request)); @@ -1127,7 +1104,7 @@ static void execlists_submit_request(struct i915_request *request) submit_queue(engine, request); - spin_unlock_irqrestore(&engine->timeline.lock, flags); + spin_unlock_irqrestore(&engine->execution_lock, flags); } static void execlists_context_destroy(struct intel_context *ce) @@ -1160,9 +1137,9 @@ static void execlists_context_unpin(struct intel_context *ce) if (unlikely(engine)) { unsigned long flags; - spin_lock_irqsave(&engine->timeline.lock, flags); + spin_lock_irqsave(&engine->execution_lock, flags); process_csb(engine); - spin_unlock_irqrestore(&engine->timeline.lock, flags); + spin_unlock_irqrestore(&engine->execution_lock, flags); GEM_BUG_ON(READ_ONCE(ce->active)); } @@ -1787,9 +1764,9 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine) GEM_BUG_ON(!reset_in_progress(execlists)); /* And flush any current direct submission. */ - spin_lock_irqsave(&engine->timeline.lock, flags); + spin_lock_irqsave(&engine->execution_lock, flags); process_csb(engine); /* drain preemption events */ - spin_unlock_irqrestore(&engine->timeline.lock, flags); + spin_unlock_irqrestore(&engine->execution_lock, flags); } static void execlists_reset(struct intel_engine_cs *engine, bool stalled) @@ -1799,7 +1776,7 @@ static void execlists_reset(struct intel_engine_cs *engine, bool stalled) unsigned long flags; u32 *regs; - spin_lock_irqsave(&engine->timeline.lock, flags); + spin_lock_irqsave(&engine->execution_lock, flags); /* * Catch up with any missed context-switch interrupts. @@ -1818,12 +1795,7 @@ static void execlists_reset(struct intel_engine_cs *engine, bool stalled) /* Following the reset, we need to reload the CSB read/write pointers */ reset_csb_pointers(&engine->execlists); - GEM_TRACE("%s seqno=%d, current=%d, stalled? %s\n", - engine->name, - rq ? rq->global_seqno : 0, - intel_engine_get_seqno(engine), - yesno(stalled)); - if (!rq) + if (!rq || !i915_request_started(rq)) goto out_unlock; /* @@ -1865,7 +1837,7 @@ static void execlists_reset(struct intel_engine_cs *engine, bool stalled) intel_ring_update_space(rq->ring); out_unlock: - spin_unlock_irqrestore(&engine->timeline.lock, flags); + spin_unlock_irqrestore(&engine->execution_lock, flags); } static void execlists_reset_finish(struct intel_engine_cs *engine) @@ -2061,9 +2033,6 @@ static void gen8_emit_wa_tail(struct i915_request *request, u32 *cs) static void gen8_emit_breadcrumb(struct i915_request *request, u32 *cs) { - /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ - BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5)); - cs = gen8_emit_ggtt_write(cs, request->fence.seqno, i915_timeline_seqno_address(request->timeline)); @@ -2072,10 +2041,6 @@ static void gen8_emit_breadcrumb(struct i915_request *request, u32 *cs) intel_engine_next_hangcheck_seqno(request->engine), intel_hws_hangcheck_address(request->engine)); - cs = gen8_emit_ggtt_write(cs, - request->global_seqno, - intel_hws_seqno_address(request->engine)); - *cs++ = MI_USER_INTERRUPT; *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; @@ -2084,7 +2049,7 @@ static void gen8_emit_breadcrumb(struct i915_request *request, u32 *cs) gen8_emit_wa_tail(request, cs); } -static const int gen8_emit_breadcrumb_sz = 14 + WA_TAIL_DWORDS; +static const int gen8_emit_breadcrumb_sz = 10 + WA_TAIL_DWORDS; static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs) { @@ -2102,11 +2067,6 @@ static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs) intel_hws_hangcheck_address(request->engine), PIPE_CONTROL_CS_STALL); - cs = gen8_emit_ggtt_write_rcs(cs, - request->global_seqno, - intel_hws_seqno_address(request->engine), - PIPE_CONTROL_CS_STALL); - *cs++ = MI_USER_INTERRUPT; *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; @@ -2115,7 +2075,7 @@ static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs) gen8_emit_wa_tail(request, cs); } -static const int gen8_emit_breadcrumb_rcs_sz = 20 + WA_TAIL_DWORDS; +static const int gen8_emit_breadcrumb_rcs_sz = 14 + WA_TAIL_DWORDS; static int gen8_init_rcs_context(struct i915_request *rq) { @@ -2765,11 +2725,11 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, unsigned int count; struct rb_node *rb; - spin_lock_irqsave(&engine->timeline.lock, flags); + spin_lock_irqsave(&engine->execution_lock, flags); last = NULL; count = 0; - list_for_each_entry(rq, &engine->timeline.requests, link) { + list_for_each_entry(rq, &engine->requests, link) { if (count++ < max - 1) show_request(m, rq, "\t\tE "); else @@ -2809,7 +2769,7 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, show_request(m, last, "\t\tQ "); } - spin_unlock_irqrestore(&engine->timeline.lock, flags); + spin_unlock_irqrestore(&engine->execution_lock, flags); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 0bc513c1db33..e2c415aa8354 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -43,11 +43,6 @@ */ #define LEGACY_REQUEST_SIZE 200 -static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) -{ - return i915_ggtt_offset(engine->status_page.vma) + I915_GEM_HWS_INDEX_ADDR; -} - static unsigned int __intel_ring_space(unsigned int head, unsigned int tail, unsigned int size) @@ -329,18 +324,13 @@ static void gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) PIPE_CONTROL_GLOBAL_GTT; *cs++ = rq->fence.seqno; - *cs++ = GFX_OP_PIPE_CONTROL(4); - *cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL; - *cs++ = intel_hws_seqno_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT; - *cs++ = rq->global_seqno; - *cs++ = MI_USER_INTERRUPT; *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); } -static const int gen6_rcs_emit_breadcrumb_sz = 18; +static const int gen6_rcs_emit_breadcrumb_sz = 14; static int gen7_render_ring_cs_stall_wa(struct i915_request *rq) @@ -434,13 +424,6 @@ static void gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = i915_timeline_seqno_address(rq->timeline); *cs++ = rq->fence.seqno; - *cs++ = GFX_OP_PIPE_CONTROL(4); - *cs++ = (PIPE_CONTROL_QW_WRITE | - PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_CS_STALL); - *cs++ = intel_hws_seqno_address(rq->engine); - *cs++ = rq->global_seqno; - *cs++ = MI_USER_INTERRUPT; *cs++ = MI_NOOP; @@ -459,16 +442,13 @@ static void gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = I915_GEM_HWS_HANGCHECK_ADDR | MI_FLUSH_DW_USE_GTT; *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); - *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; - *cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT; - *cs++ = rq->global_seqno; - *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); } -static const int gen6_xcs_emit_breadcrumb_sz = 10; +static const int gen6_xcs_emit_breadcrumb_sz = 8; #define GEN7_XCS_WA 32 static void gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) @@ -483,10 +463,6 @@ static void gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = I915_GEM_HWS_HANGCHECK_ADDR | MI_FLUSH_DW_USE_GTT; *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); - *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; - *cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT; - *cs++ = rq->global_seqno; - for (i = 0; i < GEN7_XCS_WA; i++) { *cs++ = MI_STORE_DWORD_INDEX; *cs++ = I915_GEM_HWS_SEQNO_ADDR; @@ -498,12 +474,11 @@ static void gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = 0; *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); } -static const int gen7_xcs_emit_breadcrumb_sz = 14 + GEN7_XCS_WA * 3; +static const int gen7_xcs_emit_breadcrumb_sz = 10 + GEN7_XCS_WA * 3; #undef GEN7_XCS_WA static void set_hwstam(struct intel_engine_cs *engine, u32 mask) @@ -755,25 +730,19 @@ static void reset_prepare(struct intel_engine_cs *engine) static void reset_ring(struct intel_engine_cs *engine, bool stalled) { - struct i915_timeline *tl = &engine->timeline; struct i915_request *pos, *rq; unsigned long flags; u32 head; rq = NULL; - spin_lock_irqsave(&tl->lock, flags); - list_for_each_entry(pos, &tl->requests, link) { + spin_lock_irqsave(&engine->execution_lock, flags); + list_for_each_entry(pos, &engine->requests, link) { if (!i915_request_completed(pos)) { rq = pos; break; } } - GEM_TRACE("%s seqno=%d, current=%d, stalled? %s\n", - engine->name, - rq ? rq->global_seqno : 0, - intel_engine_get_seqno(engine), - yesno(stalled)); /* * The guilty request will get skipped on a hung engine. * @@ -821,7 +790,7 @@ static void reset_ring(struct intel_engine_cs *engine, bool stalled) } engine->buffer->head = intel_ring_wrap(engine->buffer, head); - spin_unlock_irqrestore(&tl->lock, flags); + spin_unlock_irqrestore(&engine->execution_lock, flags); } static void reset_finish(struct intel_engine_cs *engine) @@ -899,25 +868,19 @@ static void cancel_requests(struct intel_engine_cs *engine) struct i915_request *request; unsigned long flags; - spin_lock_irqsave(&engine->timeline.lock, flags); + spin_lock_irqsave(&engine->execution_lock, flags); /* Mark all submitted requests as skipped. */ - list_for_each_entry(request, &engine->timeline.requests, link) { - GEM_BUG_ON(!request->global_seqno); - + list_for_each_entry(request, &engine->requests, link) { if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags)) dma_fence_set_error(&request->fence, -EIO); i915_request_fake_complete(request); } - intel_write_status_page(engine, - I915_GEM_HWS_INDEX, - intel_engine_last_submit(engine)); - /* Remaining _unready_ requests will be nop'ed when submitted */ - spin_unlock_irqrestore(&engine->timeline.lock, flags); + spin_unlock_irqrestore(&engine->execution_lock, flags); } static void i9xx_submit_request(struct i915_request *request) @@ -944,19 +907,14 @@ static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = I915_GEM_HWS_HANGCHECK_ADDR; *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); - *cs++ = MI_STORE_DWORD_INDEX; - *cs++ = I915_GEM_HWS_INDEX_ADDR; - *cs++ = rq->global_seqno; - *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); } -static const int i9xx_emit_breadcrumb_sz = 12; +static const int i9xx_emit_breadcrumb_sz = 8; -#define GEN5_WA_STORES 8 /* must be at least 1! */ +#define GEN5_WA_STORES 8 static void gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) { int i; @@ -973,11 +931,10 @@ static void gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = I915_GEM_HWS_HANGCHECK_ADDR; *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); - BUILD_BUG_ON(GEN5_WA_STORES < 1); for (i = 0; i < GEN5_WA_STORES; i++) { *cs++ = MI_STORE_DWORD_INDEX; - *cs++ = I915_GEM_HWS_INDEX_ADDR; - *cs++ = rq->global_seqno; + *cs++ = I915_GEM_HWS_SCRATCH_ADDR; + *cs++ = 0; } *cs++ = MI_USER_INTERRUPT; @@ -1331,7 +1288,6 @@ intel_engine_create_ring(struct intel_engine_cs *engine, GEM_BUG_ON(!is_power_of_2(size)); GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES); - GEM_BUG_ON(timeline == &engine->timeline); lockdep_assert_held(&engine->i915->drm.struct_mutex); ring = kzalloc(sizeof(*ring), GFP_KERNEL); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 76e06ca0490b..9ca8f5ff5dd4 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -365,7 +365,10 @@ struct intel_engine_cs { struct intel_ring *buffer; - struct i915_timeline timeline; + struct list_head requests; + spinlock_t execution_lock; +#define EXECUTION_HW 0 /* default */ +#define EXECUTION_VIRTUAL 1 struct drm_i915_gem_object *default_state; void *pinned_default_state; @@ -700,9 +703,7 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) * * The area from dword 0x30 to 0x3ff is available for driver usage. */ -#define I915_GEM_HWS_INDEX 0x30 -#define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT) -#define I915_GEM_HWS_PREEMPT_INDEX 0x32 +#define I915_GEM_HWS_PREEMPT_INDEX 0x30 #define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT_INDEX << MI_STORE_DWORD_INDEX_SHIFT) #define I915_GEM_HWS_HANGCHECK 0x34 #define I915_GEM_HWS_HANGCHECK_ADDR (I915_GEM_HWS_HANGCHECK << MI_STORE_DWORD_INDEX_SHIFT) @@ -812,8 +813,6 @@ intel_ring_set_tail(struct intel_ring *ring, unsigned int tail) return tail; } -void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno); - int intel_engine_setup_common(struct intel_engine_cs *engine); int intel_engine_init_common(struct intel_engine_cs *engine); void intel_engine_cleanup_common(struct intel_engine_cs *engine); @@ -831,44 +830,6 @@ void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask); u64 intel_engine_get_active_head(const struct intel_engine_cs *engine); u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine); -static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine) -{ - /* - * We are only peeking at the tail of the submit queue (and not the - * queue itself) in order to gain a hint as to the current active - * state of the engine. Callers are not expected to be taking - * engine->timeline->lock, nor are they expected to be concerned - * wtih serialising this hint with anything, so document it as - * a hint and nothing more. - */ - return READ_ONCE(engine->timeline.seqno); -} - -static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine) -{ - return intel_read_status_page(engine, I915_GEM_HWS_INDEX); -} - -static inline bool intel_engine_signaled(struct intel_engine_cs *engine, - u32 seqno) -{ - return i915_seqno_passed(intel_engine_get_seqno(engine), seqno); -} - -static inline bool intel_engine_has_completed(struct intel_engine_cs *engine, - u32 seqno) -{ - GEM_BUG_ON(!seqno); - return intel_engine_signaled(engine, seqno); -} - -static inline bool intel_engine_has_started(struct intel_engine_cs *engine, - u32 seqno) -{ - GEM_BUG_ON(!seqno); - return intel_engine_signaled(engine, seqno - 1); -} - void intel_engine_get_instdone(struct intel_engine_cs *engine, struct intel_instdone *instdone); diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 5953a47827a7..fa079c6d9c65 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -225,8 +225,7 @@ static int igt_request_rewind(void *arg) mutex_unlock(&i915->drm.struct_mutex); if (i915_request_wait(vip, 0, HZ) == -ETIME) { - pr_err("timed out waiting for high priority request, vip.seqno=%d, current seqno=%d\n", - vip->global_seqno, intel_engine_get_seqno(i915->engine[RCS])); + pr_err("timed out waiting for high priority request\n"); goto err; } diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index 04d66b4303ab..9fe9ba66b5ec 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -565,11 +565,10 @@ static int active_request_put(struct i915_request *rq) return 0; if (i915_request_wait(rq, 0, 5 * HZ) < 0) { - GEM_TRACE("%s timed out waiting for completion of fence %llx:%lld, seqno %d.\n", + GEM_TRACE("%s timed out waiting for completion of fence %llx:%lld.\n", rq->engine->name, rq->fence.context, - rq->fence.seqno, - i915_request_global_seqno(rq)); + rq->fence.seqno); GEM_TRACE_DUMP(); i915_gem_set_wedged(rq->i915); diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 0c5649044bc9..9f76ae27a6c2 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -79,8 +79,6 @@ static void advance(struct mock_request *request) { list_del_init(&request->link); i915_request_fake_complete(&request->base); - intel_engine_write_global_seqno(request->base.engine, - request->base.global_seqno); intel_engine_queue_breadcrumbs(request->base.engine); } @@ -184,7 +182,6 @@ static void mock_submit_request(struct i915_request *request) unsigned long flags; i915_request_submit(request); - GEM_BUG_ON(!request->global_seqno); spin_lock_irqsave(&engine->hw_lock, flags); list_add_tail(&mock->link, &engine->hw_queue); @@ -221,12 +218,8 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, engine->base.emit_breadcrumb = mock_emit_breadcrumb; engine->base.submit_request = mock_submit_request; - if (i915_timeline_init(i915, - &engine->base.timeline, - engine->base.name, - NULL)) - goto err_free; - i915_timeline_set_subclass(&engine->base.timeline, TIMELINE_ENGINE); + spin_lock_init(&engine->base.execution_lock); + INIT_LIST_HEAD(&engine->base.requests); engine->base.execlists.queue_priority = INT_MIN; engine->base.execlists.queue = RB_ROOT_CACHED; @@ -246,8 +239,6 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, err_breadcrumbs: intel_engine_fini_breadcrumbs(&engine->base); - i915_timeline_fini(&engine->base.timeline); -err_free: kfree(engine); return NULL; } @@ -268,7 +259,6 @@ void mock_engine_flush(struct intel_engine_cs *engine) void mock_engine_reset(struct intel_engine_cs *engine) { - intel_engine_write_global_seqno(engine, 0); } void mock_engine_free(struct intel_engine_cs *engine) @@ -281,7 +271,6 @@ void mock_engine_free(struct intel_engine_cs *engine) context_unpin(engine->i915->kernel_context, engine); intel_engine_fini_breadcrumbs(engine); - i915_timeline_fini(&engine->timeline); kfree(engine); } -- 2.20.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx