As we inspect both the tasklet (to check for an active bottom-half) and set the irq-posted flag at the same time (both in the interrupt handler and then in the bottom-halt), group those two together into the same cacheline. (Not having total control over placement of the struct means we can't guarantee the cacheline boundary, we need to align the kmalloc and then each struct, but the grouping should help.) v2: Try a couple of different names for the state touched by the user interrupt handler. Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_debugfs.c | 8 ++++---- drivers/gpu/drm/i915/i915_drv.h | 6 +++--- drivers/gpu/drm/i915/i915_irq.c | 12 ++++++------ drivers/gpu/drm/i915/intel_breadcrumbs.c | 28 ++++++++++++++-------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 18 ++++++++++-------- 5 files changed, 37 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index a59e0caeda64..3d05cae0e92d 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -793,8 +793,8 @@ static void i915_ring_seqno_info(struct seq_file *m, seq_printf(m, "Current sequence (%s): %x\n", engine->name, intel_engine_get_seqno(engine)); - seq_printf(m, "Current user interrupts (%s): %x\n", - engine->name, READ_ONCE(engine->user_interrupts)); + seq_printf(m, "Current user interrupts (%s): %lx\n", + engine->name, READ_ONCE(engine->breadcrumbs.irq_wakeups)); spin_lock(&b->lock); for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { @@ -1442,9 +1442,9 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) engine->last_submitted_seqno); seq_printf(m, "\twaiters? %d\n", intel_engine_has_waiter(engine)); - seq_printf(m, "\tuser interrupts = %x [current %x]\n", + seq_printf(m, "\tuser interrupts = %lx [current %lx]\n", engine->hangcheck.user_interrupts, - READ_ONCE(engine->user_interrupts)); + READ_ONCE(engine->breadcrumbs.irq_wakeups)); seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n", (long long)engine->hangcheck.acthd, (long long)acthd[id]); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 11e9769411e9..d2c60992a508 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3998,8 +3998,8 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req) * is woken. */ if (engine->irq_seqno_barrier && - READ_ONCE(engine->breadcrumbs.tasklet) == current && - cmpxchg_relaxed(&engine->irq_posted, 1, 0)) { + READ_ONCE(engine->breadcrumbs.irq_seqno_bh) == current && + cmpxchg_relaxed(&engine->breadcrumbs.irq_posted, 1, 0)) { struct task_struct *tsk; /* The ordering of irq_posted versus applying the barrier @@ -4023,7 +4023,7 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req) * irq_posted == false but we are still running). */ rcu_read_lock(); - tsk = READ_ONCE(engine->breadcrumbs.tasklet); + tsk = READ_ONCE(engine->breadcrumbs.irq_seqno_bh); if (tsk && tsk != current) /* Note that if the bottom-half is changed as we * are sending the wake-up, the new bottom-half will diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index b77d808b71cd..a69a5fc75bd6 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -977,10 +977,10 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv) static void notify_ring(struct intel_engine_cs *engine) { - smp_store_mb(engine->irq_posted, true); + smp_store_mb(engine->breadcrumbs.irq_posted, true); if (intel_engine_wakeup(engine)) { trace_i915_gem_request_notify(engine); - engine->user_interrupts++; + engine->breadcrumbs.irq_wakeups++; } } @@ -3054,12 +3054,12 @@ ring_stuck(struct intel_engine_cs *engine, u64 acthd) return HANGCHECK_HUNG; } -static unsigned kick_waiters(struct intel_engine_cs *engine) +static unsigned long kick_waiters(struct intel_engine_cs *engine) { struct drm_i915_private *i915 = engine->i915; - unsigned user_interrupts = READ_ONCE(engine->user_interrupts); + unsigned long irq_count = READ_ONCE(engine->breadcrumbs.irq_wakeups); - if (engine->hangcheck.user_interrupts == user_interrupts && + if (engine->hangcheck.user_interrupts == irq_count && !test_and_set_bit(engine->id, &i915->gpu_error.missed_irq_rings)) { if (!test_bit(engine->id, &i915->gpu_error.test_irq_rings)) DRM_ERROR("Hangcheck timer elapsed... %s idle\n", @@ -3068,7 +3068,7 @@ static unsigned kick_waiters(struct intel_engine_cs *engine) intel_engine_enable_fake_irq(engine); } - return user_interrupts; + return irq_count; } /* * This is called when the chip hasn't reported back with completed diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 6fcbb52e50fb..d89b2c963618 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -49,7 +49,7 @@ static void irq_enable(struct intel_engine_cs *engine) * we still need to force the barrier before reading the seqno, * just in case. */ - engine->irq_posted = true; + engine->breadcrumbs.irq_posted = true; spin_lock_irq(&engine->i915->irq_lock); engine->irq_enable(engine); @@ -62,7 +62,7 @@ static void irq_disable(struct intel_engine_cs *engine) engine->irq_disable(engine); spin_unlock_irq(&engine->i915->irq_lock); - engine->irq_posted = false; + engine->breadcrumbs.irq_posted = false; } static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) @@ -195,7 +195,7 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, } rb_link_node(&wait->node, parent, p); rb_insert_color(&wait->node, &b->waiters); - GEM_BUG_ON(!first && !b->tasklet); + GEM_BUG_ON(!first && !b->irq_seqno_bh); if (completed) { struct rb_node *next = rb_next(completed); @@ -204,7 +204,7 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, if (next && next != &wait->node) { GEM_BUG_ON(first); b->first_wait = to_wait(next); - smp_store_mb(b->tasklet, b->first_wait->tsk); + smp_store_mb(b->irq_seqno_bh, b->first_wait->tsk); /* As there is a delay between reading the current * seqno, processing the completed tasks and selecting * the next waiter, we may have missed the interrupt @@ -216,7 +216,7 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, * in case the seqno passed. */ __intel_breadcrumbs_enable_irq(b); - if (READ_ONCE(engine->irq_posted)) + if (READ_ONCE(b->irq_posted)) wake_up_process(to_wait(next)->tsk); } @@ -230,18 +230,18 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, if (first) { GEM_BUG_ON(rb_first(&b->waiters) != &wait->node); b->first_wait = wait; - smp_store_mb(b->tasklet, wait->tsk); + smp_store_mb(b->irq_seqno_bh, wait->tsk); /* After assigning ourselves as the new bottom-half, we must * perform a cursory check to prevent a missed interrupt. * Either we miss the interrupt whilst programming the hardware, * or if there was a previous waiter (for a later seqno) they * may be woken instead of us (due to the inherent race - * in the unlocked read of b->tasklet in the irq handler) and - * so we miss the wake up. + * in the unlocked read of b->irq_seqno_bh in the irq handler) + * and so we miss the wake up. */ __intel_breadcrumbs_enable_irq(b); } - GEM_BUG_ON(!b->tasklet); + GEM_BUG_ON(!b->irq_seqno_bh); GEM_BUG_ON(!b->first_wait); GEM_BUG_ON(rb_first(&b->waiters) != &b->first_wait->node); @@ -301,7 +301,7 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine, const int priority = wakeup_priority(b, wait->tsk); struct rb_node *next; - GEM_BUG_ON(b->tasklet != wait->tsk); + GEM_BUG_ON(b->irq_seqno_bh != wait->tsk); /* We are the current bottom-half. Find the next candidate, * the first waiter in the queue on the remaining oldest @@ -344,13 +344,13 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine, * exception rather than a seqno completion. */ b->first_wait = to_wait(next); - smp_store_mb(b->tasklet, b->first_wait->tsk); + smp_store_mb(b->irq_seqno_bh, b->first_wait->tsk); if (b->first_wait->seqno != wait->seqno) __intel_breadcrumbs_enable_irq(b); - wake_up_process(b->tasklet); + wake_up_process(b->irq_seqno_bh); } else { b->first_wait = NULL; - WRITE_ONCE(b->tasklet, NULL); + WRITE_ONCE(b->irq_seqno_bh, NULL); __intel_breadcrumbs_disable_irq(b); } } else { @@ -364,7 +364,7 @@ out_unlock: GEM_BUG_ON(b->first_wait == wait); GEM_BUG_ON(rb_first(&b->waiters) != (b->first_wait ? &b->first_wait->node : NULL)); - GEM_BUG_ON(!b->tasklet ^ RB_EMPTY_ROOT(&b->waiters)); + GEM_BUG_ON(!b->irq_seqno_bh ^ RB_EMPTY_ROOT(&b->waiters)); spin_unlock(&b->lock); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 121294c602c3..12cb7ed90014 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -74,8 +74,8 @@ enum intel_ring_hangcheck_action { struct intel_ring_hangcheck { u64 acthd; + unsigned long user_interrupts; u32 seqno; - unsigned user_interrupts; int score; enum intel_ring_hangcheck_action action; int deadlock; @@ -167,16 +167,20 @@ struct intel_engine_cs { * the overhead of waking that client is much preferred. */ struct intel_breadcrumbs { + struct task_struct *irq_seqno_bh; /* bh for user interrupts */ + unsigned long irq_wakeups; + bool irq_posted; + spinlock_t lock; /* protects the lists of requests */ struct rb_root waiters; /* sorted by retirement, priority */ struct rb_root signals; /* sorted by retirement */ struct intel_wait *first_wait; /* oldest waiter by retirement */ - struct task_struct *tasklet; /* bh for user interrupts */ struct task_struct *signaler; /* used for fence signalling */ struct drm_i915_gem_request *first_signal; struct timer_list fake_irq; /* used after a missed interrupt */ - bool irq_enabled; - bool rpm_wakelock; + + bool irq_enabled : 1; + bool rpm_wakelock : 1; } breadcrumbs; /* @@ -189,7 +193,6 @@ struct intel_engine_cs { struct intel_hw_status_page status_page; struct i915_ctx_workarounds wa_ctx; - bool irq_posted; u32 irq_keep_mask; /* always keep these interrupts */ u32 irq_enable_mask; /* bitmask to enable ring interrupt */ void (*irq_enable)(struct intel_engine_cs *ring); @@ -319,7 +322,6 @@ struct intel_engine_cs { * inspecting request list. */ u32 last_submitted_seqno; - unsigned user_interrupts; bool gpu_caches_dirty; @@ -543,13 +545,13 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request); static inline bool intel_engine_has_waiter(struct intel_engine_cs *engine) { - return READ_ONCE(engine->breadcrumbs.tasklet); + return READ_ONCE(engine->breadcrumbs.irq_seqno_bh); } static inline bool intel_engine_wakeup(struct intel_engine_cs *engine) { bool wakeup = false; - struct task_struct *tsk = READ_ONCE(engine->breadcrumbs.tasklet); + struct task_struct *tsk = READ_ONCE(engine->breadcrumbs.irq_seqno_bh); /* Note that for this not to dangerously chase a dangling pointer, * the caller is responsible for ensure that the task remain valid for * wake_up_process() i.e. that the RCU grace period cannot expire. -- 2.8.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx