Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> writes:

> In order to simplify future patches, extract the
> lazy_coherency optimisation out of the engine->get_seqno() vfunc into
> its own callback.
>
> v2: Rename the barrier to engine->irq_seqno_barrier to try and better
> reflect that the barrier is only required after the user interrupt before
> reading the seqno (to ensure that the seqno update lands in time as we
> do not have strict seqno-irq ordering on all platforms).
>
> Reviewed-by: Dave Gordon <david.s.gordon@xxxxxxxxx> [#v2]
>
> v3: Comments for hangcheck paranoia. Mika wanted to keep the extra
> barrier inside the hangcheck, just in case. I can argue that it doesn't
> provide a barrier against anything, but the side-effects of applying the
> barrier may prevent a false declaration of a hung GPU.
>
> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxx>
> Cc: Dave Gordon <david.s.gordon@xxxxxxxxx>
> ---

Reviewed-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxx>

>  drivers/gpu/drm/i915/i915_debugfs.c     |  6 +++---
>  drivers/gpu/drm/i915/i915_drv.h         | 12 ++++++++----
>  drivers/gpu/drm/i915/i915_gpu_error.c   |  2 +-
>  drivers/gpu/drm/i915/i915_irq.c         | 14 ++++++++++++--
>  drivers/gpu/drm/i915/i915_trace.h       |  2 +-
>  drivers/gpu/drm/i915/intel_lrc.c        | 18 ++++++-----------
>  drivers/gpu/drm/i915/intel_ringbuffer.c | 34 +++++++++++++++++----------------
>  drivers/gpu/drm/i915/intel_ringbuffer.h |  4 ++--
>  8 files changed, 51 insertions(+), 41 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index ec0c2a05eed6..c4df580ed0de 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -600,7 +600,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data)
>  				   ring->name,
>  				   i915_gem_request_get_seqno(work->flip_queued_req),
>  				   dev_priv->next_seqno,
> -				   ring->get_seqno(ring, true),
> +				   ring->get_seqno(ring),
>  				   i915_gem_request_completed(work->flip_queued_req, true));
>  		} else
>  			seq_printf(m, "Flip not associated with any ring\n");
> @@ -732,7 +732,7 @@ static void i915_ring_seqno_info(struct seq_file *m,
>  {
>  	if (ring->get_seqno) {
>  		seq_printf(m, "Current sequence (%s): %x\n",
> -			   ring->name, ring->get_seqno(ring, false));
> +			   ring->name, ring->get_seqno(ring));
>  	}
>  }
>
> @@ -1342,8 +1342,8 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
>  	intel_runtime_pm_get(dev_priv);
>
>  	for_each_ring(ring, dev_priv, i) {
> -		seqno[i] = ring->get_seqno(ring, false);
>  		acthd[i] = intel_ring_get_active_head(ring);
> +		seqno[i] = ring->get_seqno(ring);
>  	}
>
>  	i915_get_extra_instdone(dev, instdone);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 20d9dbd9f9cf..525f39c0551b 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3001,15 +3001,19 @@ i915_seqno_passed(uint32_t seq1, uint32_t seq2)
>  static inline bool i915_gem_request_started(struct drm_i915_gem_request *req,
>  					    bool lazy_coherency)
>  {
> -	u32 seqno = req->ring->get_seqno(req->ring, lazy_coherency);
> -	return i915_seqno_passed(seqno, req->previous_seqno);
> +	if (!lazy_coherency && req->ring->irq_seqno_barrier)
> +		req->ring->irq_seqno_barrier(req->ring);
> +	return i915_seqno_passed(req->ring->get_seqno(req->ring),
> +				 req->previous_seqno);
>  }
>
>  static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req,
>  					      bool lazy_coherency)
>  {
> -	u32 seqno = req->ring->get_seqno(req->ring, lazy_coherency);
> -	return i915_seqno_passed(seqno, req->seqno);
> +	if (!lazy_coherency && req->ring->irq_seqno_barrier)
> +		req->ring->irq_seqno_barrier(req->ring);
> +	return i915_seqno_passed(req->ring->get_seqno(req->ring),
> +				 req->seqno);
>  }
>
>  int __must_check i915_gem_get_seqno(struct drm_device *dev, u32 *seqno);
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 978c026963b8..33f1e0636a03 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -906,8 +906,8 @@ static void i915_record_ring_state(struct drm_device *dev,
>
>  	ering->waiting = waitqueue_active(&ring->irq_queue);
>  	ering->instpm = I915_READ(RING_INSTPM(ring->mmio_base));
> -	ering->seqno = ring->get_seqno(ring, false);
>  	ering->acthd = intel_ring_get_active_head(ring);
> +	ering->seqno = ring->get_seqno(ring);
>  	ering->start = I915_READ_START(ring);
>  	ering->head = I915_READ_HEAD(ring);
>  	ering->tail = I915_READ_TAIL(ring);
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 25a89373df63..07bc2cdd6252 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -2937,7 +2937,7 @@ static int semaphore_passed(struct intel_engine_cs *ring)
>  	if (signaller->hangcheck.deadlock >= I915_NUM_RINGS)
>  		return -1;
>
> -	if (i915_seqno_passed(signaller->get_seqno(signaller, false), seqno))
> +	if (i915_seqno_passed(signaller->get_seqno(signaller), seqno))
>  		return 1;
>
>  	/* cursory check for an unkickable deadlock */
> @@ -3101,8 +3101,18 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
>
>  		semaphore_clear_deadlocks(dev_priv);
>
> -		seqno = ring->get_seqno(ring, false);
> +		/* We don't strictly need an irq-barrier here, as we are not
> +		 * serving an interrupt request, be paranoid in case the
> +		 * barrier has side-effects (such as preventing a broken
> +		 * cacheline snoop) and so be sure that we can see the seqno
> +		 * advance. If the seqno should stick, due to a stale
> +		 * cacheline, we would erroneously declare the GPU hung.
> +		 */
> +		if (ring->irq_seqno_barrier)
> +			ring->irq_seqno_barrier(ring);
> +
>  		acthd = intel_ring_get_active_head(ring);
> +		seqno = ring->get_seqno(ring);
>
>  		if (ring->hangcheck.seqno == seqno) {
>  			if (ring_idle(ring, seqno)) {
> diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
> index 52b2d409945d..cfb5f78a6e84 100644
> --- a/drivers/gpu/drm/i915/i915_trace.h
> +++ b/drivers/gpu/drm/i915/i915_trace.h
> @@ -573,7 +573,7 @@ TRACE_EVENT(i915_gem_request_notify,
>  	    TP_fast_assign(
>  			   __entry->dev = ring->dev->primary->index;
>  			   __entry->ring = ring->id;
> -			   __entry->seqno = ring->get_seqno(ring, false);
> +			   __entry->seqno = ring->get_seqno(ring);
>  			   ),
>
>  	    TP_printk("dev=%u, ring=%u, seqno=%u",
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 3a03646e343d..0e833d470c5e 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1837,7 +1837,7 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
>  	return 0;
>  }
>
> -static u32 gen8_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
> +static u32 gen8_get_seqno(struct intel_engine_cs *ring)
>  {
>  	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
>  }
> @@ -1847,9 +1847,8 @@ static void gen8_set_seqno(struct intel_engine_cs *ring, u32 seqno)
>  	intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno);
>  }
>
> -static u32 bxt_a_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
> +static void bxt_seqno_barrier(struct intel_engine_cs *ring)
>  {
> -
>  	/*
>  	 * On BXT A steppings there is a HW coherency issue whereby the
>  	 * MI_STORE_DATA_IMM storing the completed request's seqno
> @@ -1860,11 +1859,7 @@ static u32 bxt_a_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
>  	 * bxt_a_set_seqno(), where we also do a clflush after the write. So
>  	 * this clflush in practice becomes an invalidate operation.
>  	 */
> -
> -	if (!lazy_coherency)
> -		intel_flush_status_page(ring, I915_GEM_HWS_INDEX);
> -
> -	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
> +	intel_flush_status_page(ring, I915_GEM_HWS_INDEX);
>  }
>
>  static void bxt_a_set_seqno(struct intel_engine_cs *ring, u32 seqno)
> @@ -2034,12 +2029,11 @@ logical_ring_default_vfuncs(struct drm_device *dev,
>  	ring->irq_get = gen8_logical_ring_get_irq;
>  	ring->irq_put = gen8_logical_ring_put_irq;
>  	ring->emit_bb_start = gen8_emit_bb_start;
> +	ring->get_seqno = gen8_get_seqno;
> +	ring->set_seqno = gen8_set_seqno;
>  	if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
> -		ring->get_seqno = bxt_a_get_seqno;
> +		ring->irq_seqno_barrier = bxt_seqno_barrier;
>  		ring->set_seqno = bxt_a_set_seqno;
> -	} else {
> -		ring->get_seqno = gen8_get_seqno;
> -		ring->set_seqno = gen8_set_seqno;
>  	}
>  }
>
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index abac0560d075..573203618dfe 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1548,8 +1548,8 @@ pc_render_add_request(struct drm_i915_gem_request *req)
>  	return 0;
>  }
>
> -static u32
> -gen6_ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
> +static void
> +gen6_seqno_barrier(struct intel_engine_cs *ring)
>  {
>  	/* Workaround to force correct ordering between irq and seqno writes on
>  	 * ivb (and maybe also on snb) by reading from a CS register (like
> @@ -1563,16 +1563,12 @@ gen6_ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
>  	 * batch i.e. much more frequent than a delay when waiting for the
>  	 * interrupt (with the same net latency).
>  	 */
> -	if (!lazy_coherency) {
> -		struct drm_i915_private *dev_priv = ring->dev->dev_private;
> -		POSTING_READ_FW(RING_ACTHD(ring->mmio_base));
> -	}
> -
> -	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
> +	struct drm_i915_private *dev_priv = to_i915(ring->dev);
> +	POSTING_READ_FW(RING_ACTHD(ring->mmio_base));
>  }
>
>  static u32
> -ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
> +ring_get_seqno(struct intel_engine_cs *ring)
>  {
>  	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
>  }
> @@ -1584,7 +1580,7 @@ ring_set_seqno(struct intel_engine_cs *ring, u32 seqno)
>  }
>
>  static u32
> -pc_render_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
> +pc_render_get_seqno(struct intel_engine_cs *ring)
>  {
>  	return ring->scratch.cpu_page[0];
>  }
> @@ -2783,7 +2779,8 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
>  		ring->irq_get = gen8_ring_get_irq;
>  		ring->irq_put = gen8_ring_put_irq;
>  		ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
> -		ring->get_seqno = gen6_ring_get_seqno;
> +		ring->irq_seqno_barrier = gen6_seqno_barrier;
> +		ring->get_seqno = ring_get_seqno;
>  		ring->set_seqno = ring_set_seqno;
>  		if (i915_semaphore_is_enabled(dev)) {
>  			WARN_ON(!dev_priv->semaphore_obj);
> @@ -2800,7 +2797,8 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
>  		ring->irq_get = gen6_ring_get_irq;
>  		ring->irq_put = gen6_ring_put_irq;
>  		ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
> -		ring->get_seqno = gen6_ring_get_seqno;
> +		ring->irq_seqno_barrier = gen6_seqno_barrier;
> +		ring->get_seqno = ring_get_seqno;
>  		ring->set_seqno = ring_set_seqno;
>  		if (i915_semaphore_is_enabled(dev)) {
>  			ring->semaphore.sync_to = gen6_ring_sync;
> @@ -2915,7 +2913,8 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev)
>  		ring->write_tail = gen6_bsd_ring_write_tail;
>  		ring->flush = gen6_bsd_ring_flush;
>  		ring->add_request = gen6_add_request;
> -		ring->get_seqno = gen6_ring_get_seqno;
> +		ring->irq_seqno_barrier = gen6_seqno_barrier;
> +		ring->get_seqno = ring_get_seqno;
>  		ring->set_seqno = ring_set_seqno;
>  		if (INTEL_INFO(dev)->gen >= 8) {
>  			ring->irq_enable_mask =
> @@ -2988,7 +2987,8 @@ int intel_init_bsd2_ring_buffer(struct drm_device *dev)
>  	ring->mmio_base = GEN8_BSD2_RING_BASE;
>  	ring->flush = gen6_bsd_ring_flush;
>  	ring->add_request = gen6_add_request;
> -	ring->get_seqno = gen6_ring_get_seqno;
> +	ring->irq_seqno_barrier = gen6_seqno_barrier;
> +	ring->get_seqno = ring_get_seqno;
>  	ring->set_seqno = ring_set_seqno;
>  	ring->irq_enable_mask =
>  		GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
> @@ -3019,7 +3019,8 @@ int intel_init_blt_ring_buffer(struct drm_device *dev)
>  	ring->write_tail = ring_write_tail;
>  	ring->flush = gen6_ring_flush;
>  	ring->add_request = gen6_add_request;
> -	ring->get_seqno = gen6_ring_get_seqno;
> +	ring->irq_seqno_barrier = gen6_seqno_barrier;
> +	ring->get_seqno = ring_get_seqno;
>  	ring->set_seqno = ring_set_seqno;
>  	if (INTEL_INFO(dev)->gen >= 8) {
>  		ring->irq_enable_mask =
> @@ -3077,7 +3078,8 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev)
>  	ring->write_tail = ring_write_tail;
>  	ring->flush = gen6_ring_flush;
>  	ring->add_request = gen6_add_request;
> -	ring->get_seqno = gen6_ring_get_seqno;
> +	ring->irq_seqno_barrier = gen6_seqno_barrier;
> +	ring->get_seqno = ring_get_seqno;
>  	ring->set_seqno = ring_set_seqno;
>
>  	if (INTEL_INFO(dev)->gen >= 8) {
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 566b0ae10ce0..4cea04491392 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -196,8 +196,8 @@ struct intel_engine_cs {
>  	 * seen value is good enough. Note that the seqno will always be
>  	 * monotonic, even if not coherent.
>  	 */
> -	u32		(*get_seqno)(struct intel_engine_cs *ring,
> -				     bool lazy_coherency);
> +	void		(*irq_seqno_barrier)(struct intel_engine_cs *ring);
> +	u32		(*get_seqno)(struct intel_engine_cs *ring);
>  	void		(*set_seqno)(struct intel_engine_cs *ring,
>  				     u32 seqno);
>  	int		(*dispatch_execbuffer)(struct drm_i915_gem_request *req,
> --
> 2.7.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
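For readers following just the API change rather than the full diff, the calling convention the patch settles on can be modelled in a few lines of standalone C. This is only an illustrative sketch: the struct and function names below (engine, model_get_seqno, request_completed, ...) are invented for the example and are not the driver's, but the control flow mirrors the new i915_gem_request_completed() above: a cheap get_seqno() read, preceded by the optional irq_seqno_barrier() whenever the caller needs a coherent value (typically right after a user interrupt).

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for struct intel_engine_cs: the coherency fixup is a
 * separate, optional callback rather than a lazy_coherency flag passed into
 * get_seqno() itself. */
struct engine {
	void (*irq_seqno_barrier)(struct engine *engine); /* optional */
	uint32_t (*get_seqno)(struct engine *engine);     /* cheap read */
	uint32_t hws_seqno; /* stands in for the hardware status page */
};

static uint32_t model_get_seqno(struct engine *engine)
{
	return engine->hws_seqno;
}

static void model_seqno_barrier(struct engine *engine)
{
	/* a real implementation would do an uncached MMIO read or clflush here */
	(void)engine;
}

/* same wrap-safe comparison idea as i915_seqno_passed() */
static bool seqno_passed(uint32_t seq1, uint32_t seq2)
{
	return (int32_t)(seq1 - seq2) >= 0;
}

static bool request_completed(struct engine *engine, uint32_t req_seqno,
			      bool lazy_coherency)
{
	/* apply the barrier only when coherency matters and one is provided */
	if (!lazy_coherency && engine->irq_seqno_barrier)
		engine->irq_seqno_barrier(engine);
	return seqno_passed(engine->get_seqno(engine), req_seqno);
}

int main(void)
{
	struct engine e = {
		.irq_seqno_barrier = model_seqno_barrier,
		.get_seqno = model_get_seqno,
		.hws_seqno = 42,
	};

	/* a request with seqno 40 has already been passed by the engine */
	printf("completed: %d\n", request_completed(&e, 40, false));
	return 0;
}

Engines without a coherency quirk simply leave irq_seqno_barrier NULL and pay nothing, while the irq and hangcheck paths apply the barrier explicitly, which is what the i915_drv.h and i915_irq.c hunks above do.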