On Mon, Jan 26, 2015 at 04:43:24AM -0800, Rodrigo Vivi wrote: > From: Dave Gordon <david.s.gordon@xxxxxxxxx> > > On some generations of chips, it is necessary to read an MMIO register > before getting the sequence number from the status page in main memory, > in order to ensure coherency; and on all generations this should be > either helpful or harmless. > > In general, we want this operation to be the cheapest possible, since > we require only the side-effect of DMA completion and don't interpret > the result of the read, and don't require any coordination with other > threads, power domains, or anything else. > > However, finding a suitable register may be problematic; on GEN6 chips > the ACTHD register was used, but on VLV et al access to this register > requires FORCEWAKE and therefore many complications involving spinlocks > and polling. > > So this commit introduces this synchronising operation as a distinct > vfunc in the engine structure, so that it can be GEN- or chip-specific > if needed. > > And there are three implementations; a dummy one, for chips where no > synchronising read is needed, a gen6(+) version that issues a posting > read (to TAIL), and a VLV-specific one that issues a raw read instead, > avoiding touching FORCEWAKE and GTFIFO and other such complications. > > We then change gen6_ring_get_seqno() to use this new irq_barrier rather > than a POSTING_READ of ACTHD. Note that both older (pre-GEN6) and newer > (GEN8+) devices running in LRC mode do not currently include any posting > read in their own get_seqno() implementations, so this change only > makes a difference on VLV (and not CHV+). > > Signed-off-by: Dave Gordon <david.s.gordon@xxxxxxxxx> > Tested-By: PRC QA PRTS (Patch Regression Test System Contact: shuang.he@xxxxxxxxx) > Signed-off-by: Rodrigo Vivi <rodrigo.vivi@xxxxxxxxx> > --- > drivers/gpu/drm/i915/intel_ringbuffer.c | 37 +++++++++++++++++++++++++++++++-- > drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + > 2 files changed, 36 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c > index 23020d6..97473ed 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.c > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c > @@ -1227,6 +1227,28 @@ pc_render_add_request(struct intel_engine_cs *ring) > return 0; > } > > +static void > +dummy_irq_barrier(struct intel_engine_cs *ring) > +{ > +} > + > +static void > +gen6_irq_barrier(struct intel_engine_cs *ring) > +{ > + struct drm_i915_private *dev_priv = to_i915(ring->dev); > + POSTING_READ(RING_TAIL(ring->mmio_base)); > +} > + > +#define __raw_i915_read32(dev_priv__, reg__) readl((dev_priv__)->regs + (reg__)) > +#define RAW_POSTING_READ(reg__) (void)__raw_i915_read32(dev_priv, reg__) > + > +static void > +vlv_irq_barrier(struct intel_engine_cs *ring) > +{ > + struct drm_i915_private *dev_priv = to_i915(ring->dev); > + RAW_POSTING_READ(RING_TAIL(ring->mmio_base)); > +} > + > static u32 > gen6_ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency) > { > @@ -1234,8 +1256,7 @@ gen6_ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency) > * ivb (and maybe also on snb) by reading from a CS register (like > * ACTHD) before reading the status page. */ > if (!lazy_coherency) { > - struct drm_i915_private *dev_priv = ring->dev->dev_private; > - POSTING_READ(RING_ACTHD(ring->mmio_base)); > + ring->irq_barrier(ring); > } Imo just do a vlv_ring_get_seqno if this is a problem. Adding a vfunc with mostly empty or same implemenation to another very tiny vfunc isn't doing a whole lot of good to the codebase. -Daniel > > return intel_read_status_page(ring, I915_GEM_HWS_INDEX); > @@ -2393,6 +2414,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev) > ring->irq_get = gen8_ring_get_irq; > ring->irq_put = gen8_ring_put_irq; > ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT; > + ring->irq_barrier = gen6_irq_barrier; > ring->get_seqno = gen6_ring_get_seqno; > ring->set_seqno = ring_set_seqno; > if (i915_semaphore_is_enabled(dev)) { > @@ -2409,6 +2431,10 @@ int intel_init_render_ring_buffer(struct drm_device *dev) > ring->irq_get = gen6_ring_get_irq; > ring->irq_put = gen6_ring_put_irq; > ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT; > + if (IS_VALLEYVIEW(dev) && !IS_GEN8(dev)) > + ring->irq_barrier = vlv_irq_barrier; > + else > + ring->irq_barrier = gen6_irq_barrier; > ring->get_seqno = gen6_ring_get_seqno; > ring->set_seqno = ring_set_seqno; > if (i915_semaphore_is_enabled(dev)) { > @@ -2435,6 +2461,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev) > } else if (IS_GEN5(dev)) { > ring->add_request = pc_render_add_request; > ring->flush = gen4_render_ring_flush; > + ring->irq_barrier = dummy_irq_barrier; > ring->get_seqno = pc_render_get_seqno; > ring->set_seqno = pc_render_set_seqno; > ring->irq_get = gen5_ring_get_irq; > @@ -2447,6 +2474,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev) > ring->flush = gen2_render_ring_flush; > else > ring->flush = gen4_render_ring_flush; > + ring->irq_barrier = dummy_irq_barrier; > ring->get_seqno = ring_get_seqno; > ring->set_seqno = ring_set_seqno; > if (IS_GEN2(dev)) { > @@ -2523,6 +2551,7 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev) > ring->write_tail = gen6_bsd_ring_write_tail; > ring->flush = gen6_bsd_ring_flush; > ring->add_request = gen6_add_request; > + ring->irq_barrier = gen6_irq_barrier; > ring->get_seqno = gen6_ring_get_seqno; > ring->set_seqno = ring_set_seqno; > if (INTEL_INFO(dev)->gen >= 8) { > @@ -2562,6 +2591,7 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev) > ring->mmio_base = BSD_RING_BASE; > ring->flush = bsd_ring_flush; > ring->add_request = i9xx_add_request; > + ring->irq_barrier = dummy_irq_barrier; > ring->get_seqno = ring_get_seqno; > ring->set_seqno = ring_set_seqno; > if (IS_GEN5(dev)) { > @@ -2601,6 +2631,7 @@ int intel_init_bsd2_ring_buffer(struct drm_device *dev) > ring->mmio_base = GEN8_BSD2_RING_BASE; > ring->flush = gen6_bsd_ring_flush; > ring->add_request = gen6_add_request; > + ring->irq_barrier = gen6_irq_barrier; > ring->get_seqno = gen6_ring_get_seqno; > ring->set_seqno = ring_set_seqno; > ring->irq_enable_mask = > @@ -2631,6 +2662,7 @@ int intel_init_blt_ring_buffer(struct drm_device *dev) > ring->write_tail = ring_write_tail; > ring->flush = gen6_ring_flush; > ring->add_request = gen6_add_request; > + ring->irq_barrier = gen6_irq_barrier; > ring->get_seqno = gen6_ring_get_seqno; > ring->set_seqno = ring_set_seqno; > if (INTEL_INFO(dev)->gen >= 8) { > @@ -2688,6 +2720,7 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev) > ring->write_tail = ring_write_tail; > ring->flush = gen6_ring_flush; > ring->add_request = gen6_add_request; > + ring->irq_barrier = gen6_irq_barrier; > ring->get_seqno = gen6_ring_get_seqno; > ring->set_seqno = ring_set_seqno; > > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h > index 6dbb6f4..f686929 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h > @@ -163,6 +163,7 @@ struct intel_engine_cs { > * seen value is good enough. Note that the seqno will always be > * monotonic, even if not coherent. > */ > + void (*irq_barrier)(struct intel_engine_cs *ring); > u32 (*get_seqno)(struct intel_engine_cs *ring, > bool lazy_coherency); > void (*set_seqno)(struct intel_engine_cs *ring, > -- > 1.9.3 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx