Re: [PATCH v3 3/5] drm/i915: Make sprite updates atomic

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Sun, Jan 26, 2014 at 06:29:06PM +0100, Daniel Vetter wrote:
> On Tue, Jan 21, 2014 at 04:12:38PM +0200, ville.syrjala@xxxxxxxxxxxxxxx wrote:
> > From: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx>
> >
> > Add a mechanism by which we can evade the leading edge of vblank. This
> > guarantees that no two sprite register writes will straddle on either
> > side of the vblank start, and that means all the writes will be latched
> > together in one atomic operation.
> >
> > We do the vblank evade by checking the scanline counter, and if it's too
> > close to the start of vblank (too close has been hardcoded to 100usec
> > for now), we will wait for the vblank start to pass. In order to
> > eliminate random delayes from the rest of the system, we operate with
> > interrupts disabled, except when waiting for the vblank obviously.
> >
> > Note that we now go digging through pipe_to_crtc_mapping[] in the
> > vblank interrupt handler, which is a bit dangerous since we set up
> > interrupts before the crtcs. However in this case since it's the vblank
> > interrupt, we don't actually unmask it until some piece of code
> > requests it.
> >
> > v2: preempt_check_resched() calls after local_irq_enable() (Jesse)
> >     Hook up the vblank irq stuff on BDW as well
> > v3: Pass intel_crtc instead of drm_crtc (Daniel)
> >     Warn if crtc.mutex isn't locked (Daniel)
> >     Add an explicit compiler barrier and document the barriers (Daniel)
> >     Note the irq vs. modeset setup madness in the commit message (Daniel)
> >
> > Reviewed-by: Jesse Barnes <jbarnes@xxxxxxxxxxxxxxxx>
> > Signed-off-by: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx>
> > ---
> >  drivers/gpu/drm/i915/i915_irq.c      |  29 ++++++++--
> >  drivers/gpu/drm/i915/intel_display.c |   2 +
> >  drivers/gpu/drm/i915/intel_drv.h     |   3 +
> >  drivers/gpu/drm/i915/intel_sprite.c  | 103 +++++++++++++++++++++++++++++++++++
> >  4 files changed, 133 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> > index ffb56a9..8ef9edb 100644
> > --- a/drivers/gpu/drm/i915/i915_irq.c
> > +++ b/drivers/gpu/drm/i915/i915_irq.c
> > @@ -1439,6 +1439,15 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
> >   }
> >  }
> >  
> > +static void intel_pipe_handle_vblank(struct drm_device *dev, enum pipe pipe)
> > +{
> > + struct intel_crtc *intel_crtc =
> > + to_intel_crtc(intel_get_crtc_for_pipe(dev, pipe));
> > +
> > + intel_crtc->vbl_received = true;
> > + wake_up(&intel_crtc->vbl_wait);
> > +}
> > +
> >  static irqreturn_t valleyview_irq_handler(int irq, void *arg)
> >  {
> >   struct drm_device *dev = (struct drm_device *) arg;
> > @@ -1479,8 +1488,10 @@ static irqreturn_t valleyview_irq_handler(int irq, void *arg)
> >   spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
> >  
> >   for_each_pipe(pipe) {
> > - if (pipe_stats[pipe] & PIPE_START_VBLANK_INTERRUPT_STATUS)
> > + if (pipe_stats[pipe] & PIPE_START_VBLANK_INTERRUPT_STATUS) {
> >   drm_handle_vblank(dev, pipe);
> > + intel_pipe_handle_vblank(dev, pipe);
> > + }
> >  
> >   if (pipe_stats[pipe] & PLANE_FLIPDONE_INT_STATUS_VLV) {
> >   intel_prepare_page_flip(dev, pipe);
> > @@ -1679,8 +1690,10 @@ static void ilk_display_irq_handler(struct drm_device *dev, u32 de_iir)
> >   DRM_ERROR("Poison interrupt\n");
> >  
> >   for_each_pipe(pipe) {
> > - if (de_iir & DE_PIPE_VBLANK(pipe))
> > + if (de_iir & DE_PIPE_VBLANK(pipe)) {
> >   drm_handle_vblank(dev, pipe);
> > + intel_pipe_handle_vblank(dev, pipe);
> > + }
> >  
> >   if (de_iir & DE_PIPE_FIFO_UNDERRUN(pipe))
> >   if (intel_set_cpu_fifo_underrun_reporting(dev, pipe, false))
> > @@ -1729,8 +1742,10 @@ static void ivb_display_irq_handler(struct drm_device *dev, u32 de_iir)
> >   intel_opregion_asle_intr(dev);
> >  
> >   for_each_pipe(i) {
> > - if (de_iir & (DE_PIPE_VBLANK_IVB(i)))
> > + if (de_iir & (DE_PIPE_VBLANK_IVB(i))) {
> >   drm_handle_vblank(dev, i);
> > + intel_pipe_handle_vblank(dev, i);
> > + }
> >  
> >   /* plane/pipes map 1:1 on ilk+ */
> >   if (de_iir & DE_PLANE_FLIP_DONE_IVB(i)) {
> > @@ -1872,8 +1887,10 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg)
> >   continue;
> >  
> >   pipe_iir = I915_READ(GEN8_DE_PIPE_IIR(pipe));
> > - if (pipe_iir & GEN8_PIPE_VBLANK)
> > + if (pipe_iir & GEN8_PIPE_VBLANK) {
> >   drm_handle_vblank(dev, pipe);
> > + intel_pipe_handle_vblank(dev, pipe);
> > + }
> >  
> >   if (pipe_iir & GEN8_PIPE_FLIP_DONE) {
> >   intel_prepare_page_flip(dev, pipe);
> > @@ -3161,6 +3178,8 @@ static bool i8xx_handle_vblank(struct drm_device *dev,
> >   if (!drm_handle_vblank(dev, pipe))
> >   return false;
> >  
> > + intel_pipe_handle_vblank(dev, pipe);
> > +
> >   if ((iir & flip_pending) == 0)
> >   return false;
> >  
> > @@ -3344,6 +3363,8 @@ static bool i915_handle_vblank(struct drm_device *dev,
> >   if (!drm_handle_vblank(dev, pipe))
> >   return false;
> >  
> > + intel_pipe_handle_vblank(dev, pipe);
> > +
> >   if ((iir & flip_pending) == 0)
> >   return false;
> >  
> > diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> > index d1cc7ea..b39e445 100644
> > --- a/drivers/gpu/drm/i915/intel_display.c
> > +++ b/drivers/gpu/drm/i915/intel_display.c
> > @@ -10297,6 +10297,8 @@ static void intel_crtc_init(struct drm_device *dev, int pipe)
> >   intel_crtc->plane = !pipe;
> >   }
> >  
> > + init_waitqueue_head(&intel_crtc->vbl_wait);
> > +
> >   BUG_ON(pipe >= ARRAY_SIZE(dev_priv->plane_to_crtc_mapping) ||
> >         dev_priv->plane_to_crtc_mapping[intel_crtc->plane] != NULL);
> >   dev_priv->plane_to_crtc_mapping[intel_crtc->plane] = &intel_crtc->base;
> > diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> > index 318bb4c..e539550 100644
> > --- a/drivers/gpu/drm/i915/intel_drv.h
> > +++ b/drivers/gpu/drm/i915/intel_drv.h
> > @@ -376,6 +376,9 @@ struct intel_crtc {
> >   /* watermarks currently being used  */
> >   struct intel_pipe_wm active;
> >   } wm;
> > +
> > + wait_queue_head_t vbl_wait;
> > + bool vbl_received;
> >  };
> >  
> >  struct intel_plane_wm_parameters {
> > diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
> > index ed9fa7c..1ffa7ba 100644
> > --- a/drivers/gpu/drm/i915/intel_sprite.c
> > +++ b/drivers/gpu/drm/i915/intel_sprite.c
> > @@ -37,6 +37,79 @@
> >  #include <drm/i915_drm.h>
> >  #include "i915_drv.h"
> >  
> > +static unsigned int usecs_to_scanlines(const struct drm_display_mode *mode, unsigned int usecs)
> > +{
> > + /* paranoia */
> > + if (!mode->crtc_htotal)
> > + return 1;
> > +
> > + return DIV_ROUND_UP(usecs * mode->crtc_clock, 1000 * mode->crtc_htotal);
> > +}
> > +
> > +static void intel_pipe_update_start(struct intel_crtc *crtc)
> > +{
> > + struct drm_device *dev = crtc->base.dev;
> > + const struct drm_display_mode *mode = &crtc->config.adjusted_mode;
> > + enum pipe pipe = crtc->pipe;
> > + /* FIXME needs to be calibrated sensibly */
> > + unsigned int min = mode->crtc_vblank_start - usecs_to_scanlines(mode, 100);
> > + unsigned int max = mode->crtc_vblank_start - 1;
> > + long timeout = msecs_to_jiffies_timeout(1);
> > + unsigned int scanline;
> > +
> > + WARN_ON(!mutex_is_locked(&crtc->base.mutex));
> > +
> > + if (WARN_ON(drm_vblank_get(dev, pipe)))
> > + return;
> > +
> > + local_irq_disable();
> > +
> > + /*
> > + * Explicit compiler barrier is used to make sure that vbl_received
> > + * store happens before reading the current scanline, otherwise
> > + * something like this this might happen:
> > + *
> > + * 1. scanline = intel_get_crtc_scanline();
> > + * 2. vblank irq -> vbl_received = true
> > + * 3. vbl_received = false
> > + *
> > + * Which could make us miss the fact that we already crossed into
> > + * vblank even though the scanline indicates that we're somewhere
> > + * just before the start of vblank.
> > + *
> > + * wake_up() vs. wait_event_timeout() imply the necessary memory
> > + * barries to make sure wait_event_timeout() sees the correct
> > + * value of vbl_received after waking up.
> > + */
> > + crtc->vbl_received = false;
> > + barrier();
> > + scanline = intel_get_crtc_scanline(&crtc->base);
> 
> Ok, took a bit longer but here's it. I think we need a bit more polish
> with this one. My thoughts about all this:
> 
> - Definitely an s/barrier/smp_*/ required. Yeah, on x86 with the total
>   store order it's ok, but a big reason for adding barriers and comments
>   is documentation. And a big reason for that is to deter people from too
>   many lockless tricks ;-)

I still don't see why you'd need an smp_ memory barrier here. It's just
one regular store vs. mmio read ordering issue we have here.

If we'd really need to worry about hardware reordering the operations,
then it would seem that we'd need a mandatory memory barrier instead
of an smp one.

> 
> - You still write from both process context and irq context, which makes
>   thinking about anything lockless much harder. I prefer a strict
>   consumer/producer relationship for lockless tricks. We can get there by
>   tracking the absolute framecounter instead of the vbl_received edge
>   signal.
> 
> - Finally I don't trust our hw to get the coherency right between all
>   this. Especially since the actual vblank interrupt is so neatly
>   ill-defined.

My observation is that the vblank ISR behaves very well on PCH platforms.
On earlier platforms it's a bit more unclear whether you get the start
of vblank, or a slightly delayed version. But that should not matter
anyway, since if we end up waiting, the delay is not a problem, and if
we don't end up waiting, well then it doesn't matter when the event
would really trigger. The only concern in the latter case would be where
the hardware would really latch the new register values. Ie. what
happens if the write lands in the short window between the start of
vblank, and when the vblank event triggers.

Hmm. In fact the current code might not do the right thing on platforms
where we have the scanling=vbl-1 issue w/o a working ISR workaround
(gen2, ctg, vlv). That's a bit sad. Not sure if there's any other fix than
to add a slight delay to double check the scanline in that case case :(
ATM it might fall through to the to 1 ms timeout, which should actually
be OK as long as it doesn't get scheculed back in exactly in the danger
zone for the next vblank. Anyway the code is relying on that even in the
case where it decides to wait for the vblank interrupt. Of course it
would be possible to retry until we're guaranteed to be in the clear,
but then there's no guarantee of forward progress, which doesn't sound
very nice. I guess one option would to do a interrupts off polling wait
the second time around to make sure we get it right then.

> So I think we should actually base our decisions whether we
>   need to block still or whether we've just missed the interrupt on the
>   hardware frame counter. Which means we also need a seqlock-like loop to
>   grab the scanline:
>   1. read hw framecounter
>   2. read scanline
>   3. read hw framecounter again, if they mismatch go back to 1.

Or just figure out which side of vblank start we're at. Well, that
might only work on PCH platforms since on CTG we can't know which side
we're at if the scanline counter says vbl_start-1 (due to ISR being
useless). On gen3/4 we already have an atomic read of the pixel counter
with the frame counter (with another kind of seq lock thingy due to
the high+low frame counter split).

> 
>   1-3. could be done with irqs disabled for better behaviour
> 
>   Then if we are in the danger scanline zone we can just add 1 to the
>   read-out framecounter - since the hw guarantees to update all registers
>   atomically it's hopefully save ...

We could use the frame counter sure, except on gen2 where we'd need to
cook up a software frame counter then. Well, basically I'd just have to
replace vbl_received w/ an atomic counter.

> 
> - A better plan on more recent platforms might actually be to read out the
>   vblank timestamp register and compare that with the reference clock. If
>   it's too near we'll block for the vblank. That way we also address the
>   neat Bspec language that the vblank update is done ... somewhere not
>   quite specified exactly.

More codepaths and more bugs. Also as the scanline counter is reliable
on PCH platforms I don't see any benefit at this point. It can be done
later if deemed useful.

> 
> So with my ignorant opinion dumped, what's your stance on all this? ;-)
> 
> Cheers, Daniel
> 
> > +
> > + while (scanline >= min && scanline <= max && timeout > 0) {
> > + local_irq_enable();
> > + preempt_check_resched();
> > +
> > + timeout = wait_event_timeout(crtc->vbl_wait,
> > +     crtc->vbl_received,
> > +     timeout);
> > +
> > + local_irq_disable();
> > +
> > + /* ditto */
> > + crtc->vbl_received = false;
> > + barrier();
> > + scanline = intel_get_crtc_scanline(&crtc->base);
> > + }
> > +
> > + drm_vblank_put(dev, pipe);
> > +}
> > +
> > +static void intel_pipe_update_end(struct intel_crtc *crtc)
> > +{
> > + local_irq_enable();
> > + preempt_check_resched();
> > +}
> > +
> >  static void
> >  vlv_update_plane(struct drm_plane *dplane, struct drm_crtc *crtc,
> >   struct drm_framebuffer *fb,
> > @@ -48,6 +121,7 @@ vlv_update_plane(struct drm_plane *dplane, struct drm_crtc *crtc,
> >   struct drm_device *dev = dplane->dev;
> >   struct drm_i915_private *dev_priv = dev->dev_private;
> >   struct intel_plane *intel_plane = to_intel_plane(dplane);
> > + struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> >   int pipe = intel_plane->pipe;
> >   int plane = intel_plane->plane;
> >   u32 sprctl;
> > @@ -131,6 +205,8 @@ vlv_update_plane(struct drm_plane *dplane, struct drm_crtc *crtc,
> >   fb->pitches[0]);
> >   linear_offset -= sprsurf_offset;
> >  
> > + intel_pipe_update_start(intel_crtc);
> > +
> >   I915_WRITE(SPSTRIDE(pipe, plane), fb->pitches[0]);
> >   I915_WRITE(SPPOS(pipe, plane), (crtc_y << 16) | crtc_x);
> >  
> > @@ -144,6 +220,8 @@ vlv_update_plane(struct drm_plane *dplane, struct drm_crtc *crtc,
> >   I915_MODIFY_DISPBASE(SPSURF(pipe, plane), i915_gem_obj_ggtt_offset(obj) +
> >       sprsurf_offset);
> >   POSTING_READ(SPSURF(pipe, plane));
> > +
> > + intel_pipe_update_end(intel_crtc);
> >  }
> >  
> >  static void
> > @@ -152,15 +230,20 @@ vlv_disable_plane(struct drm_plane *dplane, struct drm_crtc *crtc)
> >   struct drm_device *dev = dplane->dev;
> >   struct drm_i915_private *dev_priv = dev->dev_private;
> >   struct intel_plane *intel_plane = to_intel_plane(dplane);
> > + struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> >   int pipe = intel_plane->pipe;
> >   int plane = intel_plane->plane;
> >  
> > + intel_pipe_update_start(intel_crtc);
> > +
> >   I915_WRITE(SPCNTR(pipe, plane), I915_READ(SPCNTR(pipe, plane)) &
> >     ~SP_ENABLE);
> >   /* Activate double buffered register update */
> >   I915_MODIFY_DISPBASE(SPSURF(pipe, plane), 0);
> >   POSTING_READ(SPSURF(pipe, plane));
> >  
> > + intel_pipe_update_end(intel_crtc);
> > +
> >   intel_update_sprite_watermarks(dplane, crtc, 0, 0, false, false);
> >  }
> >  
> > @@ -226,6 +309,7 @@ ivb_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
> >   struct drm_device *dev = plane->dev;
> >   struct drm_i915_private *dev_priv = dev->dev_private;
> >   struct intel_plane *intel_plane = to_intel_plane(plane);
> > + struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> >   int pipe = intel_plane->pipe;
> >   u32 sprctl, sprscale = 0;
> >   unsigned long sprsurf_offset, linear_offset;
> > @@ -299,6 +383,8 @@ ivb_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
> >         pixel_size, fb->pitches[0]);
> >   linear_offset -= sprsurf_offset;
> >  
> > + intel_pipe_update_start(intel_crtc);
> > +
> >   I915_WRITE(SPRSTRIDE(pipe), fb->pitches[0]);
> >   I915_WRITE(SPRPOS(pipe), (crtc_y << 16) | crtc_x);
> >  
> > @@ -318,6 +404,8 @@ ivb_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
> >   I915_MODIFY_DISPBASE(SPRSURF(pipe),
> >       i915_gem_obj_ggtt_offset(obj) + sprsurf_offset);
> >   POSTING_READ(SPRSURF(pipe));
> > +
> > + intel_pipe_update_end(intel_crtc);
> >  }
> >  
> >  static void
> > @@ -326,8 +414,11 @@ ivb_disable_plane(struct drm_plane *plane, struct drm_crtc *crtc)
> >   struct drm_device *dev = plane->dev;
> >   struct drm_i915_private *dev_priv = dev->dev_private;
> >   struct intel_plane *intel_plane = to_intel_plane(plane);
> > + struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> >   int pipe = intel_plane->pipe;
> >  
> > + intel_pipe_update_start(intel_crtc);
> > +
> >   I915_WRITE(SPRCTL(pipe), I915_READ(SPRCTL(pipe)) & ~SPRITE_ENABLE);
> >   /* Can't leave the scaler enabled... */
> >   if (intel_plane->can_scale)
> > @@ -336,6 +427,8 @@ ivb_disable_plane(struct drm_plane *plane, struct drm_crtc *crtc)
> >   I915_MODIFY_DISPBASE(SPRSURF(pipe), 0);
> >   POSTING_READ(SPRSURF(pipe));
> >  
> > + intel_pipe_update_end(intel_crtc);
> > +
> >   /*
> >   * Avoid underruns when disabling the sprite.
> >   * FIXME remove once watermark updates are done properly.
> > @@ -410,6 +503,7 @@ ilk_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
> >   struct drm_device *dev = plane->dev;
> >   struct drm_i915_private *dev_priv = dev->dev_private;
> >   struct intel_plane *intel_plane = to_intel_plane(plane);
> > + struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> >   int pipe = intel_plane->pipe;
> >   unsigned long dvssurf_offset, linear_offset;
> >   u32 dvscntr, dvsscale;
> > @@ -478,6 +572,8 @@ ilk_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
> >         pixel_size, fb->pitches[0]);
> >   linear_offset -= dvssurf_offset;
> >  
> > + intel_pipe_update_start(intel_crtc);
> > +
> >   I915_WRITE(DVSSTRIDE(pipe), fb->pitches[0]);
> >   I915_WRITE(DVSPOS(pipe), (crtc_y << 16) | crtc_x);
> >  
> > @@ -492,6 +588,8 @@ ilk_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
> >   I915_MODIFY_DISPBASE(DVSSURF(pipe),
> >       i915_gem_obj_ggtt_offset(obj) + dvssurf_offset);
> >   POSTING_READ(DVSSURF(pipe));
> > +
> > + intel_pipe_update_end(intel_crtc);
> >  }
> >  
> >  static void
> > @@ -500,8 +598,11 @@ ilk_disable_plane(struct drm_plane *plane, struct drm_crtc *crtc)
> >   struct drm_device *dev = plane->dev;
> >   struct drm_i915_private *dev_priv = dev->dev_private;
> >   struct intel_plane *intel_plane = to_intel_plane(plane);
> > + struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> >   int pipe = intel_plane->pipe;
> >  
> > + intel_pipe_update_start(intel_crtc);
> > +
> >   I915_WRITE(DVSCNTR(pipe), I915_READ(DVSCNTR(pipe)) & ~DVS_ENABLE);
> >   /* Disable the scaler */
> >   I915_WRITE(DVSSCALE(pipe), 0);
> > @@ -509,6 +610,8 @@ ilk_disable_plane(struct drm_plane *plane, struct drm_crtc *crtc)
> >   I915_MODIFY_DISPBASE(DVSSURF(pipe), 0);
> >   POSTING_READ(DVSSURF(pipe));
> >  
> > + intel_pipe_update_end(intel_crtc);
> > +
> >   /*
> >   * Avoid underruns when disabling the sprite.
> >   * FIXME remove once watermark updates are done properly.
> > --
> > 1.8.3.2
> >
> > _______________________________________________
> > Intel-gfx mailing list
> > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
> > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
> 
> -- 
> Daniel Vetter
> Software Engineer, Intel Corporation
> +41 (0) 79 365 57 48 - http://blog.ffwll.ch

-- 
Ville Syrjälä
Intel OTC
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx





[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux