Re: [PATCH 5/6] drm/i915: Grab modeset locks for GPU rest on pre-ctg

Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx> · Fri, 21 Nov 2014 23:10:31 +0200

On Fri, Nov 21, 2014 at 09:49:21PM +0100, Daniel Vetter wrote:
> On Fri, Nov 21, 2014 at 09:54:29PM +0200, ville.syrjala@xxxxxxxxxxxxxxx wrote:
> > From: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx>
> > 
> > On gen4 and earlier the GPU reset also resets the display, so we should
> > protect against concurrent modeset operations. Grab all the modeset locks
> > around the entire GPU reset dance, remebering first ti dislogde any
> > pending page flip to make sure we don't deadlock. Any pageflip coming
> > in between these two steps should fail anyway due to reset_in_progress,
> > so this should be safe.
> > 
> > This fixes a lot of failed asserts in the modeset code when there's a
> > modeset racing with the reset. Naturally the asserts aren't happy when
> > the expected state has disappeared.
> > 
> > Signed-off-by: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx>
> 
> Two comments on this one, otherwise looks good (well didn't bother to
> check the new reset register frobbing).
> -Daniel
> 
> > ---
> >  drivers/gpu/drm/i915/i915_drv.c      | 19 -------
> >  drivers/gpu/drm/i915/i915_irq.c      |  5 +-
> >  drivers/gpu/drm/i915/intel_display.c | 96 ++++++++++++++++++++++++++++++------
> >  drivers/gpu/drm/i915/intel_drv.h     |  3 +-
> >  4 files changed, 86 insertions(+), 37 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> > index 5066fd1..1e9c136 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.c
> > +++ b/drivers/gpu/drm/i915/i915_drv.c
> > @@ -880,25 +880,6 @@ int i915_reset(struct drm_device *dev)
> >  		 */
> >  		if (INTEL_INFO(dev)->gen > 5)
> >  			intel_reset_gt_powersave(dev);
> > -
> > -
> > -		if (IS_GEN3(dev) || (IS_GEN4(dev) && !IS_G4X(dev))) {
> > -			intel_runtime_pm_disable_interrupts(dev_priv);
> > -			intel_runtime_pm_enable_interrupts(dev_priv);
> > -
> > -			intel_modeset_init_hw(dev);
> > -
> > -			spin_lock_irq(&dev_priv->irq_lock);
> > -			if (dev_priv->display.hpd_irq_setup)
> > -				dev_priv->display.hpd_irq_setup(dev);
> > -			spin_unlock_irq(&dev_priv->irq_lock);
> > -
> > -			drm_modeset_lock_all(dev);
> > -			intel_modeset_setup_hw_state(dev, true);
> > -			drm_modeset_unlock_all(dev);
> > -
> > -			intel_hpd_init(dev_priv);
> > -		}
> >  	} else {
> >  		mutex_unlock(&dev->struct_mutex);
> >  	}
> > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> > index 5908580d..8887674 100644
> > --- a/drivers/gpu/drm/i915/i915_irq.c
> > +++ b/drivers/gpu/drm/i915/i915_irq.c
> > @@ -2428,6 +2428,9 @@ static void i915_error_work_func(struct work_struct *work)
> >  		 * simulated reset via debugs, so get an RPM reference.
> >  		 */
> >  		intel_runtime_pm_get(dev_priv);
> > +
> > +		intel_prepare_reset(dev);
> > +
> >  		/*
> >  		 * All state reset _must_ be completed before we update the
> >  		 * reset counter, for otherwise waiters might miss the reset
> > @@ -2436,7 +2439,7 @@ static void i915_error_work_func(struct work_struct *work)
> >  		 */
> >  		ret = i915_reset(dev);
> >  
> > -		intel_display_handle_reset(dev);
> > +		intel_finish_reset(dev);
> >  
> >  		intel_runtime_pm_put(dev_priv);
> >  
> > diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> > index 3218455..8329f7c 100644
> > --- a/drivers/gpu/drm/i915/intel_display.c
> > +++ b/drivers/gpu/drm/i915/intel_display.c
> > @@ -2765,25 +2765,10 @@ intel_pipe_set_base_atomic(struct drm_crtc *crtc, struct drm_framebuffer *fb,
> >  	return 0;
> >  }
> >  
> > -void intel_display_handle_reset(struct drm_device *dev)
> > +static void intel_complete_page_flips(struct drm_device *dev)
> >  {
> > -	struct drm_i915_private *dev_priv = dev->dev_private;
> >  	struct drm_crtc *crtc;
> >  
> > -	/*
> > -	 * Flips in the rings have been nuked by the reset,
> > -	 * so complete all pending flips so that user space
> > -	 * will get its events and not get stuck.
> > -	 *
> > -	 * Also update the base address of all primary
> > -	 * planes to the the last fb to make sure we're
> > -	 * showing the correct fb after a reset.
> > -	 *
> > -	 * Need to make two loops over the crtcs so that we
> > -	 * don't try to grab a crtc mutex before the
> > -	 * pending_flip_queue really got woken up.
> > -	 */
> > -
> >  	for_each_crtc(dev, crtc) {
> >  		struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> >  		enum plane plane = intel_crtc->plane;
> > @@ -2791,6 +2776,12 @@ void intel_display_handle_reset(struct drm_device *dev)
> >  		intel_prepare_page_flip(dev, plane);
> >  		intel_finish_page_flip_plane(dev, plane);
> >  	}
> > +}
> > +
> > +static void intel_update_primary_planes(struct drm_device *dev)
> > +{
> > +	struct drm_i915_private *dev_priv = dev->dev_private;
> > +	struct drm_crtc *crtc;
> >  
> >  	for_each_crtc(dev, crtc) {
> >  		struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> > @@ -2810,6 +2801,79 @@ void intel_display_handle_reset(struct drm_device *dev)
> >  	}
> >  }
> >  
> > +void intel_prepare_reset(struct drm_device *dev)
> > +{
> > +	if (!drm_core_check_feature(dev, DRIVER_MODESET))
> > +		return;
> 
> ums just officially died please remove. Same for the one below.

okey dokey

> 
> > +
> > +	/*
> > +	 * Flips in the rings will be nuked by the reset,
> > +	 * so complete all pending flips so that user space
> > +	 * will get its events and not get stuck.
> > +	 *
> > +	 * Old platforms will also reset the display, so we
> > +	 * need to grab the modeset locks around the reset.
> > +	 * But in order to do that we must let any pending
> > +	 * page flip wait complete since the waiters may be
> > +	 * holding some modeset locks.
> > +	 */
> > +	intel_complete_page_flips(dev);
> 
> Is this really required? We complete them afterwards, and all the pageflip
> waiters I've found do check for gpu hangs and abort the pageflip wait.
> That's already required since the mmio flip might go missing, and thus far
> we've only completed the flip _after_ having reset the gpu and gem state
> (and grabbed dev->struct_mutex).

Hmm. Yeah, just waking them up ought to be sufficient to dislodge
things. And we already do that before scheduling the error work, but
after setting the reset_in_progress flag, which is very much critical
here. So I guess I could just move the complete pending flips bit to
intel_finish_reset().

But then I do wonder a bit why I originally needed to add the unlocked
page flip complete before the locked .update_plane() call. Did we miss
a wakeup somewhere or did we not abort pending flip waits on reset?

> 
> > +
> > +	/* no reset support for gen2 */
> > +	if (IS_GEN2(dev))
> > +		return;
> > +
> > +	/* reset doesn't touch the display */
> > +	if (INTEL_INFO(dev)->gen >= 5 || IS_G4X(dev))
> > +		return;
> > +
> > +	drm_modeset_lock_all(dev);
> > +}
> > +
> > +void intel_finish_reset(struct drm_device *dev)
> > +{
> > +	struct drm_i915_private *dev_priv = to_i915(dev);
> > +
> > +	if (!drm_core_check_feature(dev, DRIVER_MODESET))
> > +		return;
> > +
> > +	/* no reset support for gen2 */
> > +	if (IS_GEN2(dev))
> > +		return;
> > +
> > +	/* reset doesn't touch the display */
> > +	if (INTEL_INFO(dev)->gen >= 5 || IS_G4X(dev)) {
> > +		/*
> > +		 * Flips in the rings have been nuked by the reset,
> > +		 * so update the base address of all primary
> > +		 * planes to the the last fb to make sure we're
> > +		 * showing the correct fb after a reset.
> > +		 */
> > +		intel_update_primary_planes(dev);
> > +		return;
> > +	}
> > +
> > +	/*
> > +	 * The display has been reset as well,
> > +	 * so need a full re-initialization.
> > +	 */
> > +	intel_runtime_pm_disable_interrupts(dev_priv);
> > +	intel_runtime_pm_enable_interrupts(dev_priv);
> > +
> > +	intel_modeset_init_hw(dev);
> > +
> > +	spin_lock_irq(&dev_priv->irq_lock);
> > +	if (dev_priv->display.hpd_irq_setup)
> > +		dev_priv->display.hpd_irq_setup(dev);
> > +	spin_unlock_irq(&dev_priv->irq_lock);
> > +
> > +	intel_modeset_setup_hw_state(dev, true);
> > +
> > +	intel_hpd_init(dev_priv);
> > +
> > +	drm_modeset_unlock_all(dev);
> > +}
> > +
> >  static int
> >  intel_finish_fb(struct drm_framebuffer *old_fb)
> >  {
> > diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> > index f0a46ec..25fdbb1 100644
> > --- a/drivers/gpu/drm/i915/intel_drv.h
> > +++ b/drivers/gpu/drm/i915/intel_drv.h
> > @@ -958,7 +958,8 @@ unsigned long intel_gen4_compute_page_offset(int *x, int *y,
> >  					     unsigned int tiling_mode,
> >  					     unsigned int bpp,
> >  					     unsigned int pitch);
> > -void intel_display_handle_reset(struct drm_device *dev);
> > +void intel_prepare_reset(struct drm_device *dev);
> > +void intel_finish_reset(struct drm_device *dev);
> >  void hsw_enable_pc8(struct drm_i915_private *dev_priv);
> >  void hsw_disable_pc8(struct drm_i915_private *dev_priv);
> >  void intel_dp_get_m_n(struct intel_crtc *crtc,
> > -- 
> > 2.0.4
> > 
> > _______________________________________________
> > Intel-gfx mailing list
> > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
> > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
> 
> -- 
> Daniel Vetter
> Software Engineer, Intel Corporation
> +41 (0) 79 365 57 48 - http://blog.ffwll.ch

-- 
Ville Syrjälä
Intel OTC
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx