Re: [PATCH 5/5] drm/i915: Wait for object idle without locks in atomic_commit.

Matt Roper <matthew.d.roper@xxxxxxxxx> · Wed, 28 Oct 2015 17:30:51 -0700

On Wed, Sep 23, 2015 at 01:27:12PM +0200, Maarten Lankhorst wrote:
> Make pinning and waiting a separate step, and wait for object idle
> without struct_mutex held.
> 
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@xxxxxxxxxxxxxxx>
> ---
>  drivers/gpu/drm/i915/i915_drv.h           |  2 -
>  drivers/gpu/drm/i915/i915_gem.c           |  6 ---
>  drivers/gpu/drm/i915/intel_atomic_plane.c |  2 +
>  drivers/gpu/drm/i915/intel_display.c      | 86 ++++++++++++++++++++++++++-----
>  drivers/gpu/drm/i915/intel_drv.h          |  7 +--
>  drivers/gpu/drm/i915/intel_fbdev.c        |  2 +-
>  drivers/gpu/drm/i915/intel_overlay.c      |  6 ++-
>  7 files changed, 84 insertions(+), 27 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 3bf8a9b771d0..ec72fd457499 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2982,8 +2982,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write);
>  int __must_check
>  i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
>  				     u32 alignment,
> -				     struct intel_engine_cs *pipelined,
> -				     struct drm_i915_gem_request **pipelined_request,
>  				     const struct i915_ggtt_view *view);
>  void i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj,
>  					      const struct i915_ggtt_view *view);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 46f0e83ee6ee..ab02182c47a5 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3782,17 +3782,11 @@ unlock:
>  int
>  i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
>  				     u32 alignment,
> -				     struct intel_engine_cs *pipelined,
> -				     struct drm_i915_gem_request **pipelined_request,
>  				     const struct i915_ggtt_view *view)
>  {
>  	u32 old_read_domains, old_write_domain;
>  	int ret;
>  
> -	ret = i915_gem_object_sync(obj, pipelined, pipelined_request);
> -	if (ret)
> -		return ret;
> -
>  	/* Mark the pin_display early so that we account for the
>  	 * display coherency whilst setting up the cache domains.
>  	 */
> diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c
> index a11980696595..c6bb0fc1edfb 100644
> --- a/drivers/gpu/drm/i915/intel_atomic_plane.c
> +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c
> @@ -84,6 +84,7 @@ intel_plane_duplicate_state(struct drm_plane *plane)
>  	state = &intel_state->base;
>  
>  	__drm_atomic_helper_plane_duplicate_state(plane, state);
> +	intel_state->wait_req = NULL;
>  
>  	return state;
>  }
> @@ -100,6 +101,7 @@ void
>  intel_plane_destroy_state(struct drm_plane *plane,
>  			  struct drm_plane_state *state)
>  {
> +	WARN_ON(state && to_intel_plane_state(state)->wait_req);
>  	drm_atomic_helper_plane_destroy_state(plane, state);
>  }
>  
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 2f046134cc9a..d817c44ee428 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -2291,9 +2291,7 @@ static unsigned int intel_linear_alignment(struct drm_i915_private *dev_priv)
>  int
>  intel_pin_and_fence_fb_obj(struct drm_plane *plane,
>  			   struct drm_framebuffer *fb,
> -			   const struct drm_plane_state *plane_state,
> -			   struct intel_engine_cs *pipelined,
> -			   struct drm_i915_gem_request **pipelined_request)
> +			   const struct drm_plane_state *plane_state)
>  {
>  	struct drm_device *dev = fb->dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
> @@ -2349,8 +2347,8 @@ intel_pin_and_fence_fb_obj(struct drm_plane *plane,
>  	 */
>  	intel_runtime_pm_get(dev_priv);
>  
> -	ret = i915_gem_object_pin_to_display_plane(obj, alignment, pipelined,
> -						   pipelined_request, &view);
> +	ret = i915_gem_object_pin_to_display_plane(obj, alignment,
> +						   &view);

The pin to display plane (and hence the wait) happens inside
intel_runtime_pm_get/put() in the current code.  When you pull the wait
out to the various callsites, it isn't holding runtime pm anymore (at
least not in a way that's obvious to me).  Can this be a problem?
Neither runtime PM nor GEM internals are something I'm terribly familiar
with, so you might want to get an ack from someone like Paulo or Chris?

>  	if (ret)
>  		goto err_pm;
>  
> @@ -11357,9 +11355,14 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
>  	 * synchronisation, so all we want here is to pin the framebuffer
>  	 * into the display plane and skip any waits.
>  	 */
> +	if (!mmio_flip) {
> +		ret = i915_gem_object_sync(obj, ring, &request);
> +		if (ret)
> +			goto cleanup_pending;
> +	}
> +
>  	ret = intel_pin_and_fence_fb_obj(crtc->primary, fb,
> -					 crtc->primary->state,
> -					 mmio_flip ? i915_gem_request_get_ring(obj->last_write_req) : ring, &request);
> +					 crtc->primary->state);

Probably a dumb question from someone who isn't familiar with GEM
handling...the comment above the lines you added here says we just want
to pin and skip any waits when doing MMIO flips (which matches the logic
of your code change).  However it looks to me like the original code was
still doing a wait, just with a potentially different ring parameter.
What am I missing here?

>  	if (ret)
>  		goto cleanup_pending;
>  
> @@ -12993,7 +12996,10 @@ static int intel_atomic_prepare_commit(struct drm_device *dev,
>  				       struct drm_atomic_state *state,
>  				       bool async)
>  {
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct drm_plane_state *plane_state;
>  	struct drm_crtc_state *crtc_state;
> +	struct drm_plane *plane;
>  	struct drm_crtc *crtc;
>  	int i, ret;
>  
> @@ -13006,6 +13012,9 @@ static int intel_atomic_prepare_commit(struct drm_device *dev,
>  		ret = intel_crtc_wait_for_pending_flips(crtc);
>  		if (ret)
>  			return ret;
> +
> +		if (atomic_read(&to_intel_crtc(crtc)->unpin_work_count) >= 2)
> +			flush_workqueue(dev_priv->wq);
>  	}
>  
>  	ret = i915_mutex_lock_interruptible(dev);
> @@ -13013,6 +13022,37 @@ static int intel_atomic_prepare_commit(struct drm_device *dev,
>  		return ret;
>  
>  	ret = drm_atomic_helper_prepare_planes(dev, state);
> +	if (!ret && !async) {
> +		u32 reset_counter;
> +
> +		reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
> +		mutex_unlock(&dev->struct_mutex);
> +
> +		for_each_plane_in_state(state, plane, plane_state, i) {
> +			struct intel_plane_state *intel_plane_state =
> +				to_intel_plane_state(plane_state);
> +
> +			if (!intel_plane_state->wait_req)
> +				continue;
> +
> +			ret = __i915_wait_request(intel_plane_state->wait_req,
> +						  reset_counter, true,
> +						  NULL, NULL);
> +
> +			/* Swallow -EIO errors to allow updates during hw lockup. */
> +			if (ret == -EIO)
> +				ret = 0;
> +
> +			if (ret)
> +				break;
> +		}
> +
> +		if (!ret)
> +			return 0;
> +
> +		mutex_lock(&dev->struct_mutex);
> +		drm_atomic_helper_cleanup_planes(dev, state);
> +	}
>  
>  	mutex_unlock(&dev->struct_mutex);
>  	return ret;
> @@ -13039,15 +13079,17 @@ static int intel_atomic_commit(struct drm_device *dev,
>  			       bool async)
>  {
>  	struct drm_i915_private *dev_priv = dev->dev_private;
> -	struct drm_crtc *crtc;
>  	struct drm_crtc_state *crtc_state;
> +	struct drm_crtc *crtc;
>  	int ret = 0;
>  	int i;
>  	bool any_ms = false;
>  
>  	ret = intel_atomic_prepare_commit(dev, state, async);
> -	if (ret)
> +	if (ret) {
> +		DRM_DEBUG_ATOMIC("Preparing state failed with %i\n", ret);
>  		return ret;
> +	}
>  
>  	drm_atomic_helper_swap_state(dev, state);
>  
> @@ -13318,7 +13360,8 @@ intel_prepare_plane_fb(struct drm_plane *plane,
>  		 * This should only fail upon a hung GPU, in which case we
>  		 * can safely continue.
>  		 */
> -		if (needs_modeset(crtc_state))
> +		if (needs_modeset(crtc_state) &&
> +		    to_intel_plane_state(plane->state)->visible)
>  			ret = i915_gem_object_wait_rendering(old_obj, true);
>  
>  		/* Swallow -EIO errors to allow updates during hw lockup. */
> @@ -13335,11 +13378,21 @@ intel_prepare_plane_fb(struct drm_plane *plane,
>  		if (ret)
>  			DRM_DEBUG_KMS("failed to attach phys object\n");
>  	} else {
> -		ret = intel_pin_and_fence_fb_obj(plane, fb, new_state, NULL, NULL);
> +		ret = intel_pin_and_fence_fb_obj(plane, fb, new_state);
>  	}
>  
> -	if (ret == 0)
> +	if (ret == 0) {
> +		if (obj && obj->last_write_req &&
> +		    !i915_gem_request_completed(obj->last_write_req, true)) {
> +			struct intel_plane_state *plane_state =
> +				to_intel_plane_state(new_state);
> +
> +			i915_gem_request_assign(&plane_state->wait_req,
> +						obj->last_write_req);
> +		}
> +
>  		i915_gem_track_fb(old_obj, obj, intel_plane->frontbuffer_bit);
> +	}
>  
>  	return ret;
>  }
> @@ -13357,9 +13410,12 @@ intel_cleanup_plane_fb(struct drm_plane *plane,
>  {
>  	struct drm_device *dev = plane->dev;
>  	struct intel_plane *intel_plane = to_intel_plane(plane);
> +	struct intel_plane_state *old_intel_state;
>  	struct drm_i915_gem_object *old_obj = intel_fb_obj(old_state->fb);
>  	struct drm_i915_gem_object *obj = intel_fb_obj(plane->state->fb);
>  
> +	old_intel_state = to_intel_plane_state(old_state);
> +
>  	if (!obj && !old_obj)
>  		return;
>  
> @@ -13371,6 +13427,9 @@ intel_cleanup_plane_fb(struct drm_plane *plane,
>  	if ((old_obj && (old_obj->frontbuffer_bits & intel_plane->frontbuffer_bit)) ||
>  	    (obj && !(obj->frontbuffer_bits & intel_plane->frontbuffer_bit)))
>  		i915_gem_track_fb(old_obj, obj, intel_plane->frontbuffer_bit);
> +
> +	i915_gem_request_assign(&old_intel_state->wait_req, NULL);
> +
>  }
>  
>  int
> @@ -15348,8 +15407,7 @@ void intel_modeset_gem_init(struct drm_device *dev)
>  		mutex_lock(&dev->struct_mutex);
>  		ret = intel_pin_and_fence_fb_obj(c->primary,
>  						 c->primary->fb,
> -						 c->primary->state,
> -						 NULL, NULL);
> +						 c->primary->state);
>  		mutex_unlock(&dev->struct_mutex);
>  		if (ret) {
>  			DRM_ERROR("failed to pin boot fb on pipe %d\n",
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index cfdb0f2714cd..852a0d192f82 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -270,6 +270,9 @@ struct intel_plane_state {
>  	int scaler_id;
>  
>  	struct drm_intel_sprite_colorkey ckey;
> +
> +	/* async flip related structures */
> +	struct drm_i915_gem_request *wait_req;
>  };
>  
>  struct intel_initial_plane_config {
> @@ -1055,9 +1058,7 @@ void intel_release_load_detect_pipe(struct drm_connector *connector,
>  				    struct drm_modeset_acquire_ctx *ctx);
>  int intel_pin_and_fence_fb_obj(struct drm_plane *plane,
>  			       struct drm_framebuffer *fb,
> -			       const struct drm_plane_state *plane_state,
> -			       struct intel_engine_cs *pipelined,
> -			       struct drm_i915_gem_request **pipelined_request);
> +			       const struct drm_plane_state *plane_state);
>  struct drm_framebuffer *
>  __intel_framebuffer_create(struct drm_device *dev,
>  			   struct drm_mode_fb_cmd2 *mode_cmd,
> diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
> index 65329127f0b9..51afee61ba7e 100644
> --- a/drivers/gpu/drm/i915/intel_fbdev.c
> +++ b/drivers/gpu/drm/i915/intel_fbdev.c
> @@ -155,7 +155,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
>  	}
>  
>  	/* Flush everything out, we'll be doing GTT only from now on */
> -	ret = intel_pin_and_fence_fb_obj(NULL, fb, NULL, NULL, NULL);
> +	ret = intel_pin_and_fence_fb_obj(NULL, fb, NULL);
>  	if (ret) {
>  		DRM_ERROR("failed to pin obj: %d\n", ret);
>  		goto out_fb;
> diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
> index 444542696a2c..1b18cc6bdbd6 100644
> --- a/drivers/gpu/drm/i915/intel_overlay.c
> +++ b/drivers/gpu/drm/i915/intel_overlay.c
> @@ -749,7 +749,11 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
>  	if (ret != 0)
>  		return ret;
>  
> -	ret = i915_gem_object_pin_to_display_plane(new_bo, 0, NULL, NULL,
> +	ret = i915_gem_object_wait_rendering(new_bo, true);

Again, I'm not super familiar with GEM internals...can this be a
behavior change from the previous code?  Originally the pin_to_display
plane function would have passed (obj->base.pending_write_domain == 0)
as the second parameter here (readonly), but you're unconditionally
passing true.  Can there not be pending writes against this object?

Overall I feel like this patch goes a bit too far outside my area of
expertise for me to serve as the final reviewer for it.  Maybe it would
be good to have someone like Chris or Ville take a look since they have
a lot more experience in this area.

Matt

> +	if (ret)
> +		return ret;
> +
> +	ret = i915_gem_object_pin_to_display_plane(new_bo, 0,
>  						   &i915_ggtt_view_normal);
>  	if (ret != 0)
>  		return ret;
> -- 
> 2.1.0
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Matt Roper
Graphics Software Engineer
IoTG Platform Enabling & Development
Intel Corporation
(916) 356-2795
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx