Re: [PATCH 25/55] drm/i915: Update i915_gem_object_sync() to take a request structure

Tomas Elf <tomas.elf@xxxxxxxxx> · Tue, 02 Jun 2015 19:26:50 +0100

On 29/05/2015 17:43, John.C.Harrison@xxxxxxxxx wrote:
From: John Harrison <John.C.Harrison@xxxxxxxxx>

The plan is to pass requests around as the basic submission tracking structure
rather than rings and contexts. This patch updates the i915_gem_object_sync()
code path.

v2: Much more complex patch to share a single request between the sync and the
page flip. The _sync() function now supports lazy allocation of the request
structure. That is, if one is passed in then that will be used. If one is not,
then a request will be allocated and passed back out. Note that the _sync() code
does not necessarily require a request. Thus one will only be created until
certain situations. The reason the lazy allocation must be done within the
_sync() code itself is because the decision to need one or not is not really
something that code above can second guess (except in the case where one is
definitely not required because no ring is passed in).

The call chains above _sync() now support passing a request through which most
callers passing in NULL and assuming that no request will be required (because
they also pass in NULL for the ring and therefore can't be generating any ring
code).

The exeception is intel_crtc_page_flip() which now supports having a request

1. "The exeception" -> "The exception"

returned from _sync(). If one is, then that request is shared by the page flip
(if the page flip is of a type to need a request). If _sync() does not generate
a request but the page flip does need one, then the page flip path will create
its own request.

v3: Updated comment description to be clearer about 'to_req' parameter (Tomas
Elf review request). Rebased onto newer tree that significantly changed the
synchronisation code.

For: VIZ-5115
Signed-off-by: John Harrison <John.C.Harrison@xxxxxxxxx>
---
  drivers/gpu/drm/i915/i915_drv.h            |    4 ++-
  drivers/gpu/drm/i915/i915_gem.c            |   48 +++++++++++++++++++++-------
  drivers/gpu/drm/i915/i915_gem_execbuffer.c |    2 +-
  drivers/gpu/drm/i915/intel_display.c       |   17 +++++++---
  drivers/gpu/drm/i915/intel_drv.h           |    3 +-
  drivers/gpu/drm/i915/intel_fbdev.c         |    2 +-
  drivers/gpu/drm/i915/intel_lrc.c           |    2 +-
  drivers/gpu/drm/i915/intel_overlay.c       |    2 +-
  8 files changed, 58 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 64a10fa..f69e9cb 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2778,7 +2778,8 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)

  int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
  int i915_gem_object_sync(struct drm_i915_gem_object *obj,
-			 struct intel_engine_cs *to);
+			 struct intel_engine_cs *to,
+			 struct drm_i915_gem_request **to_req);
  void i915_vma_move_to_active(struct i915_vma *vma,
  			     struct intel_engine_cs *ring);
  int i915_gem_dumb_create(struct drm_file *file_priv,
@@ -2889,6 +2890,7 @@ int __must_check
  i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
  				     u32 alignment,
  				     struct intel_engine_cs *pipelined,
+				     struct drm_i915_gem_request **pipelined_request,
  				     const struct i915_ggtt_view *view);
  void i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj,
  					      const struct i915_ggtt_view *view);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b7d66aa..db90043 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3098,25 +3098,26 @@ out:
  static int
  __i915_gem_object_sync(struct drm_i915_gem_object *obj,
  		       struct intel_engine_cs *to,
-		       struct drm_i915_gem_request *req)
+		       struct drm_i915_gem_request *from_req,
+		       struct drm_i915_gem_request **to_req)
  {
  	struct intel_engine_cs *from;
  	int ret;

-	from = i915_gem_request_get_ring(req);
+	from = i915_gem_request_get_ring(from_req);
  	if (to == from)
  		return 0;

-	if (i915_gem_request_completed(req, true))
+	if (i915_gem_request_completed(from_req, true))
  		return 0;

-	ret = i915_gem_check_olr(req);
+	ret = i915_gem_check_olr(from_req);
  	if (ret)
  		return ret;

  	if (!i915_semaphore_is_enabled(obj->base.dev)) {
  		struct drm_i915_private *i915 = to_i915(obj->base.dev);
-		ret = __i915_wait_request(req,
+		ret = __i915_wait_request(from_req,
  					  atomic_read(&i915->gpu_error.reset_counter),
  					  i915->mm.interruptible,
  					  NULL,
@@ -3124,15 +3125,25 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
  		if (ret)
  			return ret;

-		i915_gem_object_retire_request(obj, req);
+		i915_gem_object_retire_request(obj, from_req);
  	} else {
  		int idx = intel_ring_sync_index(from, to);
-		u32 seqno = i915_gem_request_get_seqno(req);
+		u32 seqno = i915_gem_request_get_seqno(from_req);

+		WARN_ON(!to_req);
+
+		/* Optimization: Avoid semaphore sync when we are sure we already
+		 * waited for an object with higher seqno */

2. How about using the standard multi-line comment format?

/* (empty line)
 * (first line)
 * (second line)
 */

  		if (seqno <= from->semaphore.sync_seqno[idx])
  			return 0;

-		trace_i915_gem_ring_sync_to(from, to, req);
+		if (*to_req == NULL) {
+			ret = i915_gem_request_alloc(to, to->default_context, to_req);
+			if (ret)
+				return ret;
+		}
+
+		trace_i915_gem_ring_sync_to(from, to, from_req);
  		ret = to->semaphore.sync_to(to, from, seqno);
  		if (ret)
  			return ret;
@@ -3153,6 +3164,9 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
   *
   * @obj: object which may be in use on another ring.
   * @to: ring we wish to use the object on. May be NULL.
+ * @to_req: request we wish to use the object for. See below.
+ *          This will be allocated and returned if a request is
+ *          required but not passed in.
   *
   * This code is meant to abstract object synchronization with the GPU.
   * Calling with NULL implies synchronizing the object with the CPU
@@ -3168,11 +3182,22 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
   * - If we are a write request (pending_write_domain is set), the new
   *   request must wait for outstanding read requests to complete.
   *
+ * For CPU synchronisation (NULL to) no request is required. For syncing with
+ * rings to_req must be non-NULL. However, a request does not have to be
+ * pre-allocated. If *to_req is null and sync commands will be emitted then a
+ * request will be allocated automatically and returned through *to_req. Note
+ * that it is not guaranteed that commands will be emitted (because the
+ * might already be idle). Hence there is no need to create a request that
+ * might never have any work submitted. Note further that if a request is
+ * returned in *to_req, it is the responsibility of the caller to submit
+ * that request (after potentially adding more work to it).
+ *

3. "(because the might already be idle)" : The what? The engine?
4. "NULL" and "null" mixed. Please be consistent.

Overall, the explanation is better than in the last patch version

With those minor changes:

Reviewed-by: Tomas Elf <tomas.elf@xxxxxxxxx>

Thanks,
Tomas

   * Returns 0 if successful, else propagates up the lower layer error.
   */
  int
  i915_gem_object_sync(struct drm_i915_gem_object *obj,
-		     struct intel_engine_cs *to)
+		     struct intel_engine_cs *to,
+		     struct drm_i915_gem_request **to_req)
  {
  	const bool readonly = obj->base.pending_write_domain == 0;
  	struct drm_i915_gem_request *req[I915_NUM_RINGS];
@@ -3194,7 +3219,7 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj,
  				req[n++] = obj->last_read_req[i];
  	}
  	for (i = 0; i < n; i++) {
-		ret = __i915_gem_object_sync(obj, to, req[i]);
+		ret = __i915_gem_object_sync(obj, to, req[i], to_req);
  		if (ret)
  			return ret;
  	}
@@ -4144,12 +4169,13 @@ int
  i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
  				     u32 alignment,
  				     struct intel_engine_cs *pipelined,
+				     struct drm_i915_gem_request **pipelined_request,
  				     const struct i915_ggtt_view *view)
  {
  	u32 old_read_domains, old_write_domain;
  	int ret;

-	ret = i915_gem_object_sync(obj, pipelined);
+	ret = i915_gem_object_sync(obj, pipelined, pipelined_request);
  	if (ret)
  		return ret;

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 50b1ced..bea92ad 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -899,7 +899,7 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
  		struct drm_i915_gem_object *obj = vma->obj;

  		if (obj->active & other_rings) {
-			ret = i915_gem_object_sync(obj, req->ring);
+			ret = i915_gem_object_sync(obj, req->ring, &req);
  			if (ret)
  				return ret;
  		}
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 657a333..6528ada 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2338,7 +2338,8 @@ int
  intel_pin_and_fence_fb_obj(struct drm_plane *plane,
  			   struct drm_framebuffer *fb,
  			   const struct drm_plane_state *plane_state,
-			   struct intel_engine_cs *pipelined)
+			   struct intel_engine_cs *pipelined,
+			   struct drm_i915_gem_request **pipelined_request)
  {
  	struct drm_device *dev = fb->dev;
  	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -2403,7 +2404,7 @@ intel_pin_and_fence_fb_obj(struct drm_plane *plane,

  	dev_priv->mm.interruptible = false;
  	ret = i915_gem_object_pin_to_display_plane(obj, alignment, pipelined,
-						   &view);
+						   pipelined_request, &view);
  	if (ret)
  		goto err_interruptible;

@@ -11119,6 +11120,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
  	struct intel_unpin_work *work;
  	struct intel_engine_cs *ring;
  	bool mmio_flip;
+	struct drm_i915_gem_request *request = NULL;
  	int ret;

  	/*
@@ -11225,7 +11227,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
  	 */
  	ret = intel_pin_and_fence_fb_obj(crtc->primary, fb,
  					 crtc->primary->state,
-					 mmio_flip ? i915_gem_request_get_ring(obj->last_write_req) : ring);
+					 mmio_flip ? i915_gem_request_get_ring(obj->last_write_req) : ring, &request);
  	if (ret)
  		goto cleanup_pending;

@@ -11256,6 +11258,9 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
  					intel_ring_get_request(ring));
  	}

+	if (request)
+		i915_add_request_no_flush(request->ring);
+
  	work->flip_queued_vblank = drm_crtc_vblank_count(crtc);
  	work->enable_stall_check = true;

@@ -11273,6 +11278,8 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
  cleanup_unpin:
  	intel_unpin_fb_obj(fb, crtc->primary->state);
  cleanup_pending:
+	if (request)
+		i915_gem_request_cancel(request);
  	atomic_dec(&intel_crtc->unpin_work_count);
  	mutex_unlock(&dev->struct_mutex);
  cleanup:
@@ -13171,7 +13178,7 @@ intel_prepare_plane_fb(struct drm_plane *plane,
  		if (ret)
  			DRM_DEBUG_KMS("failed to attach phys object\n");
  	} else {
-		ret = intel_pin_and_fence_fb_obj(plane, fb, new_state, NULL);
+		ret = intel_pin_and_fence_fb_obj(plane, fb, new_state, NULL, NULL);
  	}

  	if (ret == 0)
@@ -15218,7 +15225,7 @@ void intel_modeset_gem_init(struct drm_device *dev)
  		ret = intel_pin_and_fence_fb_obj(c->primary,
  						 c->primary->fb,
  						 c->primary->state,
-						 NULL);
+						 NULL, NULL);
  		mutex_unlock(&dev->struct_mutex);
  		if (ret) {
  			DRM_ERROR("failed to pin boot fb on pipe %d\n",
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 02d8317..73650ae 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1034,7 +1034,8 @@ void intel_release_load_detect_pipe(struct drm_connector *connector,
  int intel_pin_and_fence_fb_obj(struct drm_plane *plane,
  			       struct drm_framebuffer *fb,
  			       const struct drm_plane_state *plane_state,
-			       struct intel_engine_cs *pipelined);
+			       struct intel_engine_cs *pipelined,
+			       struct drm_i915_gem_request **pipelined_request);
  struct drm_framebuffer *
  __intel_framebuffer_create(struct drm_device *dev,
  			   struct drm_mode_fb_cmd2 *mode_cmd,
diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
index 4e7e7da..dd9f3b2 100644
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c
@@ -151,7 +151,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
  	}

  	/* Flush everything out, we'll be doing GTT only from now on */
-	ret = intel_pin_and_fence_fb_obj(NULL, fb, NULL, NULL);
+	ret = intel_pin_and_fence_fb_obj(NULL, fb, NULL, NULL, NULL);
  	if (ret) {
  		DRM_ERROR("failed to pin obj: %d\n", ret);
  		goto out_fb;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 6d005b1..f8e8fdb 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -638,7 +638,7 @@ static int execlists_move_to_gpu(struct drm_i915_gem_request *req,
  		struct drm_i915_gem_object *obj = vma->obj;

  		if (obj->active & other_rings) {
-			ret = i915_gem_object_sync(obj, req->ring);
+			ret = i915_gem_object_sync(obj, req->ring, &req);
  			if (ret)
  				return ret;
  		}
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index e7534b9..0f8187a 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -724,7 +724,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
  	if (ret != 0)
  		return ret;

-	ret = i915_gem_object_pin_to_display_plane(new_bo, 0, NULL,
+	ret = i915_gem_object_pin_to_display_plane(new_bo, 0, NULL, NULL,
  						   &i915_ggtt_view_normal);
  	if (ret != 0)
  		return ret;


_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx