diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 09e72b0510ab..1031f5a7126a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3171,8 +3171,7 @@ static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object *obj)
int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
int i915_gem_object_sync(struct drm_i915_gem_object *obj,
- struct intel_engine_cs *to,
- struct drm_i915_gem_request **to_req);
+ struct drm_i915_gem_request *to);
void i915_vma_move_to_active(struct i915_vma *vma,
struct drm_i915_gem_request *req);
int i915_gem_dumb_create(struct drm_file *file_priv,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 59890f523c5f..b6c4ff63725f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2845,51 +2845,35 @@ out:
static int
__i915_gem_object_sync(struct drm_i915_gem_object *obj,
- struct intel_engine_cs *to,
- struct drm_i915_gem_request *from_req,
- struct drm_i915_gem_request **to_req)
+ struct drm_i915_gem_request *to,
+ struct drm_i915_gem_request *from)
{
- struct intel_engine_cs *from;
int ret;
- from = i915_gem_request_get_engine(from_req);
- if (to == from)
+ if (to->engine == from->engine)
return 0;
- if (i915_gem_request_completed(from_req))
+ if (i915_gem_request_completed(from))
return 0;
if (!i915.semaphores) {
- struct drm_i915_private *i915 = to_i915(obj->base.dev);
- ret = __i915_wait_request(from_req,
- i915->mm.interruptible,
+ ret = __i915_wait_request(from,
+ from->i915->mm.interruptible,
NULL,
NO_WAITBOOST);
if (ret)
return ret;
- i915_gem_object_retire_request(obj, from_req);
+ i915_gem_object_retire_request(obj, from);
} else {
- int idx = intel_engine_sync_index(from, to);
- u32 seqno = i915_gem_request_get_seqno(from_req);
+ int idx = intel_engine_sync_index(from->engine, to->engine);
+ u32 seqno = i915_gem_request_get_seqno(from);
- WARN_ON(!to_req);
-
- if (seqno <= from->semaphore.sync_seqno[idx])
+ if (seqno <= from->engine->semaphore.sync_seqno[idx])
return 0;
- if (*to_req == NULL) {
- struct drm_i915_gem_request *req;
-
- req = i915_gem_request_alloc(to, NULL);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
- *to_req = req;
- }
-
- trace_i915_gem_ring_sync_to(*to_req, from, from_req);
- ret = to->semaphore.sync_to(*to_req, from, seqno);
+ trace_i915_gem_ring_sync_to(to, from);
+ ret = to->engine->semaphore.sync_to(to, from->engine, seqno);
if (ret)
return ret;
@@ -2897,8 +2881,8 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
* might have just caused seqno wrap under
* the radar.
*/
- from->semaphore.sync_seqno[idx] =
- i915_gem_request_get_seqno(obj->last_read_req[from->id]);
+ from->engine->semaphore.sync_seqno[idx] =
+ i915_gem_request_get_seqno(obj->last_read_req[from->engine->id]);
}
return 0;
@@ -2908,17 +2892,12 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
* i915_gem_object_sync - sync an object to a ring.
*
* @obj: object which may be in use on another ring.
- * @to: ring we wish to use the object on. May be NULL.
- * @to_req: request we wish to use the object for. See below.
- * This will be allocated and returned if a request is
- * required but not passed in.
+ * @to: request we wish to use the object for
*
* This code is meant to abstract object synchronization with the GPU.
- * Calling with NULL implies synchronizing the object with the CPU
- * rather than a particular GPU ring. Conceptually we serialise writes
- * between engines inside the GPU. We only allow one engine to write
- * into a buffer at any time, but multiple readers. To ensure each has
- * a coherent view of memory, we must:
+ * Conceptually we serialise writes between engines inside the GPU.
+ * We only allow one engine to write into a buffer at any time, but
+ * multiple readers. To ensure each has a coherent view of memory, we must:
*
* - If there is an outstanding write request to the object, the new
* request must wait for it to complete (either CPU or in hw, requests
@@ -2927,22 +2906,11 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
* - If we are a write request (pending_write_domain is set), the new
* request must wait for outstanding read requests to complete.
*
- * For CPU synchronisation (NULL to) no request is required. For syncing with
- * rings to_req must be non-NULL. However, a request does not have to be
- * pre-allocated. If *to_req is NULL and sync commands will be emitted then a
- * request will be allocated automatically and returned through *to_req. Note
- * that it is not guaranteed that commands will be emitted (because the system
- * might already be idle). Hence there is no need to create a request that
- * might never have any work submitted. Note further that if a request is
- * returned in *to_req, it is the responsibility of the caller to submit
- * that request (after potentially adding more work to it).
- *
* Returns 0 if successful, else propagates up the lower layer error.
*/
int
i915_gem_object_sync(struct drm_i915_gem_object *obj,
- struct intel_engine_cs *to,
- struct drm_i915_gem_request **to_req)
+ struct drm_i915_gem_request *to)
{
const bool readonly = obj->base.pending_write_domain == 0;
struct drm_i915_gem_request *req[I915_NUM_ENGINES];
@@ -2951,9 +2919,6 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj,
if (!obj->active)
return 0;
- if (to == NULL)
- return i915_gem_object_wait_rendering(obj, readonly);
-
n = 0;
if (readonly) {
if (obj->last_write_req)
@@ -2964,7 +2929,7 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj,
req[n++] = obj->last_read_req[i];
}
for (i = 0; i < n; i++) {
- ret = __i915_gem_object_sync(obj, to, req[i], to_req);
+ ret = __i915_gem_object_sync(obj, to, req[i]);
if (ret)
return ret;
}
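
For reference, a minimal sketch of a caller under the new convention: the caller already holds the request it is building and serialises every object it is about to use against that request. The helper name and the object array below are illustrative only, not part of the patch.

/* Illustrative helper, not part of the patch: serialise a set of
 * objects against the request being constructed.  The reworked
 * i915_gem_object_sync() takes the request itself; it returns early
 * for idle objects and for work already queued on req's engine.
 */
static int sync_objects_to_request(struct drm_i915_gem_request *req,
				   struct drm_i915_gem_object **objs,
				   unsigned int count)
{
	unsigned int i;
	int ret;

	for (i = 0; i < count; i++) {
		ret = i915_gem_object_sync(objs[i], req);
		if (ret)
			return ret;
	}

	return 0;
}
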
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 048050176ff9..75d19932847d 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -981,7 +981,7 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
struct drm_i915_gem_object *obj = vma->obj;
if (obj->active & other_rings) {
- ret = i915_gem_object_sync(obj, req->engine, &req);
+ ret = i915_gem_object_sync(obj, req);
if (ret)
return ret;
}
@@ -1427,7 +1427,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
{
struct drm_i915_private *dev_priv = to_i915(dev);
struct i915_ggtt *ggtt = &dev_priv->ggtt;
- struct drm_i915_gem_request *req = NULL;
struct eb_vmas *eb;
struct drm_i915_gem_object *batch_obj;
struct drm_i915_gem_exec_object2 shadow_exec_entry;
@@ -1615,13 +1614,13 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
params->batch_obj_vm_offset = i915_gem_obj_offset(batch_obj, vm);
/* Allocate a request for this batch buffer nice and early. */
- req = i915_gem_request_alloc(engine, ctx);
- if (IS_ERR(req)) {
- ret = PTR_ERR(req);
+ params->request = i915_gem_request_alloc(engine, ctx);
+ if (IS_ERR(params->request)) {
+ ret = PTR_ERR(params->request);
goto err_batch_unpin;
}
- ret = i915_gem_request_add_to_client(req, file);
+ ret = i915_gem_request_add_to_client(params->request, file);
if (ret)
goto err_request;
@@ -1637,7 +1636,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
params->dispatch_flags = dispatch_flags;
params->batch_obj = batch_obj;
params->ctx = ctx;
- params->request = req;
ret = dev_priv->gt.execbuf_submit(params, args, &eb->vmas);
err_request:
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 7b772d914e23..995ae99e9cf7 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -292,10 +292,21 @@ static int i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno)
return 0;
}
-static inline int
-__i915_gem_request_alloc(struct intel_engine_cs *engine,
- struct i915_gem_context *ctx,
- struct drm_i915_gem_request **req_out)
+/**
+ * i915_gem_request_alloc - allocate a request structure
+ *
+ * @engine: engine that we wish to issue the request on.
+ * @ctx: context that the request will be associated with.
+ * This can be NULL if the request is not directly related to
+ * any specific user context, in which case this function will
+ * choose an appropriate context to use.
+ *
+ * Returns a pointer to the allocated request if successful,
+ * or an ERR_PTR-encoded error code if not.
+ */
+struct drm_i915_gem_request *
+i915_gem_request_alloc(struct intel_engine_cs *engine,
+ struct i915_gem_context *ctx)
{
struct drm_i915_private *dev_priv = engine->i915;
unsigned int reset_counter = i915_reset_counter(&dev_priv->gpu_error);
@@ -303,18 +314,13 @@ __i915_gem_request_alloc(struct intel_engine_cs *engine,
u32 seqno;
int ret;
- if (!req_out)
- return -EINVAL;
-
- *req_out = NULL;
-
/* ABI: Before userspace accesses the GPU (e.g. execbuffer), report
* EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex
* and restart.
*/
ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible);
if (ret)
- return ret;
+ return ERR_PTR(ret);
/* Move the oldest request to the slab-cache (if not in use!) */
req = list_first_entry_or_null(&engine->request_list,
@@ -324,7 +330,7 @@ __i915_gem_request_alloc(struct intel_engine_cs *engine,
req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL);
if (!req)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
ret = i915_gem_get_seqno(dev_priv, &seqno);
if (ret)
@@ -357,39 +363,13 @@ __i915_gem_request_alloc(struct intel_engine_cs *engine,
if (ret)
goto err_ctx;
- *req_out = req;
- return 0;
+ return req;
err_ctx:
i915_gem_context_put(ctx);
err:
kmem_cache_free(dev_priv->requests, req);
- return ret;
-}
-
-/**
- * i915_gem_request_alloc - allocate a request structure
- *
- * @engine: engine that we wish to issue the request on.
- * @ctx: context that the request will be associated with.
- * This can be NULL if the request is not directly related to
- * any specific user context, in which case this function will
- * choose an appropriate context to use.
- *
- * Returns a pointer to the allocated request if successful,
- * or an error code if not.
- */
-struct drm_i915_gem_request *
-i915_gem_request_alloc(struct intel_engine_cs *engine,
- struct i915_gem_context *ctx)
-{
- struct drm_i915_gem_request *req;
- int err;
-
- if (!ctx)
- ctx = engine->i915->kernel_context;
- err = __i915_gem_request_alloc(engine, ctx, &req);
- return err ? ERR_PTR(err) : req;
+ return ERR_PTR(ret);
}
static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
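
A usage sketch of the new return convention, assuming a caller that already has an engine and a non-NULL context in hand (emit_something() is an illustrative name, not a function in the driver): the allocated request is checked with IS_ERR() and submitted with i915_add_request_no_flush() once commands have been emitted into it.

static int emit_something(struct intel_engine_cs *engine,
			  struct i915_gem_context *ctx)
{
	struct drm_i915_gem_request *req;

	/* The request itself (or an ERR_PTR) is now returned directly. */
	req = i915_gem_request_alloc(engine, ctx);
	if (IS_ERR(req))
		return PTR_ERR(req);

	/* ... emit commands into the request's ring here ... */

	i915_add_request_no_flush(req);
	return 0;
}
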
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 007112d1e049..9e43c0aa6e3b 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -449,10 +449,9 @@ TRACE_EVENT(i915_gem_evict_vm,
);
TRACE_EVENT(i915_gem_ring_sync_to,
- TP_PROTO(struct drm_i915_gem_request *to_req,
- struct intel_engine_cs *from,
- struct drm_i915_gem_request *req),
- TP_ARGS(to_req, from, req),
+ TP_PROTO(struct drm_i915_gem_request *to,
+ struct drm_i915_gem_request *from),
+ TP_ARGS(to, from),
TP_STRUCT__entry(
__field(u32, dev)
@@ -463,9 +462,9 @@ TRACE_EVENT(i915_gem_ring_sync_to,
TP_fast_assign(
__entry->dev = from->i915->drm.primary->index;
- __entry->sync_from = from->id;
- __entry->sync_to = to_req->engine->id;
- __entry->seqno = req->fence.seqno;
+ __entry->sync_from = from->engine->id;
+ __entry->sync_to = to->engine->id;
+ __entry->seqno = from->fence.seqno;
),
TP_printk("dev=%u, sync-from=%u, sync-to=%u, seqno=%u",
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index bff172c45ff7..5d4420b67642 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -11583,7 +11583,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
struct intel_flip_work *work;
struct intel_engine_cs *engine;
bool mmio_flip;
- struct drm_i915_gem_request *request = NULL;
+ struct drm_i915_gem_request *request;
int ret;
/*
@@ -11690,22 +11690,6 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
mmio_flip = use_mmio_flip(engine, obj);
- /* When using CS flips, we want to emit semaphores between rings.
- * However, when using mmio flips we will create a task to do the
- * synchronisation, so all we want here is to pin the framebuffer
- * into the display plane and skip any waits.
- */
- if (!mmio_flip) {
- ret = i915_gem_object_sync(obj, engine, &request);
- if (!ret && !request) {
- request = i915_gem_request_alloc(engine, NULL);
- ret = PTR_ERR_OR_ZERO(request);
- }
-
- if (ret)
- goto cleanup_pending;
- }
-
ret = intel_pin_and_fence_fb_obj(fb, primary->state->rotation);
if (ret)
goto cleanup_pending;
@@ -11723,14 +11707,24 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
schedule_work(&work->mmio_work);
} else {
- i915_gem_request_assign(&work->flip_queued_req, request);
+ request = i915_gem_request_alloc(engine, engine->last_context);
+ if (IS_ERR(request)) {
+ ret = PTR_ERR(request);
+ goto cleanup_unpin;
+ }
+
+ ret = i915_gem_object_sync(obj, request);
+ if (ret)
+ goto cleanup_request;
+
ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, request,
page_flip_flags);
if (ret)
- goto cleanup_unpin;
+ goto cleanup_request;
intel_mark_page_flip_active(intel_crtc, work);
+ work->flip_queued_req = i915_gem_request_get(request);
i915_add_request_no_flush(request);
}
@@ -11745,11 +11739,11 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
return 0;
+cleanup_request:
+ i915_add_request_no_flush(request);
cleanup_unpin:
intel_unpin_fb_obj(fb, crtc->primary->state->rotation);
cleanup_pending:
- if (!IS_ERR_OR_NULL(request))
- i915_add_request_no_flush(request);
atomic_dec(&intel_crtc->unpin_work_count);
mutex_unlock(&dev->struct_mutex);
cleanup:
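
Condensed, illustrative fragment of the reordered CS-flip branch above (labels and unpin cleanup collapsed, not part of the patch): the request is allocated only once a CS flip will actually be emitted, the framebuffer object is synced against that request, and the request is submitted via i915_add_request_no_flush() on both the success and the failure path so it is never leaked.

	request = i915_gem_request_alloc(engine, engine->last_context);
	if (IS_ERR(request))
		return PTR_ERR(request);

	ret = i915_gem_object_sync(obj, request);
	if (ret == 0)
		ret = dev_priv->display.queue_flip(dev, crtc, fb, obj,
						   request, page_flip_flags);
	if (ret == 0) {
		intel_mark_page_flip_active(intel_crtc, work);
		work->flip_queued_req = i915_gem_request_get(request);
	}

	/* Submit the request even on failure so it is not leaked. */
	i915_add_request_no_flush(request);
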
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 6487794ac96c..a37af1533618 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -655,7 +655,7 @@ static int execlists_move_to_gpu(struct drm_i915_gem_request *req,
struct drm_i915_gem_object *obj = vma->obj;
if (obj->active & other_rings) {
- ret = i915_gem_object_sync(obj, req->engine, &req);
+ ret = i915_gem_object_sync(obj, req);
if (ret)
return ret;
}
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index 8f1d4d9ef345..651efe4e468e 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -229,11 +229,18 @@ static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
return 0;
}
+static struct drm_i915_gem_request *alloc_request(struct intel_overlay *overlay)
+{
+ struct drm_i915_private *dev_priv = overlay->i915;
+ struct intel_engine_cs *engine = &dev_priv->engine[RCS];
+
+ return i915_gem_request_alloc(engine, dev_priv->kernel_context);
+}
+
/* overlay needs to be disable in OCMD reg */
static int intel_overlay_on(struct intel_overlay *overlay)
{
struct drm_i915_private *dev_priv = overlay->i915;
- struct intel_engine_cs *engine = &dev_priv->engine[RCS];
struct drm_i915_gem_request *req;
struct intel_ring *ring;
int ret;
@@ -241,7 +248,7 @@ static int intel_overlay_on(struct intel_overlay *overlay)
WARN_ON(overlay->active);
WARN_ON(IS_I830(dev_priv) && !(dev_priv->quirks & QUIRK_PIPEA_FORCE));
- req = i915_gem_request_alloc(engine, NULL);
+ req = alloc_request(overlay);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -268,7 +275,6 @@ static int intel_overlay_continue(struct intel_overlay *overlay,
bool load_polyphase_filter)
{
struct drm_i915_private *dev_priv = overlay->i915;
- struct intel_engine_cs *engine = &dev_priv->engine[RCS];
struct drm_i915_gem_request *req;
struct intel_ring *ring;
u32 flip_addr = overlay->flip_addr;
@@ -285,7 +291,7 @@ static int intel_overlay_continue(struct intel_overlay *overlay,
if (tmp & (1 << 17))
DRM_DEBUG("overlay underrun, DOVSTA: %x\n", tmp);
- req = i915_gem_request_alloc(engine, NULL);
+ req = alloc_request(overlay);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -338,7 +344,6 @@ static void intel_overlay_off_tail(struct intel_overlay *overlay)
static int intel_overlay_off(struct intel_overlay *overlay)
{
struct drm_i915_private *dev_priv = overlay->i915;
- struct intel_engine_cs *engine = &dev_priv->engine[RCS];
struct drm_i915_gem_request *req;
struct intel_ring *ring;
u32 flip_addr = overlay->flip_addr;
@@ -352,7 +357,7 @@ static int intel_overlay_off(struct intel_overlay *overlay)
* of the hw. Do it in both cases */
flip_addr |= OFC_UPDATE;
- req = i915_gem_request_alloc(engine, NULL);
+ req = alloc_request(overlay);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -412,7 +417,6 @@ static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
{
struct drm_i915_private *dev_priv = overlay->i915;
- struct intel_engine_cs *engine = &dev_priv->engine[RCS];
int ret;
lockdep_assert_held(&dev_priv->drm.struct_mutex);
@@ -428,7 +432,7 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
struct drm_i915_gem_request *req;
struct intel_ring *ring;
- req = i915_gem_request_alloc(engine, NULL);
+ req = alloc_request(overlay);
if (IS_ERR(req))
return PTR_ERR(req);