Re: [PATCH 8/9] drm/i915: Track the previous pinned context inside the request

Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxxxxxxxx> · Tue, 19 Apr 2016 13:02:26 +0100

On 19/04/16 07:49, Chris Wilson wrote:
As the contexts are accessed by the hardware until the switch is completed
to a new context, the hardware may still be writing to the context object
after the breadcrumb is visible. We must not unpin/unbind/prune that
object whilst still active and so we keep the previous context pinned until
the following request. If we move this tracking onto the request, we can
simplify the code and enable freeing of the request without the
struct_mutex in subsequent patches.

Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
---
  drivers/gpu/drm/i915/i915_gem_request.c |  8 ++++----
  drivers/gpu/drm/i915/i915_gem_request.h | 11 +++++++++++
  drivers/gpu/drm/i915/intel_lrc.c        | 12 +++++-------
  3 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 33aacf1725dd..8d7c415f1896 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -643,12 +643,12 @@ void i915_gem_request_free(struct kref *req_ref)
  	if (req->file_priv)
  		i915_gem_request_remove_from_client(req);

-	if (ctx) {
+	if (req->pinned_context) {
  		if (i915.enable_execlists)
-			intel_lr_context_unpin(ctx, req->engine);
-
-		i915_gem_context_unreference(ctx);
+			intel_lr_context_unpin(req->pinned_context,
+					       req->engine);
  	}

+	i915_gem_context_unreference(ctx);
  	kmem_cache_free(to_i915(req)->requests, req);
  }
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 69a4d4e2c97b..389813cbc19a 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -85,6 +85,17 @@ struct drm_i915_gem_request {
  	struct intel_context *ctx;
  	struct intel_ringbuffer *ringbuf;

+	/**
+	 * Context related to the previous request.
+	 * As the contexts are accessed by the hardware until the switch is
+	 * completed to a new context, the hardware may still be writing
+	 * to the context object after the breadcrumb is visible. We must
+	 * not unpin/unbind/prune that object whilst still active and so
+	 * we keep the previous context pinned until the following (this)
+	 * request is retired.
+	 */
+	struct intel_context *pinned_context;
+
  	/** Batch buffer related to this request if any (used for
  	    error state dump only) */
  	struct drm_i915_gem_object *batch_obj;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index b0d20af38574..0e55f206e592 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -708,6 +708,7 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request
  		request->ctx->engine[engine->id].initialised = true;
  	}

+	request->pinned_context = request->ctx;

Could you extend the big comment above to explain that, before 
submission, pinned_context may point to the current context rather 
than the previous one?

  	return 0;
  }

@@ -782,12 +783,8 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
  	intel_logical_ring_emit(ringbuf, MI_NOOP);
  	intel_logical_ring_advance(ringbuf);

-	if (engine->last_context != request->ctx) {
-		if (engine->last_context)
-			intel_lr_context_unpin(engine->last_context, engine);
-		intel_lr_context_pin(request->ctx, engine);
-		engine->last_context = request->ctx;
-	}
+	request->pinned_context = engine->last_context;
+	engine->last_context = request->ctx;

I am not sure if this is very complicated or just very different from my 
approach. Either way, after thinking long and hard, I cannot fault it. 
It looks like it will work.


  	if (dev_priv->guc.execbuf_client)
  		i915_guc_submit(dev_priv->guc.execbuf_client, request);
@@ -1009,7 +1006,8 @@ void intel_execlists_retire_requests(struct intel_engine_cs *engine)
  	spin_unlock_bh(&engine->execlist_lock);

  	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
-		intel_lr_context_unpin(req->ctx, engine);
+		if (req->pinned_context)
+			intel_lr_context_unpin(req->pinned_context, engine);

  		list_del(&req->execlist_link);
  		i915_gem_request_unreference(req);


Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>

I suppose you did not see any performance effect since you decided to 
turn it on for both GuC and execlists? (Assuming vma iomap is in place.)

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx