Re: [PATCH 14/29] drm/i915: Explicitly pin the logical context for execbuf

Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxxxxxxxx> · Mon, 8 Apr 2019 16:17:19 +0100

On 08/04/2019 10:17, Chris Wilson wrote:
In order to separate the reservation phase of building a request from
its emission phase, we need to pull some of the request alloc activities
from deep inside i915_request to the surface, GEM_EXECBUFFER.

I had some comments and gave conditional r-b on this one in the previous 
round.

Just preserving one comment basically and caching eb->i915.

Regards,

Tvrtko

Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
---
  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 110 +++++++++++++--------
  drivers/gpu/drm/i915/i915_request.c        |   9 --
  2 files changed, 70 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 3d672c9edb94..35732c2ae17f 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -34,8 +34,11 @@
  #include <drm/drm_syncobj.h>
  #include <drm/i915_drm.h>
  
+#include "gt/intel_gt_pm.h"
+
  #include "i915_drv.h"
  #include "i915_gem_clflush.h"
+#include "i915_gem_pm.h"
  #include "i915_trace.h"
  #include "intel_drv.h"
  #include "intel_frontbuffer.h"
@@ -236,7 +239,8 @@ struct i915_execbuffer {
  	unsigned int *flags;
  
  	struct intel_engine_cs *engine; /** engine to queue the request to */
-	struct i915_gem_context *ctx; /** context for building the request */
+	struct intel_context *context; /* logical state for the request */
+	struct i915_gem_context *gem_context; /** caller's context */
  	struct i915_address_space *vm; /** GTT and vma for the request */
  
  	struct i915_request *request; /** our request to build */
@@ -738,7 +742,7 @@ static int eb_select_context(struct i915_execbuffer *eb)
  	if (unlikely(!ctx))
  		return -ENOENT;
  
-	eb->ctx = ctx;
+	eb->gem_context = ctx;
  	if (ctx->ppgtt) {
  		eb->vm = &ctx->ppgtt->vm;
  		eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
@@ -761,7 +765,7 @@ static struct i915_request *__eb_wait_for_ring(struct intel_ring *ring)
  	 * Completely unscientific finger-in-the-air estimates for suitable
  	 * maximum user request size (to avoid blocking) and then backoff.
  	 */
-	if (intel_ring_update_space(ring) >= PAGE_SIZE)
+	if (!ring || intel_ring_update_space(ring) >= PAGE_SIZE)
  		return NULL;
  
  	/*
@@ -784,7 +788,6 @@ static struct i915_request *__eb_wait_for_ring(struct intel_ring *ring)
  
  static int eb_wait_for_ring(const struct i915_execbuffer *eb)
  {
-	const struct intel_context *ce;
  	struct i915_request *rq;
  	int ret = 0;
  
@@ -794,11 +797,7 @@ static int eb_wait_for_ring(const struct i915_execbuffer *eb)
  	 * keeping all of their resources pinned.
  	 */
  
-	ce = intel_context_lookup(eb->ctx, eb->engine);
-	if (!ce || !ce->ring) /* first use, assume empty! */
-		return 0;
-
-	rq = __eb_wait_for_ring(ce->ring);
+	rq = __eb_wait_for_ring(eb->context->ring);
  	if (rq) {
  		mutex_unlock(&eb->i915->drm.struct_mutex);
  
@@ -817,15 +816,15 @@ static int eb_wait_for_ring(const struct i915_execbuffer *eb)
  
  static int eb_lookup_vmas(struct i915_execbuffer *eb)
  {
-	struct radix_tree_root *handles_vma = &eb->ctx->handles_vma;
+	struct radix_tree_root *handles_vma = &eb->gem_context->handles_vma;
  	struct drm_i915_gem_object *obj;
  	unsigned int i, batch;
  	int err;
  
-	if (unlikely(i915_gem_context_is_closed(eb->ctx)))
+	if (unlikely(i915_gem_context_is_closed(eb->gem_context)))
  		return -ENOENT;
  
-	if (unlikely(i915_gem_context_is_banned(eb->ctx)))
+	if (unlikely(i915_gem_context_is_banned(eb->gem_context)))
  		return -EIO;
  
  	INIT_LIST_HEAD(&eb->relocs);
@@ -870,8 +869,8 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
  		if (!vma->open_count++)
  			i915_vma_reopen(vma);
  		list_add(&lut->obj_link, &obj->lut_list);
-		list_add(&lut->ctx_link, &eb->ctx->handles_list);
-		lut->ctx = eb->ctx;
+		list_add(&lut->ctx_link, &eb->gem_context->handles_list);
+		lut->ctx = eb->gem_context;
  		lut->handle = handle;
  
  add_vma:
@@ -1227,7 +1226,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
  	if (err)
  		goto err_unmap;
  
-	rq = i915_request_alloc(eb->engine, eb->ctx);
+	rq = i915_request_create(eb->context);
  	if (IS_ERR(rq)) {
  		err = PTR_ERR(rq);
  		goto err_unpin;
@@ -2088,8 +2087,41 @@ static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
  	[I915_EXEC_VEBOX]	= VECS0
  };
  
-static struct intel_engine_cs *
-eb_select_engine(struct drm_i915_private *dev_priv,
+static int eb_pin_context(struct i915_execbuffer *eb,
+			  struct intel_engine_cs *engine)
+{
+	struct intel_context *ce;
+	int err;
+
+	/*
+	 * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
+	 * EIO if the GPU is already wedged.
+	 */
+	err = i915_terminally_wedged(eb->i915);
+	if (err)
+		return err;
+
+	/*
+	 * Pinning the contexts may generate requests in order to acquire
+	 * GGTT space, so do this first before we reserve a seqno for
+	 * ourselves.
+	 */
+	ce = intel_context_pin(eb->gem_context, engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	eb->engine = engine;
+	eb->context = ce;
+	return 0;
+}
+
+static void eb_unpin_context(struct i915_execbuffer *eb)
+{
+	intel_context_unpin(eb->context);
+}
+
+static int
+eb_select_engine(struct i915_execbuffer *eb,
  		 struct drm_file *file,
  		 struct drm_i915_gem_execbuffer2 *args)
  {
@@ -2098,21 +2130,21 @@ eb_select_engine(struct drm_i915_private *dev_priv,
  
  	if (user_ring_id > I915_USER_RINGS) {
  		DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
-		return NULL;
+		return -EINVAL;
  	}
  
  	if ((user_ring_id != I915_EXEC_BSD) &&
  	    ((args->flags & I915_EXEC_BSD_MASK) != 0)) {
  		DRM_DEBUG("execbuf with non bsd ring but with invalid "
  			  "bsd dispatch flags: %d\n", (int)(args->flags));
-		return NULL;
+		return -EINVAL;
  	}
  
-	if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(dev_priv, VCS1)) {
+	if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(eb->i915, VCS1)) {
  		unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
  
  		if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
-			bsd_idx = gen8_dispatch_bsd_engine(dev_priv, file);
+			bsd_idx = gen8_dispatch_bsd_engine(eb->i915, file);
  		} else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
  			   bsd_idx <= I915_EXEC_BSD_RING2) {
  			bsd_idx >>= I915_EXEC_BSD_SHIFT;
@@ -2120,20 +2152,20 @@ eb_select_engine(struct drm_i915_private *dev_priv,
  		} else {
  			DRM_DEBUG("execbuf with unknown bsd ring: %u\n",
  				  bsd_idx);
-			return NULL;
+			return -EINVAL;
  		}
  
-		engine = dev_priv->engine[_VCS(bsd_idx)];
+		engine = eb->i915->engine[_VCS(bsd_idx)];
  	} else {
-		engine = dev_priv->engine[user_ring_map[user_ring_id]];
+		engine = eb->i915->engine[user_ring_map[user_ring_id]];
  	}
  
  	if (!engine) {
  		DRM_DEBUG("execbuf with invalid ring: %u\n", user_ring_id);
-		return NULL;
+		return -EINVAL;
  	}
  
-	return engine;
+	return eb_pin_context(eb, engine);
  }
  
  static void
@@ -2275,7 +2307,6 @@ i915_gem_do_execbuffer(struct drm_device *dev,
  	struct i915_execbuffer eb;
  	struct dma_fence *in_fence = NULL;
  	struct sync_file *out_fence = NULL;
-	intel_wakeref_t wakeref;
  	int out_fence_fd = -1;
  	int err;
  
@@ -2335,12 +2366,6 @@ i915_gem_do_execbuffer(struct drm_device *dev,
  	if (unlikely(err))
  		goto err_destroy;
  
-	eb.engine = eb_select_engine(eb.i915, file, args);
-	if (!eb.engine) {
-		err = -EINVAL;
-		goto err_engine;
-	}
-
  	/*
  	 * Take a local wakeref for preparing to dispatch the execbuf as
  	 * we expect to access the hardware fairly frequently in the
@@ -2348,16 +2373,20 @@ i915_gem_do_execbuffer(struct drm_device *dev,
  	 * wakeref that we hold until the GPU has been idle for at least
  	 * 100ms.
  	 */
-	wakeref = intel_runtime_pm_get(eb.i915);
+	intel_gt_pm_get(eb.i915);
  
  	err = i915_mutex_lock_interruptible(dev);
  	if (err)
  		goto err_rpm;
  
-	err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */
+	err = eb_select_engine(&eb, file, args);
  	if (unlikely(err))
  		goto err_unlock;
  
+	err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */
+	if (unlikely(err))
+		goto err_engine;
+
  	err = eb_relocate(&eb);
  	if (err) {
  		/*
@@ -2441,7 +2470,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
  	GEM_BUG_ON(eb.reloc_cache.rq);
  
  	/* Allocate a request for this batch buffer nice and early. */
-	eb.request = i915_request_alloc(eb.engine, eb.ctx);
+	eb.request = i915_request_create(eb.context);
  	if (IS_ERR(eb.request)) {
  		err = PTR_ERR(eb.request);
  		goto err_batch_unpin;
@@ -2479,8 +2508,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
  	trace_i915_request_queue(eb.request, eb.batch_flags);
  	err = eb_submit(&eb);
  err_request:
-	i915_request_add(eb.request);
  	add_to_client(eb.request, file);
+	i915_request_add(eb.request);
  
  	if (fences)
  		signal_fence_array(&eb, fences);
@@ -2502,12 +2531,13 @@ i915_gem_do_execbuffer(struct drm_device *dev,
  err_vma:
  	if (eb.exec)
  		eb_release_vmas(&eb);
+err_engine:
+	eb_unpin_context(&eb);
  err_unlock:
  	mutex_unlock(&dev->struct_mutex);
  err_rpm:
-	intel_runtime_pm_put(eb.i915, wakeref);
-err_engine:
-	i915_gem_context_put(eb.ctx);
+	intel_gt_pm_put(eb.i915);
+	i915_gem_context_put(eb.gem_context);
  err_destroy:
  	eb_destroy(&eb);
  err_out_fence:
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 7b973b3c6ecf..7932d1209247 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -753,7 +753,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
  	struct drm_i915_private *i915 = engine->i915;
  	struct intel_context *ce;
  	struct i915_request *rq;
-	int ret;
  
  	/*
  	 * Preempt contexts are reserved for exclusive use to inject a
@@ -762,14 +761,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
  	 */
  	GEM_BUG_ON(ctx == i915->preempt_context);
  
-	/*
-	 * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
-	 * EIO if the GPU is already wedged.
-	 */
-	ret = i915_terminally_wedged(i915);
-	if (ret)
-		return ERR_PTR(ret);
-
  	/*
  	 * Pinning the contexts may generate requests in order to acquire
  	 * GGTT space, so do this first before we reserve a seqno for

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx