[PATCH] drm/i915: Hide one invalid cancellation bug in i915_switch_context()

Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> · Thu, 17 Dec 2015 18:18:05 +0000

As we add the VMA to the request early, it may be cancelled during
execbuf reservation. This will leave the context object pointing to a
dangling request; i915_wait_request() simply skips the wait and so we
may unbind the object whilst it is still active.

We can partially prevent such atrocity by doing the RCS context
initialisation earlier. This ensures that one callsite from blowing up
(and for igt this is a frequent culprit due to how the stressful batches
are submitted).

Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Daniel Vetter <daniel.vetter@xxxxxxxx>
Cc: stable@xxxxxxxxxxxxxxx
---
 drivers/gpu/drm/i915/i915_gem_context.c | 29 +++++++++++++++++------------
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 900ffd044db8..657686e6492f 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -657,7 +657,6 @@ static int do_switch(struct drm_i915_gem_request *req)
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
 	struct intel_context *from = ring->last_context;
 	u32 hw_flags = 0;
-	bool uninitialized = false;
 	int ret, i;
 
 	if (from != NULL && ring == &dev_priv->ring[RCS]) {
@@ -764,6 +763,15 @@ static int do_switch(struct drm_i915_gem_request *req)
 			to->remap_slice &= ~(1<<i);
 	}
 
+	if (!to->legacy_hw_ctx.initialized) {
+		if (ring->init_context) {
+			ret = ring->init_context(req);
+			if (ret)
+				goto unpin_out;
+		}
+		to->legacy_hw_ctx.initialized = true;
+	}
+
 	/* The backing object for the context is done after switching to the
 	 * *next* context. Therefore we cannot retire the previous context until
 	 * the next context has already started running. In fact, the below code
@@ -772,6 +780,14 @@ static int do_switch(struct drm_i915_gem_request *req)
 	 */
 	if (from != NULL) {
 		from->legacy_hw_ctx.rcs_state->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+		/* XXX Note very well this is dangerous!
+		 * We are pinning this object using this request as our
+		 * active reference. However, this request may yet be cancelled
+		 * during the execbuf dispatch, leaving us waiting on a
+		 * dangling request. Waiting upon this dangling request is
+		 * ignored, which means that we may unbind the context whilst
+		 * the GPU is still writing to the backing storage.
+		 */
 		i915_vma_move_to_active(i915_gem_obj_to_ggtt(from->legacy_hw_ctx.rcs_state), req);
 		/* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
 		 * whole damn pipeline, we don't need to explicitly mark the
@@ -787,21 +803,10 @@ static int do_switch(struct drm_i915_gem_request *req)
 		i915_gem_context_unreference(from);
 	}
 
-	uninitialized = !to->legacy_hw_ctx.initialized;
-	to->legacy_hw_ctx.initialized = true;
-
 done:
 	i915_gem_context_reference(to);
 	ring->last_context = to;
 
-	if (uninitialized) {
-		if (ring->init_context) {
-			ret = ring->init_context(req);
-			if (ret)
-				DRM_ERROR("ring init context: %d\n", ret);
-		}
-	}
-
 	return 0;
 
 unpin_out:
-- 
2.6.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx