Re: [PATCH] drm/i915: Revoke fenced GTT mmapings across GPU reset

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 04/01/2017 14:51, Chris Wilson wrote:
During the fence registers are clobbered by a GPU reset. Hence if

During the reset I suppose, although it would sound still a bit redundant. So maybe "Since GPU reset clobbers the fence registers, if there is concurrent..."

there is concurrent user access to a fenced region via a GTT mmaping,
the access will not be fenced during the reset (until we restore the
fences afterwards). In order to prevent invalid access during the reset,
before we clobber the fences we must invalidate the GTT mmapings. Access
to the mmap will then be forced to fault the page, and in handling the
fault, i915_gem_fault() will take the struct_mutex and wait upon the
reset to complete.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99274
Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_drv.c           |  3 ++-
 drivers/gpu/drm/i915/i915_drv.h           |  4 +++-
 drivers/gpu/drm/i915/i915_gem.c           |  7 ++++++-
 drivers/gpu/drm/i915/i915_gem_fence_reg.c | 24 ++++++++++++++++++++++++
 4 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 72e29fcb1638..39463fcab593 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1780,6 +1780,7 @@ void i915_reset(struct drm_i915_private *dev_priv)
 	error->reset_count++;

 	pr_notice("drm/i915: Resetting chip after gpu hang\n");
+	i915_gem_reset_prepare(dev_priv);

 	disable_engines_irq(dev_priv);
 	ret = intel_gpu_reset(dev_priv, ALL_ENGINES);
@@ -1793,7 +1794,7 @@ void i915_reset(struct drm_i915_private *dev_priv)
 		goto error;
 	}

-	i915_gem_reset(dev_priv);
+	i915_gem_reset_finish(dev_priv);
 	intel_overlay_reset(dev_priv);

 	/* Ok, now get things going again... */
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c035b6576efa..f16986cfa127 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3355,7 +3355,8 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error)
 	return READ_ONCE(error->reset_count);
 }

-void i915_gem_reset(struct drm_i915_private *dev_priv);
+void i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
+void i915_gem_reset_finish(struct drm_i915_private *dev_priv);
 void i915_gem_set_wedged(struct drm_i915_private *dev_priv);

 void i915_gem_clflush_init(struct drm_i915_private *i915);
@@ -3426,6 +3427,7 @@ i915_vm_to_ppgtt(struct i915_address_space *vm)
 int __must_check i915_vma_get_fence(struct i915_vma *vma);
 int __must_check i915_vma_put_fence(struct i915_vma *vma);

+void i915_gem_revoke_fences(struct drm_i915_private *dev_priv);
 void i915_gem_restore_fences(struct drm_i915_private *dev_priv);

 void i915_gem_detect_bit_6_swizzle(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 73987014a7bd..019ea7cb13b5 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2715,6 +2715,11 @@ static void reset_request(struct drm_i915_gem_request *request)
 	memset(vaddr + head, 0, request->postfix - head);
 }

+void i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
+{
+	i915_gem_revoke_fences(dev_priv);
+}
+
 static void i915_gem_reset_engine(struct intel_engine_cs *engine)
 {
 	struct drm_i915_gem_request *request;
@@ -2780,7 +2785,7 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine)
 	spin_unlock_irqrestore(&engine->timeline->lock, flags);
 }

-void i915_gem_reset(struct drm_i915_private *dev_priv)
+void i915_gem_reset_finish(struct drm_i915_private *dev_priv)
 {
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
index 67835d7b39c7..447abb1105ab 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
@@ -358,6 +358,30 @@ i915_vma_get_fence(struct i915_vma *vma)
 }

 /**
+ * i915_gem_revoke_fences - revoke fence state
+ * @dev_priv: i915 device private
+ *
+ * Clears the hw fence state and removes all GTT mmappings via the fence. This

The function doesn't clear the hw fence state - reset does that, no?

+ * forces any user of the fence to reacquire that fence before continuing.
+ * One use is during GPU reset where the fence register is lost and we need to
+ * revoke concurrent user access via GTT mmaps until the hardware has been
+ * reset..
+ */
+void i915_gem_revoke_fences(struct drm_i915_private *dev_priv)
+{
+	int i;
+
+	lockdep_assert_held(&dev_priv->drm.struct_mutex);
+
+	for (i = 0; i < dev_priv->num_fence_regs; i++) {
+		struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];
+
+		if (fence->vma)
+			i915_gem_release_mmap(fence->vma->obj);
+	}
+}
+
+/**
  * i915_gem_restore_fences - restore fence state
  * @dev_priv: i915 device private
  *


You beat me to solving this interesting bug. :) With the comments/commit slightly improved:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux