[PATCH 1/3] drm/i915: Add error_recovery option to i915_handle_error

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



So that we gain more fine grained control how we want
to do the recovery. The aim is to grab the gpu state and
skip everything related to recovery.

Signed-off-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_drv.h |  9 ++++++++-
 drivers/gpu/drm/i915/i915_irq.c | 28 +++++++++++++++-------------
 2 files changed, 23 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 3212d62..23e7f20 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2288,8 +2288,15 @@ void intel_hpd_cancel_work(struct drm_i915_private *dev_priv);
 
 /* i915_irq.c */
 void i915_queue_hangcheck(struct drm_device *dev);
+
+enum recovery_type {
+	NO_RESET,
+	RESET,
+	DEBUGFS,
+};
+
 __printf(3, 4)
-void i915_handle_error(struct drm_device *dev, bool wedged,
+void i915_handle_error(struct drm_device *dev, enum recovery_type recovery,
 		       const char *fmt, ...);
 
 void gen6_set_pm_mask(struct drm_i915_private *dev_priv, u32 pm_iir,
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index a2b013d..21008a2 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1320,7 +1320,7 @@ static void snb_gt_irq_handler(struct drm_device *dev,
 	if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT |
 		      GT_BSD_CS_ERROR_INTERRUPT |
 		      GT_RENDER_CS_MASTER_ERROR_INTERRUPT)) {
-		i915_handle_error(dev, false, "GT error interrupt 0x%08x",
+		i915_handle_error(dev, NO_RESET, "GT error interrupt 0x%08x",
 				  gt_iir);
 	}
 
@@ -1716,7 +1716,7 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
 			notify_ring(dev_priv->dev, &dev_priv->ring[VECS]);
 
 		if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT) {
-			i915_handle_error(dev_priv->dev, false,
+			i915_handle_error(dev_priv->dev, NO_RESET,
 					  "VEBOX CS error interrupt 0x%08x",
 					  pm_iir);
 		}
@@ -2548,7 +2548,7 @@ static void i915_report_and_clear_eir(struct drm_device *dev)
  * so userspace knows something bad happened (should trigger collection
  * of a ring dump etc.).
  */
-void i915_handle_error(struct drm_device *dev, bool wedged,
+void i915_handle_error(struct drm_device *dev, enum recovery_type recovery,
 		       const char *fmt, ...)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -2559,10 +2559,11 @@ void i915_handle_error(struct drm_device *dev, bool wedged,
 	vscnprintf(error_msg, sizeof(error_msg), fmt, args);
 	va_end(args);
 
-	i915_capture_error_state(dev, wedged, error_msg);
-	i915_report_and_clear_eir(dev);
+	i915_capture_error_state(dev, recovery == RESET, error_msg);
+	if (recovery != DEBUGFS)
+		i915_report_and_clear_eir(dev);
 
-	if (wedged) {
+	if (recovery == RESET) {
 		atomic_set_mask(I915_RESET_IN_PROGRESS_FLAG,
 				&dev_priv->gpu_error.reset_counter);
 
@@ -2588,7 +2589,8 @@ void i915_handle_error(struct drm_device *dev, bool wedged,
 	 * dev-priv->wq work queue for otherwise the flush_work in the pageflip
 	 * code will deadlock.
 	 */
-	schedule_work(&dev_priv->gpu_error.work);
+	if (recovery != DEBUGFS)
+		schedule_work(&dev_priv->gpu_error.work);
 }
 
 /* Called from drm generic code, passed 'crtc' which
@@ -2888,7 +2890,7 @@ ring_stuck(struct intel_engine_cs *ring, u64 acthd)
 	 */
 	tmp = I915_READ_CTL(ring);
 	if (tmp & RING_WAIT) {
-		i915_handle_error(dev, false,
+		i915_handle_error(dev, NO_RESET,
 				  "Kicking stuck wait on %s",
 				  ring->name);
 		I915_WRITE_CTL(ring, tmp);
@@ -2900,7 +2902,7 @@ ring_stuck(struct intel_engine_cs *ring, u64 acthd)
 		default:
 			return HANGCHECK_HUNG;
 		case 1:
-			i915_handle_error(dev, false,
+			i915_handle_error(dev, NO_RESET,
 					  "Kicking stuck semaphore on %s",
 					  ring->name);
 			I915_WRITE_CTL(ring, tmp);
@@ -3028,7 +3030,7 @@ static void i915_hangcheck_elapsed(unsigned long data)
 	}
 
 	if (rings_hung)
-		return i915_handle_error(dev, true, "Ring hung");
+		return i915_handle_error(dev, RESET, "Ring hung");
 
 	if (busy_count)
 		/* Reset timer case chip hangs without another request
@@ -3777,7 +3779,7 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg)
 		 */
 		spin_lock(&dev_priv->irq_lock);
 		if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT)
-			i915_handle_error(dev, false,
+			i915_handle_error(dev, NO_RESET,
 					  "Command parser error, iir 0x%08x",
 					  iir);
 
@@ -3962,7 +3964,7 @@ static irqreturn_t i915_irq_handler(int irq, void *arg)
 		 */
 		spin_lock(&dev_priv->irq_lock);
 		if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT)
-			i915_handle_error(dev, false,
+			i915_handle_error(dev, NO_RESET,
 					  "Command parser error, iir 0x%08x",
 					  iir);
 
@@ -4189,7 +4191,7 @@ static irqreturn_t i965_irq_handler(int irq, void *arg)
 		 */
 		spin_lock(&dev_priv->irq_lock);
 		if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT)
-			i915_handle_error(dev, false,
+			i915_handle_error(dev, NO_RESET,
 					  "Command parser error, iir 0x%08x",
 					  iir);
 
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux