We have a place already for error handling and error state capture, i915_gpu_error.c. Move code to more appropriate file. No functional changes. Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx> Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> Signed-off-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_drv.h | 4 +- drivers/gpu/drm/i915/i915_gpu_error.c | 157 +++++++++++++++++++++++++++++++++- drivers/gpu/drm/i915/i915_irq.c | 151 -------------------------------- 3 files changed, 155 insertions(+), 157 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6c0b0a6..88301fa 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3686,9 +3686,7 @@ static inline void i915_error_state_buf_release( { kfree(eb->buf); } -void i915_capture_error_state(struct drm_i915_private *dev_priv, - u32 engine_mask, - const char *error_msg); + void i915_error_state_get(struct drm_device *dev, struct i915_error_state_file_priv *error_priv); void i915_error_state_put(struct i915_error_state_file_priv *error_priv); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 204093f..e307841 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1585,9 +1585,9 @@ static int capture(void *data) * out a structure which becomes available in debugfs for user level tools * to pick up. */ -void i915_capture_error_state(struct drm_i915_private *dev_priv, - u32 engine_mask, - const char *error_msg) +static void i915_capture_error_state(struct drm_i915_private *dev_priv, + u32 engine_mask, + const char *error_msg) { static bool warned; struct drm_i915_error_state *error; @@ -1640,6 +1640,108 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv, } } +static void i915_clear_error_registers(struct drm_i915_private *dev_priv) +{ + u32 eir; + + if (!IS_GEN2(dev_priv)) + I915_WRITE(PGTBL_ER, I915_READ(PGTBL_ER)); + + if (INTEL_GEN(dev_priv) < 4) + I915_WRITE(IPEIR, I915_READ(IPEIR)); + else + I915_WRITE(IPEIR_I965, I915_READ(IPEIR_I965)); + + I915_WRITE(EIR, I915_READ(EIR)); + eir = I915_READ(EIR); + if (eir) { + /* + * some errors might have become stuck, + * mask them. + */ + DRM_DEBUG_DRIVER("EIR stuck: 0x%08x, masking\n", eir); + I915_WRITE(EMR, I915_READ(EMR) | eir); + I915_WRITE(IIR, I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT); + } +} + +static void i915_error_wake_up(struct drm_i915_private *dev_priv) +{ + /* + * Notify all waiters for GPU completion events that reset state has + * been changed, and that they need to restart their wait after + * checking for potential errors (and bail out to drop locks if there is + * a gpu reset pending so that i915_error_work_func can acquire them). + */ + + /* Wake up __wait_seqno, potentially holding dev->struct_mutex. */ + wake_up_all(&dev_priv->gpu_error.wait_queue); + + /* Wake up intel_crtc_wait_for_pending_flips, holding crtc->mutex. */ + wake_up_all(&dev_priv->pending_flip_queue); +} + +/** + * i915_reset_and_wakeup - do process context error handling work + * @dev_priv: i915 device private + * + * Fire an error uevent so userspace can see that a hang or error + * was detected. + */ +static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv) +{ + struct kobject *kobj = &dev_priv->drm.primary->kdev->kobj; + char *error_event[] = { I915_ERROR_UEVENT "=1", NULL }; + char *reset_event[] = { I915_RESET_UEVENT "=1", NULL }; + char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL }; + + kobject_uevent_env(kobj, KOBJ_CHANGE, error_event); + + DRM_DEBUG_DRIVER("resetting chip\n"); + kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event); + + /* + * In most cases it's guaranteed that we get here with an RPM + * reference held, for example because there is a pending GPU + * request that won't finish until the reset is done. This + * isn't the case at least when we get here by doing a + * simulated reset via debugs, so get an RPM reference. + */ + intel_runtime_pm_get(dev_priv); + intel_prepare_reset(dev_priv); + + do { + /* + * All state reset _must_ be completed before we update the + * reset counter, for otherwise waiters might miss the reset + * pending state and not properly drop locks, resulting in + * deadlocks with the reset work. + */ + if (mutex_trylock(&dev_priv->drm.struct_mutex)) { + i915_reset(dev_priv); + mutex_unlock(&dev_priv->drm.struct_mutex); + } + + /* We need to wait for anyone holding the lock to wakeup */ + } while (wait_on_bit_timeout(&dev_priv->gpu_error.flags, + I915_RESET_IN_PROGRESS, + TASK_UNINTERRUPTIBLE, + HZ)); + + intel_finish_reset(dev_priv); + intel_runtime_pm_put(dev_priv); + + if (!test_bit(I915_WEDGED, &dev_priv->gpu_error.flags)) + kobject_uevent_env(kobj, + KOBJ_CHANGE, reset_done_event); + + /* + * Note: The wake_up also serves as a memory barrier so that + * waiters see the updated value of the dev_priv->gpu_error. + */ + wake_up_all(&dev_priv->gpu_error.reset_queue); +} + void i915_error_state_get(struct drm_device *dev, struct i915_error_state_file_priv *error_priv) { @@ -1671,3 +1773,52 @@ void i915_destroy_error_state(struct drm_device *dev) if (error) kref_put(&error->ref, i915_error_state_free); } + +/** + * i915_handle_error - handle a gpu error + * @dev_priv: i915 device private + * @engine_mask: mask representing engines that are hung + * Do some basic checking of register state at error time and + * dump it to the syslog. Also call i915_capture_error_state() to make + * sure we get a record and make it available in debugfs. Fire a uevent + * so userspace knows something bad happened (should trigger collection + * of a ring dump etc.). + * @fmt: Error message format string + */ +void i915_handle_error(struct drm_i915_private *dev_priv, + u32 engine_mask, + const char *fmt, ...) +{ + va_list args; + char error_msg[80]; + + va_start(args, fmt); + vscnprintf(error_msg, sizeof(error_msg), fmt, args); + va_end(args); + + i915_capture_error_state(dev_priv, engine_mask, error_msg); + i915_clear_error_registers(dev_priv); + + if (!engine_mask) + return; + + if (test_and_set_bit(I915_RESET_IN_PROGRESS, + &dev_priv->gpu_error.flags)) + return; + + /* + * Wakeup waiting processes so that the reset function + * i915_reset_and_wakeup doesn't deadlock trying to grab + * various locks. By bumping the reset counter first, the woken + * processes will see a reset in progress and back off, + * releasing their locks and then wait for the reset completion. + * We must do this for _all_ gpu waiters that might hold locks + * that the reset work needs to acquire. + * + * Note: The wake_up also provides a memory barrier to ensure that the + * waiters see the updated value of the reset flags. + */ + i915_error_wake_up(dev_priv); + + i915_reset_and_wakeup(dev_priv); +} diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 6d7505b..f6869f0 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2571,83 +2571,6 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg) return ret; } -static void i915_error_wake_up(struct drm_i915_private *dev_priv) -{ - /* - * Notify all waiters for GPU completion events that reset state has - * been changed, and that they need to restart their wait after - * checking for potential errors (and bail out to drop locks if there is - * a gpu reset pending so that i915_error_work_func can acquire them). - */ - - /* Wake up __wait_seqno, potentially holding dev->struct_mutex. */ - wake_up_all(&dev_priv->gpu_error.wait_queue); - - /* Wake up intel_crtc_wait_for_pending_flips, holding crtc->mutex. */ - wake_up_all(&dev_priv->pending_flip_queue); -} - -/** - * i915_reset_and_wakeup - do process context error handling work - * @dev_priv: i915 device private - * - * Fire an error uevent so userspace can see that a hang or error - * was detected. - */ -static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv) -{ - struct kobject *kobj = &dev_priv->drm.primary->kdev->kobj; - char *error_event[] = { I915_ERROR_UEVENT "=1", NULL }; - char *reset_event[] = { I915_RESET_UEVENT "=1", NULL }; - char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL }; - - kobject_uevent_env(kobj, KOBJ_CHANGE, error_event); - - DRM_DEBUG_DRIVER("resetting chip\n"); - kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event); - - /* - * In most cases it's guaranteed that we get here with an RPM - * reference held, for example because there is a pending GPU - * request that won't finish until the reset is done. This - * isn't the case at least when we get here by doing a - * simulated reset via debugs, so get an RPM reference. - */ - intel_runtime_pm_get(dev_priv); - intel_prepare_reset(dev_priv); - - do { - /* - * All state reset _must_ be completed before we update the - * reset counter, for otherwise waiters might miss the reset - * pending state and not properly drop locks, resulting in - * deadlocks with the reset work. - */ - if (mutex_trylock(&dev_priv->drm.struct_mutex)) { - i915_reset(dev_priv); - mutex_unlock(&dev_priv->drm.struct_mutex); - } - - /* We need to wait for anyone holding the lock to wakeup */ - } while (wait_on_bit_timeout(&dev_priv->gpu_error.flags, - I915_RESET_IN_PROGRESS, - TASK_UNINTERRUPTIBLE, - HZ)); - - intel_finish_reset(dev_priv); - intel_runtime_pm_put(dev_priv); - - if (!test_bit(I915_WEDGED, &dev_priv->gpu_error.flags)) - kobject_uevent_env(kobj, - KOBJ_CHANGE, reset_done_event); - - /* - * Note: The wake_up also serves as a memory barrier so that - * waiters see the updated value of the dev_priv->gpu_error. - */ - wake_up_all(&dev_priv->gpu_error.reset_queue); -} - static inline void i915_err_print_instdone(struct drm_i915_private *dev_priv, struct intel_instdone *instdone) @@ -2674,80 +2597,6 @@ i915_err_print_instdone(struct drm_i915_private *dev_priv, slice, subslice, instdone->row[slice][subslice]); } -static void i915_clear_error_registers(struct drm_i915_private *dev_priv) -{ - u32 eir; - - if (!IS_GEN2(dev_priv)) - I915_WRITE(PGTBL_ER, I915_READ(PGTBL_ER)); - - if (INTEL_GEN(dev_priv) < 4) - I915_WRITE(IPEIR, I915_READ(IPEIR)); - else - I915_WRITE(IPEIR_I965, I915_READ(IPEIR_I965)); - - I915_WRITE(EIR, I915_READ(EIR)); - eir = I915_READ(EIR); - if (eir) { - /* - * some errors might have become stuck, - * mask them. - */ - DRM_DEBUG_DRIVER("EIR stuck: 0x%08x, masking\n", eir); - I915_WRITE(EMR, I915_READ(EMR) | eir); - I915_WRITE(IIR, I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT); - } -} - -/** - * i915_handle_error - handle a gpu error - * @dev_priv: i915 device private - * @engine_mask: mask representing engines that are hung - * Do some basic checking of register state at error time and - * dump it to the syslog. Also call i915_capture_error_state() to make - * sure we get a record and make it available in debugfs. Fire a uevent - * so userspace knows something bad happened (should trigger collection - * of a ring dump etc.). - * @fmt: Error message format string - */ -void i915_handle_error(struct drm_i915_private *dev_priv, - u32 engine_mask, - const char *fmt, ...) -{ - va_list args; - char error_msg[80]; - - va_start(args, fmt); - vscnprintf(error_msg, sizeof(error_msg), fmt, args); - va_end(args); - - i915_capture_error_state(dev_priv, engine_mask, error_msg); - i915_clear_error_registers(dev_priv); - - if (!engine_mask) - return; - - if (test_and_set_bit(I915_RESET_IN_PROGRESS, - &dev_priv->gpu_error.flags)) - return; - - /* - * Wakeup waiting processes so that the reset function - * i915_reset_and_wakeup doesn't deadlock trying to grab - * various locks. By bumping the reset counter first, the woken - * processes will see a reset in progress and back off, - * releasing their locks and then wait for the reset completion. - * We must do this for _all_ gpu waiters that might hold locks - * that the reset work needs to acquire. - * - * Note: The wake_up also provides a memory barrier to ensure that the - * waiters see the updated value of the reset flags. - */ - i915_error_wake_up(dev_priv); - - i915_reset_and_wakeup(dev_priv); -} - /* Called from drm generic code, passed 'crtc' which * we use as a pipe index */ -- 2.7.4 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx