Having taken the first step in encapsulating the functionality by moving the related files under gt/, the next step is to start encapsulating by passing around the relevant structs rather than the global drm_i915_private. In this step, we pass intel_gt to intel_reset.c Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@xxxxxxxxx> --- drivers/gpu/drm/i915/display/intel_display.c | 22 +- drivers/gpu/drm/i915/gem/i915_gem_context.c | 2 +- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 8 +- drivers/gpu/drm/i915/gem/i915_gem_pm.c | 25 +- drivers/gpu/drm/i915/gem/i915_gem_throttle.c | 2 +- .../gpu/drm/i915/gem/selftests/huge_pages.c | 20 +- .../i915/gem/selftests/i915_gem_client_blt.c | 4 +- .../i915/gem/selftests/i915_gem_coherency.c | 6 +- .../drm/i915/gem/selftests/i915_gem_context.c | 17 +- .../drm/i915/gem/selftests/i915_gem_mman.c | 2 +- .../i915/gem/selftests/i915_gem_object_blt.c | 4 +- drivers/gpu/drm/i915/gt/intel_engine.h | 8 +- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 16 +- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 3 +- drivers/gpu/drm/i915/gt/intel_gt.c | 7 + drivers/gpu/drm/i915/gt/intel_gt.h | 12 + drivers/gpu/drm/i915/gt/intel_gt_pm.c | 22 +- drivers/gpu/drm/i915/gt/intel_gt_types.h | 12 + drivers/gpu/drm/i915/gt/intel_hangcheck.c | 67 ++- drivers/gpu/drm/i915/gt/intel_lrc.c | 2 +- drivers/gpu/drm/i915/gt/intel_reset.c | 435 ++++++++--------- drivers/gpu/drm/i915/gt/intel_reset.h | 73 +-- drivers/gpu/drm/i915/gt/intel_reset_types.h | 50 ++ drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 2 +- drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 461 +++++++++--------- drivers/gpu/drm/i915/gt/selftest_lrc.c | 38 +- drivers/gpu/drm/i915/gt/selftest_reset.c | 93 ++-- drivers/gpu/drm/i915/gt/selftest_timeline.c | 3 +- .../gpu/drm/i915/gt/selftest_workarounds.c | 33 +- drivers/gpu/drm/i915/i915_debugfs.c | 63 +-- drivers/gpu/drm/i915/i915_drv.c | 5 +- drivers/gpu/drm/i915/i915_drv.h | 35 +- drivers/gpu/drm/i915/i915_gem.c | 31 +- drivers/gpu/drm/i915/i915_gpu_error.h | 52 +- drivers/gpu/drm/i915/i915_request.c | 5 +- drivers/gpu/drm/i915/i915_selftest.h | 9 + drivers/gpu/drm/i915/intel_guc_submission.c | 2 +- drivers/gpu/drm/i915/intel_uc.c | 2 +- drivers/gpu/drm/i915/selftests/i915_active.c | 3 +- drivers/gpu/drm/i915/selftests/i915_gem.c | 3 +- .../gpu/drm/i915/selftests/i915_gem_evict.c | 3 +- drivers/gpu/drm/i915/selftests/i915_request.c | 4 +- .../gpu/drm/i915/selftests/i915_selftest.c | 23 +- .../gpu/drm/i915/selftests/igt_flush_test.c | 5 +- drivers/gpu/drm/i915/selftests/igt_reset.c | 38 +- drivers/gpu/drm/i915/selftests/igt_reset.h | 10 +- drivers/gpu/drm/i915/selftests/igt_wedge_me.h | 58 --- .../gpu/drm/i915/selftests/mock_gem_device.c | 5 - 49 files changed, 897 insertions(+), 910 deletions(-) create mode 100644 drivers/gpu/drm/i915/gt/intel_reset_types.h delete mode 100644 drivers/gpu/drm/i915/selftests/igt_wedge_me.h diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index ed3532e4c8ad..736a781ea302 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -4275,12 +4275,13 @@ void intel_prepare_reset(struct drm_i915_private *dev_priv) return; /* We have a modeset vs reset deadlock, defensively unbreak it. */ - set_bit(I915_RESET_MODESET, &dev_priv->gpu_error.flags); - wake_up_all(&dev_priv->gpu_error.wait_queue); + set_bit(I915_RESET_MODESET, &dev_priv->gt.reset.flags); + smp_mb__after_atomic(); + wake_up_bit(&dev_priv->gt.reset.flags, I915_RESET_MODESET); if (atomic_read(&dev_priv->gpu_error.pending_fb_pin)) { DRM_DEBUG_KMS("Modeset potentially stuck, unbreaking through wedging\n"); - i915_gem_set_wedged(dev_priv); + intel_gt_set_wedged(&dev_priv->gt); } /* @@ -4326,7 +4327,7 @@ void intel_finish_reset(struct drm_i915_private *dev_priv) int ret; /* reset doesn't touch the display */ - if (!test_bit(I915_RESET_MODESET, &dev_priv->gpu_error.flags)) + if (!test_bit(I915_RESET_MODESET, &dev_priv->gt.reset.flags)) return; state = fetch_and_zero(&dev_priv->modeset_restore_state); @@ -4366,7 +4367,7 @@ void intel_finish_reset(struct drm_i915_private *dev_priv) drm_modeset_acquire_fini(ctx); mutex_unlock(&dev->mode_config.mutex); - clear_bit(I915_RESET_MODESET, &dev_priv->gpu_error.flags); + clear_bit_unlock(I915_RESET_MODESET, &dev_priv->gt.reset.flags); } static void icl_set_pipe_chicken(struct intel_crtc *crtc) @@ -13861,18 +13862,21 @@ static void intel_atomic_commit_fence_wait(struct intel_atomic_state *intel_stat for (;;) { prepare_to_wait(&intel_state->commit_ready.wait, &wait_fence, TASK_UNINTERRUPTIBLE); - prepare_to_wait(&dev_priv->gpu_error.wait_queue, + prepare_to_wait(bit_waitqueue(&dev_priv->gt.reset.flags, + I915_RESET_MODESET), &wait_reset, TASK_UNINTERRUPTIBLE); - if (i915_sw_fence_done(&intel_state->commit_ready) - || test_bit(I915_RESET_MODESET, &dev_priv->gpu_error.flags)) + if (i915_sw_fence_done(&intel_state->commit_ready) || + test_bit(I915_RESET_MODESET, &dev_priv->gt.reset.flags)) break; schedule(); } finish_wait(&intel_state->commit_ready.wait, &wait_fence); - finish_wait(&dev_priv->gpu_error.wait_queue, &wait_reset); + finish_wait(bit_waitqueue(&dev_priv->gt.reset.flags, + I915_RESET_MODESET), + &wait_reset); } static void intel_atomic_cleanup_work(struct work_struct *work) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 9d4ba6928884..cea56884cfd2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -2134,7 +2134,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, if (args->flags & I915_CONTEXT_CREATE_FLAGS_UNKNOWN) return -EINVAL; - ret = i915_terminally_wedged(i915); + ret = intel_gt_terminally_wedged(&i915->gt); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 0ea2d49bc8b9..3506aafada6c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -2148,7 +2148,7 @@ static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce) * ABI: Before userspace accesses the GPU (e.g. execbuffer), report * EIO if the GPU is already wedged. */ - err = i915_terminally_wedged(eb->i915); + err = intel_gt_terminally_wedged(ce->engine->gt); if (err) return err; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index bcc53c2b4f69..71d10ae90922 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -7,6 +7,8 @@ #include <linux/mman.h> #include <linux/sizes.h> +#include "gt/intel_gt.h" + #include "i915_drv.h" #include "i915_gem_gtt.h" #include "i915_gem_ioctls.h" @@ -243,7 +245,7 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) wakeref = intel_runtime_pm_get(rpm); - srcu = i915_reset_trylock(i915); + srcu = intel_gt_reset_trylock(ggtt->vm.gt); if (srcu < 0) { ret = srcu; goto err_rpm; @@ -322,7 +324,7 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) err_unlock: mutex_unlock(&dev->struct_mutex); err_reset: - i915_reset_unlock(i915, srcu); + intel_gt_reset_unlock(ggtt->vm.gt, srcu); err_rpm: intel_runtime_pm_put(rpm, wakeref); i915_gem_object_unpin_pages(obj); @@ -335,7 +337,7 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) * fail). But any other -EIO isn't ours (e.g. swap in failure) * and so needs to be reported. */ - if (!i915_terminally_wedged(i915)) + if (!intel_gt_is_wedged(ggtt->vm.gt)) return VM_FAULT_SIGBUS; /* else: fall through */ case -EAGAIN: diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index bf085b0cb7c6..8e2eeaec06cb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -5,6 +5,7 @@ */ #include "gem/i915_gem_pm.h" +#include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" #include "i915_drv.h" @@ -103,18 +104,18 @@ static int pm_notifier(struct notifier_block *nb, return NOTIFY_OK; } -static bool switch_to_kernel_context_sync(struct drm_i915_private *i915) +static bool switch_to_kernel_context_sync(struct intel_gt *gt) { - bool result = !i915_terminally_wedged(i915); + bool result = !intel_gt_is_wedged(gt); do { - if (i915_gem_wait_for_idle(i915, + if (i915_gem_wait_for_idle(gt->i915, I915_WAIT_LOCKED | I915_WAIT_FOR_IDLE_BOOST, I915_GEM_IDLE_TIMEOUT) == -ETIME) { /* XXX hide warning from gem_eio */ if (i915_modparams.reset) { - dev_err(i915->drm.dev, + dev_err(gt->i915->drm.dev, "Failed to idle engines, declaring wedged!\n"); GEM_TRACE_DUMP(); } @@ -123,18 +124,18 @@ static bool switch_to_kernel_context_sync(struct drm_i915_private *i915) * Forcibly cancel outstanding work and leave * the gpu quiet. */ - i915_gem_set_wedged(i915); + intel_gt_set_wedged(gt); result = false; } - } while (i915_retire_requests(i915) && result); + } while (i915_retire_requests(gt->i915) && result); - GEM_BUG_ON(i915->gt.awake); + GEM_BUG_ON(gt->awake); return result; } bool i915_gem_load_power_context(struct drm_i915_private *i915) { - return switch_to_kernel_context_sync(i915); + return switch_to_kernel_context_sync(&i915->gt); } void i915_gem_suspend(struct drm_i915_private *i915) @@ -155,7 +156,7 @@ void i915_gem_suspend(struct drm_i915_private *i915) * state. Fortunately, the kernel_context is disposable and we do * not rely on its state. */ - switch_to_kernel_context_sync(i915); + switch_to_kernel_context_sync(&i915->gt); mutex_unlock(&i915->drm.struct_mutex); @@ -166,7 +167,7 @@ void i915_gem_suspend(struct drm_i915_private *i915) GEM_BUG_ON(i915->gt.awake); flush_work(&i915->gem.idle_work); - cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work); + cancel_delayed_work_sync(&i915->gt.hangcheck.work); i915_gem_drain_freed_objects(i915); @@ -274,10 +275,10 @@ void i915_gem_resume(struct drm_i915_private *i915) return; err_wedged: - if (!i915_reset_failed(i915)) { + if (!intel_gt_is_wedged(&i915->gt)) { dev_err(i915->drm.dev, "Failed to re-initialize GPU, declaring it wedged!\n"); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(&i915->gt); } goto out_unlock; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c index adb3074d9ce2..1e372420771b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c @@ -41,7 +41,7 @@ i915_gem_throttle_ioctl(struct drm_device *dev, void *data, long ret; /* ABI: return -EIO if already wedged */ - ret = i915_terminally_wedged(to_i915(dev)); + ret = intel_gt_terminally_wedged(&to_i915(dev)->gt); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index 86eed4c3ae2b..6cbd4a668c9a 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -1753,7 +1753,7 @@ int i915_gem_huge_page_mock_selftests(void) return err; } -int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv) +int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_shrink_thp), @@ -1768,22 +1768,22 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv) intel_wakeref_t wakeref; int err; - if (!HAS_PPGTT(dev_priv)) { + if (!HAS_PPGTT(i915)) { pr_info("PPGTT not supported, skipping live-selftests\n"); return 0; } - if (i915_terminally_wedged(dev_priv)) + if (intel_gt_is_wedged(&i915->gt)) return 0; - file = mock_file(dev_priv); + file = mock_file(i915); if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&dev_priv->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(&i915->runtime_pm); - ctx = live_context(dev_priv, file); + ctx = live_context(i915, file); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto out_unlock; @@ -1795,10 +1795,10 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv) err = i915_subtests(tests, ctx); out_unlock: - intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); - mutex_unlock(&dev_priv->drm.struct_mutex); + intel_runtime_pm_put(&i915->runtime_pm, wakeref); + mutex_unlock(&i915->drm.struct_mutex); - mock_file_free(dev_priv, file); + mock_file_free(i915, file); return err; } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c index fa79233093eb..275c28926067 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c @@ -5,6 +5,8 @@ #include "i915_selftest.h" +#include "gt/intel_gt.h" + #include "selftests/igt_flush_test.h" #include "selftests/mock_drm.h" #include "mock_context.h" @@ -101,7 +103,7 @@ int i915_gem_client_blt_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_client_fill), }; - if (i915_terminally_wedged(i915)) + if (intel_gt_is_wedged(&i915->gt)) return 0; if (!HAS_ENGINE(i915, BCS0)) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c index 861f32be7d46..a1a4b53cdc4a 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c @@ -6,6 +6,8 @@ #include <linux/prime_numbers.h> +#include "gt/intel_gt.h" + #include "i915_selftest.h" #include "selftests/i915_random.h" @@ -242,12 +244,12 @@ static bool always_valid(struct drm_i915_private *i915) static bool needs_fence_registers(struct drm_i915_private *i915) { - return !i915_terminally_wedged(i915); + return !intel_gt_is_wedged(&i915->gt); } static bool needs_mi_store_dword(struct drm_i915_private *i915) { - if (i915_terminally_wedged(i915)) + if (intel_gt_is_wedged(&i915->gt)) return false; if (!HAS_ENGINE(i915, RCS0)) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 695bfb18b0d4..db7856f0f31e 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -7,6 +7,7 @@ #include <linux/prime_numbers.h> #include "gem/i915_gem_pm.h" +#include "gt/intel_gt.h" #include "gt/intel_reset.h" #include "i915_selftest.h" @@ -83,7 +84,7 @@ static int live_nop_switch(void *arg) } if (i915_request_wait(rq, 0, HZ / 5) < 0) { pr_err("Failed to populated %d contexts\n", nctx); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(&i915->gt); err = -EIO; goto out_unlock; } @@ -127,7 +128,7 @@ static int live_nop_switch(void *arg) if (i915_request_wait(rq, 0, HZ / 5) < 0) { pr_err("Switching between %ld contexts timed out\n", prime); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(&i915->gt); break; } @@ -956,7 +957,7 @@ __sseu_finish(struct drm_i915_private *i915, int ret = 0; if (flags & TEST_RESET) { - ret = i915_reset_engine(ce->engine, "sseu"); + ret = intel_engine_reset(ce->engine, "sseu"); if (ret) goto out; } @@ -1059,7 +1060,7 @@ __igt_ctx_sseu(struct drm_i915_private *i915, return PTR_ERR(file); if (flags & TEST_RESET) - igt_global_reset_lock(i915); + igt_global_reset_lock(&i915->gt); mutex_lock(&i915->drm.struct_mutex); @@ -1120,7 +1121,7 @@ __igt_ctx_sseu(struct drm_i915_private *i915, mutex_unlock(&i915->drm.struct_mutex); if (flags & TEST_RESET) - igt_global_reset_unlock(i915); + igt_global_reset_unlock(&i915->gt); mock_file_free(i915, file); @@ -1722,7 +1723,7 @@ int i915_gem_context_mock_selftests(void) return err; } -int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv) +int i915_gem_context_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(live_nop_switch), @@ -1733,8 +1734,8 @@ int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv) SUBTEST(igt_vm_isolation), }; - if (i915_terminally_wedged(dev_priv)) + if (intel_gt_is_wedged(&i915->gt)) return 0; - return i915_live_subtests(tests, dev_priv); + return i915_live_subtests(tests, i915); } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 5635cbb4af22..01857c12f12f 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -478,7 +478,7 @@ static int igt_mmap_offset_exhaustion(void *arg) /* Now fill with busy dead objects that we expect to reap */ for (loop = 0; loop < 3; loop++) { - if (i915_terminally_wedged(i915)) + if (intel_gt_is_wedged(&i915->gt)) break; obj = i915_gem_object_create_internal(i915, PAGE_SIZE); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c index 11d37238c62c..19843acc84d3 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c @@ -3,6 +3,8 @@ * Copyright © 2019 Intel Corporation */ +#include "gt/intel_gt.h" + #include "i915_selftest.h" #include "selftests/igt_flush_test.h" @@ -95,7 +97,7 @@ int i915_gem_object_blt_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_fill_blt), }; - if (i915_terminally_wedged(i915)) + if (intel_gt_is_wedged(&i915->gt)) return 0; if (!HAS_ENGINE(i915, BCS0)) diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index faaa164267f4..a4db4dd22b4f 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -410,8 +410,8 @@ gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset, u32 flags) return cs; } -static inline void intel_engine_reset(struct intel_engine_cs *engine, - bool stalled) +static inline void __intel_engine_reset(struct intel_engine_cs *engine, + bool stalled) { if (engine->reset.reset) engine->reset.reset(engine, stalled); @@ -419,9 +419,9 @@ static inline void intel_engine_reset(struct intel_engine_cs *engine, } bool intel_engine_is_idle(struct intel_engine_cs *engine); -bool intel_engines_are_idle(struct drm_i915_private *dev_priv); +bool intel_engines_are_idle(struct intel_gt *gt); -void intel_engines_reset_default_submission(struct drm_i915_private *i915); +void intel_engines_reset_default_submission(struct intel_gt *gt); unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915); bool intel_engine_can_store_dword(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index e3edf474c377..ee9f9a0cec19 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1147,7 +1147,7 @@ static bool ring_is_idle(struct intel_engine_cs *engine) bool intel_engine_is_idle(struct intel_engine_cs *engine) { /* More white lies, if wedged, hw state is inconsistent */ - if (i915_reset_failed(engine->i915)) + if (intel_gt_is_wedged(engine->gt)) return true; if (!intel_engine_pm_is_awake(engine)) @@ -1183,7 +1183,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) return ring_is_idle(engine); } -bool intel_engines_are_idle(struct drm_i915_private *i915) +bool intel_engines_are_idle(struct intel_gt *gt) { struct intel_engine_cs *engine; enum intel_engine_id id; @@ -1192,14 +1192,14 @@ bool intel_engines_are_idle(struct drm_i915_private *i915) * If the driver is wedged, HW state may be very inconsistent and * report that it is still busy, even though we have stopped using it. */ - if (i915_reset_failed(i915)) + if (intel_gt_is_wedged(gt)) return true; /* Already parked (and passed an idleness test); must still be idle */ - if (!READ_ONCE(i915->gt.awake)) + if (!READ_ONCE(gt->awake)) return true; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt->i915, id) { if (!intel_engine_is_idle(engine)) return false; } @@ -1207,12 +1207,12 @@ bool intel_engines_are_idle(struct drm_i915_private *i915) return true; } -void intel_engines_reset_default_submission(struct drm_i915_private *i915) +void intel_engines_reset_default_submission(struct intel_gt *gt) { struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, i915, id) + for_each_engine(engine, gt->i915, id) engine->set_default_submission(engine); } @@ -1490,7 +1490,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, va_end(ap); } - if (i915_reset_failed(engine->i915)) + if (intel_gt_is_wedged(engine->gt)) drm_printf(m, "*** WEDGED ***\n"); drm_printf(m, "\tAwake? %d\n", atomic_read(&engine->wakeref.count)); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 34376d33e4ca..1218b7d53b88 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -9,6 +9,7 @@ #include "intel_engine.h" #include "intel_engine_pool.h" #include "intel_engine_pm.h" +#include "intel_gt.h" #include "intel_gt_pm.h" static int __engine_unpark(struct intel_wakeref *wf) @@ -77,7 +78,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) return true; /* GPU is pointing to the void, as good as in the kernel context. */ - if (i915_reset_failed(engine->i915)) + if (intel_gt_is_wedged(engine->gt)) return true; /* diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 46d24d9d62ac..90ed79285ff8 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -18,6 +18,8 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) spin_lock_init(>->closed_lock); + intel_gt_init_hangcheck(gt); + intel_gt_init_reset(gt); intel_gt_pm_init_early(gt); } @@ -240,3 +242,8 @@ void intel_gt_fini_scratch(struct intel_gt *gt) { i915_vma_unpin_and_release(>->scratch, 0); } + +void intel_gt_cleanup_early(struct intel_gt *gt) +{ + intel_gt_fini_reset(gt); +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 1093dcf36f63..49c0085385a0 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -8,12 +8,15 @@ #include "intel_engine_types.h" #include "intel_gt_types.h" +#include "intel_reset.h" struct drm_i915_private; void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915); void intel_gt_init_hw(struct drm_i915_private *i915); +void intel_gt_cleanup_early(struct intel_gt *gt); + void intel_gt_check_and_clear_faults(struct intel_gt *gt); void intel_gt_clear_error_registers(struct intel_gt *gt, intel_engine_mask_t engine_mask); @@ -21,6 +24,8 @@ void intel_gt_clear_error_registers(struct intel_gt *gt, void intel_gt_flush_ggtt_writes(struct intel_gt *gt); void intel_gt_chipset_flush(struct intel_gt *gt); +void intel_gt_init_hangcheck(struct intel_gt *gt); + int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size); void intel_gt_fini_scratch(struct intel_gt *gt); @@ -30,4 +35,11 @@ static inline u32 intel_gt_scratch_offset(const struct intel_gt *gt, return i915_ggtt_offset(gt->scratch) + field; } +static inline bool intel_gt_is_wedged(struct intel_gt *gt) +{ + return __intel_reset_failed(>->reset); +} + +void intel_gt_queue_hangcheck(struct intel_gt *gt); + #endif /* __INTEL_GT_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 36ba80e6a0b7..1013ba0f4984 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -5,7 +5,9 @@ */ #include "i915_drv.h" +#include "i915_params.h" #include "intel_engine_pm.h" +#include "intel_gt.h" #include "intel_gt_pm.h" #include "intel_pm.h" #include "intel_wakeref.h" @@ -17,8 +19,8 @@ static void pm_notify(struct drm_i915_private *i915, int state) static int intel_gt_unpark(struct intel_wakeref *wf) { - struct drm_i915_private *i915 = - container_of(wf, typeof(*i915), gt.wakeref); + struct intel_gt *gt = container_of(wf, typeof(*gt), wakeref); + struct drm_i915_private *i915 = gt->i915; GEM_TRACE("\n"); @@ -33,8 +35,8 @@ static int intel_gt_unpark(struct intel_wakeref *wf) * Work around it by grabbing a GT IRQ power domain whilst there is any * GT activity, preventing any DC state transitions. */ - i915->gt.awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); - GEM_BUG_ON(!i915->gt.awake); + gt->awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); + GEM_BUG_ON(!gt->awake); intel_enable_gt_powersave(i915); @@ -44,7 +46,7 @@ static int intel_gt_unpark(struct intel_wakeref *wf) i915_pmu_gt_unparked(i915); - i915_queue_hangcheck(i915); + intel_gt_queue_hangcheck(gt); pm_notify(i915, INTEL_GT_UNPARK); @@ -91,12 +93,12 @@ void intel_gt_pm_init_early(struct intel_gt *gt) BLOCKING_INIT_NOTIFIER_HEAD(>->pm_notifications); } -static bool reset_engines(struct drm_i915_private *i915) +static bool reset_engines(struct intel_gt *gt) { - if (INTEL_INFO(i915)->gpu_reset_clobbers_display) + if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display) return false; - return intel_gpu_reset(i915, ALL_ENGINES) == 0; + return intel_gpu_reset(gt, ALL_ENGINES) == 0; } /** @@ -116,11 +118,11 @@ void intel_gt_sanitize(struct intel_gt *gt, bool force) GEM_TRACE("\n"); - if (!reset_engines(gt->i915) && !force) + if (!reset_engines(gt) && !force) return; for_each_engine(engine, gt->i915, id) - intel_engine_reset(engine, false); + __intel_engine_reset(engine, false); } int intel_gt_resume(struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index b55644132b8f..f35c66b8c67e 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -14,12 +14,21 @@ #include <linux/types.h> #include "i915_vma.h" +#include "intel_reset_types.h" #include "intel_wakeref.h" struct drm_i915_private; struct i915_ggtt; struct intel_uncore; +struct intel_hangcheck { + /* For hangcheck timer */ +#define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */ +#define DRM_I915_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD) + + struct delayed_work work; +}; + struct intel_gt { struct drm_i915_private *i915; struct intel_uncore *uncore; @@ -39,6 +48,9 @@ struct intel_gt { struct list_head closed_vma; spinlock_t closed_lock; /* guards the list of closed_vma */ + struct intel_hangcheck hangcheck; + struct intel_reset reset; + /** * Is the GPU currently considered idle, or busy executing * userspace requests? Whilst idle, we allow runtime power diff --git a/drivers/gpu/drm/i915/gt/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/intel_hangcheck.c index 797d8ef0969c..88177f47b4d8 100644 --- a/drivers/gpu/drm/i915/gt/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/intel_hangcheck.c @@ -22,8 +22,10 @@ * */ -#include "intel_reset.h" #include "i915_drv.h" +#include "intel_engine.h" +#include "intel_gt.h" +#include "intel_reset.h" struct hangcheck { u64 acthd; @@ -100,7 +102,6 @@ head_stuck(struct intel_engine_cs *engine, u64 acthd) static enum intel_engine_hangcheck_action engine_stuck(struct intel_engine_cs *engine, u64 acthd) { - struct drm_i915_private *dev_priv = engine->i915; enum intel_engine_hangcheck_action ha; u32 tmp; @@ -108,7 +109,7 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd) if (ha != ENGINE_DEAD) return ha; - if (IS_GEN(dev_priv, 2)) + if (IS_GEN(engine->i915, 2)) return ENGINE_DEAD; /* Is the chip hanging on a WAIT_FOR_EVENT? @@ -118,8 +119,8 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd) */ tmp = ENGINE_READ(engine, RING_CTL); if (tmp & RING_WAIT) { - i915_handle_error(dev_priv, engine->mask, 0, - "stuck wait on %s", engine->name); + intel_gt_handle_error(engine->gt, engine->mask, 0, + "stuck wait on %s", engine->name); ENGINE_WRITE(engine, RING_CTL, tmp); return ENGINE_WAIT_KICK; } @@ -219,7 +220,7 @@ static void hangcheck_accumulate_sample(struct intel_engine_cs *engine, I915_ENGINE_WEDGED_TIMEOUT); } -static void hangcheck_declare_hang(struct drm_i915_private *i915, +static void hangcheck_declare_hang(struct intel_gt *gt, intel_engine_mask_t hung, intel_engine_mask_t stuck) { @@ -235,12 +236,12 @@ static void hangcheck_declare_hang(struct drm_i915_private *i915, hung &= ~stuck; len = scnprintf(msg, sizeof(msg), "%s on ", stuck == hung ? "no progress" : "hang"); - for_each_engine_masked(engine, i915, hung, tmp) + for_each_engine_masked(engine, gt->i915, hung, tmp) len += scnprintf(msg + len, sizeof(msg) - len, "%s, ", engine->name); msg[len-2] = '\0'; - return i915_handle_error(i915, hung, I915_ERROR_CAPTURE, "%s", msg); + return intel_gt_handle_error(gt, hung, I915_ERROR_CAPTURE, "%s", msg); } /* @@ -251,11 +252,10 @@ static void hangcheck_declare_hang(struct drm_i915_private *i915, * we kick the ring. If we see no progress on three subsequent calls * we assume chip is wedged and try to fix it by resetting the chip. */ -static void i915_hangcheck_elapsed(struct work_struct *work) +static void hangcheck_elapsed(struct work_struct *work) { - struct drm_i915_private *dev_priv = - container_of(work, typeof(*dev_priv), - gpu_error.hangcheck_work.work); + struct intel_gt *gt = + container_of(work, typeof(*gt), hangcheck.work.work); intel_engine_mask_t hung = 0, stuck = 0, wedged = 0; struct intel_engine_cs *engine; enum intel_engine_id id; @@ -264,13 +264,13 @@ static void i915_hangcheck_elapsed(struct work_struct *work) if (!i915_modparams.enable_hangcheck) return; - if (!READ_ONCE(dev_priv->gt.awake)) + if (!READ_ONCE(gt->awake)) return; - if (i915_terminally_wedged(dev_priv)) + if (intel_gt_is_wedged(gt)) return; - wakeref = intel_runtime_pm_get_if_in_use(&dev_priv->runtime_pm); + wakeref = intel_runtime_pm_get_if_in_use(>->i915->runtime_pm); if (!wakeref) return; @@ -278,9 +278,9 @@ static void i915_hangcheck_elapsed(struct work_struct *work) * periodically arm the mmio checker to see if we are triggering * any invalid access. */ - intel_uncore_arm_unclaimed_mmio_detection(&dev_priv->uncore); + intel_uncore_arm_unclaimed_mmio_detection(gt->uncore); - for_each_engine(engine, dev_priv, id) { + for_each_engine(engine, gt->i915, id) { struct hangcheck hc; intel_engine_signal_breadcrumbs(engine); @@ -302,7 +302,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work) if (GEM_SHOW_DEBUG() && (hung | stuck)) { struct drm_printer p = drm_debug_printer("hangcheck"); - for_each_engine(engine, dev_priv, id) { + for_each_engine(engine, gt->i915, id) { if (intel_engine_is_idle(engine)) continue; @@ -311,20 +311,36 @@ static void i915_hangcheck_elapsed(struct work_struct *work) } if (wedged) { - dev_err(dev_priv->drm.dev, + dev_err(gt->i915->drm.dev, "GPU recovery timed out," " cancelling all in-flight rendering.\n"); GEM_TRACE_DUMP(); - i915_gem_set_wedged(dev_priv); + intel_gt_set_wedged(gt); } if (hung) - hangcheck_declare_hang(dev_priv, hung, stuck); + hangcheck_declare_hang(gt, hung, stuck); - intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); + intel_runtime_pm_put(>->i915->runtime_pm, wakeref); /* Reset timer in case GPU hangs without another request being added */ - i915_queue_hangcheck(dev_priv); + intel_gt_queue_hangcheck(gt); +} + +void intel_gt_queue_hangcheck(struct intel_gt *gt) +{ + unsigned long delay; + + if (unlikely(!i915_modparams.enable_hangcheck)) + return; + + /* Don't continually defer the hangcheck so that it is always run at + * least once after work has been scheduled on any ring. Otherwise, + * we will ignore a hung ring if a second ring is kept busy. + */ + + delay = round_jiffies_up_relative(DRM_I915_HANGCHECK_JIFFIES); + queue_delayed_work(system_long_wq, >->hangcheck.work, delay); } void intel_engine_init_hangcheck(struct intel_engine_cs *engine) @@ -333,10 +349,9 @@ void intel_engine_init_hangcheck(struct intel_engine_cs *engine) engine->hangcheck.action_timestamp = jiffies; } -void intel_hangcheck_init(struct drm_i915_private *i915) +void intel_gt_init_hangcheck(struct intel_gt *gt) { - INIT_DELAYED_WORK(&i915->gpu_error.hangcheck_work, - i915_hangcheck_elapsed); + INIT_DELAYED_WORK(>->hangcheck.work, hangcheck_elapsed); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index f3539147c7b1..18d9ddc5cd58 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -2297,7 +2297,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) * and have to at least restore the RING register in the context * image back to the expected values to skip over the guilty request. */ - i915_reset_request(rq, stalled); + __i915_request_reset(rq, stalled); if (!stalled) goto out_replay; diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 72002c0f9698..67c06231b004 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -114,7 +114,7 @@ static void context_mark_innocent(struct i915_gem_context *ctx) atomic_inc(&ctx->active_count); } -void i915_reset_request(struct i915_request *rq, bool guilty) +void __i915_request_reset(struct i915_request *rq, bool guilty) { GEM_TRACE("%s rq=%llx:%lld, guilty? %s\n", rq->engine->name, @@ -164,16 +164,15 @@ static void gen3_stop_engine(struct intel_engine_cs *engine) intel_uncore_read_fw(uncore, RING_HEAD(base))); } -static void i915_stop_engines(struct drm_i915_private *i915, - intel_engine_mask_t engine_mask) +static void stop_engines(struct intel_gt *gt, intel_engine_mask_t engine_mask) { struct intel_engine_cs *engine; intel_engine_mask_t tmp; - if (INTEL_GEN(i915) < 3) + if (INTEL_GEN(gt->i915) < 3) return; - for_each_engine_masked(engine, i915, engine_mask, tmp) + for_each_engine_masked(engine, gt->i915, engine_mask, tmp) gen3_stop_engine(engine); } @@ -185,11 +184,11 @@ static bool i915_in_reset(struct pci_dev *pdev) return gdrst & GRDOM_RESET_STATUS; } -static int i915_do_reset(struct drm_i915_private *i915, +static int i915_do_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask, unsigned int retry) { - struct pci_dev *pdev = i915->drm.pdev; + struct pci_dev *pdev = gt->i915->drm.pdev; int err; /* Assert reset for at least 20 usec, and wait for acknowledgement. */ @@ -214,22 +213,22 @@ static bool g4x_reset_complete(struct pci_dev *pdev) return (gdrst & GRDOM_RESET_ENABLE) == 0; } -static int g33_do_reset(struct drm_i915_private *i915, +static int g33_do_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask, unsigned int retry) { - struct pci_dev *pdev = i915->drm.pdev; + struct pci_dev *pdev = gt->i915->drm.pdev; pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); return wait_for_atomic(g4x_reset_complete(pdev), 50); } -static int g4x_do_reset(struct drm_i915_private *i915, +static int g4x_do_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask, unsigned int retry) { - struct pci_dev *pdev = i915->drm.pdev; - struct intel_uncore *uncore = &i915->uncore; + struct pci_dev *pdev = gt->i915->drm.pdev; + struct intel_uncore *uncore = gt->uncore; int ret; /* WaVcpClkGateDisableForMediaReset:ctg,elk */ @@ -261,11 +260,11 @@ static int g4x_do_reset(struct drm_i915_private *i915, return ret; } -static int ironlake_do_reset(struct drm_i915_private *i915, +static int ironlake_do_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask, unsigned int retry) { - struct intel_uncore *uncore = &i915->uncore; + struct intel_uncore *uncore = gt->uncore; int ret; intel_uncore_write_fw(uncore, ILK_GDSR, @@ -297,10 +296,9 @@ static int ironlake_do_reset(struct drm_i915_private *i915, } /* Reset the hardware domains (GENX_GRDOM_*) specified by mask */ -static int gen6_hw_domain_reset(struct drm_i915_private *i915, - u32 hw_domain_mask) +static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask) { - struct intel_uncore *uncore = &i915->uncore; + struct intel_uncore *uncore = gt->uncore; int err; /* @@ -322,7 +320,7 @@ static int gen6_hw_domain_reset(struct drm_i915_private *i915, return err; } -static int gen6_reset_engines(struct drm_i915_private *i915, +static int gen6_reset_engines(struct intel_gt *gt, intel_engine_mask_t engine_mask, unsigned int retry) { @@ -342,13 +340,13 @@ static int gen6_reset_engines(struct drm_i915_private *i915, intel_engine_mask_t tmp; hw_mask = 0; - for_each_engine_masked(engine, i915, engine_mask, tmp) { + for_each_engine_masked(engine, gt->i915, engine_mask, tmp) { GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask)); hw_mask |= hw_engine_mask[engine->id]; } } - return gen6_hw_domain_reset(i915, hw_mask); + return gen6_hw_domain_reset(gt, hw_mask); } static u32 gen11_lock_sfc(struct intel_engine_cs *engine) @@ -446,7 +444,7 @@ static void gen11_unlock_sfc(struct intel_engine_cs *engine) rmw_clear_fw(uncore, sfc_forced_lock, sfc_forced_lock_bit); } -static int gen11_reset_engines(struct drm_i915_private *i915, +static int gen11_reset_engines(struct intel_gt *gt, intel_engine_mask_t engine_mask, unsigned int retry) { @@ -469,17 +467,17 @@ static int gen11_reset_engines(struct drm_i915_private *i915, hw_mask = GEN11_GRDOM_FULL; } else { hw_mask = 0; - for_each_engine_masked(engine, i915, engine_mask, tmp) { + for_each_engine_masked(engine, gt->i915, engine_mask, tmp) { GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask)); hw_mask |= hw_engine_mask[engine->id]; hw_mask |= gen11_lock_sfc(engine); } } - ret = gen6_hw_domain_reset(i915, hw_mask); + ret = gen6_hw_domain_reset(gt, hw_mask); if (engine_mask != ALL_ENGINES) - for_each_engine_masked(engine, i915, engine_mask, tmp) + for_each_engine_masked(engine, gt->i915, engine_mask, tmp) gen11_unlock_sfc(engine); return ret; @@ -529,7 +527,7 @@ static void gen8_engine_reset_cancel(struct intel_engine_cs *engine) _MASKED_BIT_DISABLE(RESET_CTL_REQUEST_RESET)); } -static int gen8_reset_engines(struct drm_i915_private *i915, +static int gen8_reset_engines(struct intel_gt *gt, intel_engine_mask_t engine_mask, unsigned int retry) { @@ -538,7 +536,7 @@ static int gen8_reset_engines(struct drm_i915_private *i915, intel_engine_mask_t tmp; int ret; - for_each_engine_masked(engine, i915, engine_mask, tmp) { + for_each_engine_masked(engine, gt->i915, engine_mask, tmp) { ret = gen8_engine_reset_prepare(engine); if (ret && !reset_non_ready) goto skip_reset; @@ -554,23 +552,23 @@ static int gen8_reset_engines(struct drm_i915_private *i915, * We rather take context corruption instead of * failed reset with a wedged driver/gpu. And * active bb execution case should be covered by - * i915_stop_engines we have before the reset. + * stop_engines() we have before the reset. */ } - if (INTEL_GEN(i915) >= 11) - ret = gen11_reset_engines(i915, engine_mask, retry); + if (INTEL_GEN(gt->i915) >= 11) + ret = gen11_reset_engines(gt, engine_mask, retry); else - ret = gen6_reset_engines(i915, engine_mask, retry); + ret = gen6_reset_engines(gt, engine_mask, retry); skip_reset: - for_each_engine_masked(engine, i915, engine_mask, tmp) + for_each_engine_masked(engine, gt->i915, engine_mask, tmp) gen8_engine_reset_cancel(engine); return ret; } -typedef int (*reset_func)(struct drm_i915_private *, +typedef int (*reset_func)(struct intel_gt *, intel_engine_mask_t engine_mask, unsigned int retry); @@ -592,15 +590,14 @@ static reset_func intel_get_gpu_reset(struct drm_i915_private *i915) return NULL; } -int intel_gpu_reset(struct drm_i915_private *i915, - intel_engine_mask_t engine_mask) +int intel_gpu_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask) { const int retries = engine_mask == ALL_ENGINES ? RESET_MAX_RETRIES : 1; reset_func reset; int ret = -ETIMEDOUT; int retry; - reset = intel_get_gpu_reset(i915); + reset = intel_get_gpu_reset(gt->i915); if (!reset) return -ENODEV; @@ -608,7 +605,7 @@ int intel_gpu_reset(struct drm_i915_private *i915, * If the power well sleeps during the reset, the reset * request may be dropped and never completes (causing -EIO). */ - intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL); + intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); for (retry = 0; ret == -ETIMEDOUT && retry < retries; retry++) { /* * We stop engines, otherwise we might get failed reset and a @@ -625,14 +622,14 @@ int intel_gpu_reset(struct drm_i915_private *i915, * FIXME: Wa for more modern gens needs to be validated */ if (retry) - i915_stop_engines(i915, engine_mask); + stop_engines(gt, engine_mask); GEM_TRACE("engine_mask=%x\n", engine_mask); preempt_disable(); - ret = reset(i915, engine_mask, retry); + ret = reset(gt, engine_mask, retry); preempt_enable(); } - intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); + intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); return ret; } @@ -650,17 +647,17 @@ bool intel_has_reset_engine(struct drm_i915_private *i915) return INTEL_INFO(i915)->has_reset_engine && i915_modparams.reset >= 2; } -int intel_reset_guc(struct drm_i915_private *i915) +int intel_reset_guc(struct intel_gt *gt) { u32 guc_domain = - INTEL_GEN(i915) >= 11 ? GEN11_GRDOM_GUC : GEN9_GRDOM_GUC; + INTEL_GEN(gt->i915) >= 11 ? GEN11_GRDOM_GUC : GEN9_GRDOM_GUC; int ret; - GEM_BUG_ON(!HAS_GUC(i915)); + GEM_BUG_ON(!HAS_GUC(gt->i915)); - intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL); - ret = gen6_hw_domain_reset(i915, guc_domain); - intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); + intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); + ret = gen6_hw_domain_reset(gt, guc_domain); + intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); return ret; } @@ -682,56 +679,55 @@ static void reset_prepare_engine(struct intel_engine_cs *engine) engine->reset.prepare(engine); } -static void revoke_mmaps(struct drm_i915_private *i915) +static void revoke_mmaps(struct intel_gt *gt) { int i; - for (i = 0; i < i915->ggtt.num_fences; i++) { + for (i = 0; i < gt->ggtt->num_fences; i++) { struct drm_vma_offset_node *node; struct i915_vma *vma; u64 vma_offset; - vma = READ_ONCE(i915->ggtt.fence_regs[i].vma); + vma = READ_ONCE(gt->ggtt->fence_regs[i].vma); if (!vma) continue; if (!i915_vma_has_userfault(vma)) continue; - GEM_BUG_ON(vma->fence != &i915->ggtt.fence_regs[i]); + GEM_BUG_ON(vma->fence != >->ggtt->fence_regs[i]); node = &vma->obj->base.vma_node; vma_offset = vma->ggtt_view.partial.offset << PAGE_SHIFT; - unmap_mapping_range(i915->drm.anon_inode->i_mapping, + unmap_mapping_range(gt->i915->drm.anon_inode->i_mapping, drm_vma_node_offset_addr(node) + vma_offset, vma->size, 1); } } -static intel_engine_mask_t reset_prepare(struct drm_i915_private *i915) +static intel_engine_mask_t reset_prepare(struct intel_gt *gt) { struct intel_engine_cs *engine; intel_engine_mask_t awake = 0; enum intel_engine_id id; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt->i915, id) { if (intel_engine_pm_get_if_awake(engine)) awake |= engine->mask; reset_prepare_engine(engine); } - intel_uc_reset_prepare(i915); + intel_uc_reset_prepare(gt->i915); return awake; } -static void gt_revoke(struct drm_i915_private *i915) +static void gt_revoke(struct intel_gt *gt) { - revoke_mmaps(i915); + revoke_mmaps(gt); } -static int gt_reset(struct drm_i915_private *i915, - intel_engine_mask_t stalled_mask) +static int gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask) { struct intel_engine_cs *engine; enum intel_engine_id id; @@ -741,14 +737,14 @@ static int gt_reset(struct drm_i915_private *i915, * Everything depends on having the GTT running, so we need to start * there. */ - err = i915_ggtt_enable_hw(i915); + err = i915_ggtt_enable_hw(gt->i915); if (err) return err; - for_each_engine(engine, i915, id) - intel_engine_reset(engine, stalled_mask & engine->mask); + for_each_engine(engine, gt->i915, id) + __intel_engine_reset(engine, stalled_mask & engine->mask); - i915_gem_restore_fences(i915); + i915_gem_restore_fences(gt->i915); return err; } @@ -761,13 +757,12 @@ static void reset_finish_engine(struct intel_engine_cs *engine) intel_engine_signal_breadcrumbs(engine); } -static void reset_finish(struct drm_i915_private *i915, - intel_engine_mask_t awake) +static void reset_finish(struct intel_gt *gt, intel_engine_mask_t awake) { struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt->i915, id) { reset_finish_engine(engine); if (awake & engine->mask) intel_engine_pm_put(engine); @@ -791,20 +786,19 @@ static void nop_submit_request(struct i915_request *request) intel_engine_queue_breadcrumbs(engine); } -static void __i915_gem_set_wedged(struct drm_i915_private *i915) +static void __intel_gt_set_wedged(struct intel_gt *gt) { - struct i915_gpu_error *error = &i915->gpu_error; struct intel_engine_cs *engine; intel_engine_mask_t awake; enum intel_engine_id id; - if (test_bit(I915_WEDGED, &error->flags)) + if (test_bit(I915_WEDGED, >->reset.flags)) return; - if (GEM_SHOW_DEBUG() && !intel_engines_are_idle(i915)) { + if (GEM_SHOW_DEBUG() && !intel_engines_are_idle(gt)) { struct drm_printer p = drm_debug_printer(__func__); - for_each_engine(engine, i915, id) + for_each_engine(engine, gt->i915, id) intel_engine_dump(engine, &p, "%s\n", engine->name); } @@ -815,17 +809,17 @@ static void __i915_gem_set_wedged(struct drm_i915_private *i915) * rolling the global seqno forward (since this would complete requests * for which we haven't set the fence error to EIO yet). */ - awake = reset_prepare(i915); + awake = reset_prepare(gt); /* Even if the GPU reset fails, it should still stop the engines */ - if (!INTEL_INFO(i915)->gpu_reset_clobbers_display) - intel_gpu_reset(i915, ALL_ENGINES); + if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display) + intel_gpu_reset(gt, ALL_ENGINES); - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt->i915, id) { engine->submit_request = nop_submit_request; engine->schedule = NULL; } - i915->caps.scheduler = 0; + gt->i915->caps.scheduler = 0; /* * Make sure no request can slip through without getting completed by @@ -833,38 +827,36 @@ static void __i915_gem_set_wedged(struct drm_i915_private *i915) * in nop_submit_request. */ synchronize_rcu_expedited(); - set_bit(I915_WEDGED, &error->flags); + set_bit(I915_WEDGED, >->reset.flags); /* Mark all executing requests as skipped */ - for_each_engine(engine, i915, id) + for_each_engine(engine, gt->i915, id) engine->cancel_requests(engine); - reset_finish(i915, awake); + reset_finish(gt, awake); GEM_TRACE("end\n"); } -void i915_gem_set_wedged(struct drm_i915_private *i915) +void intel_gt_set_wedged(struct intel_gt *gt) { - struct i915_gpu_error *error = &i915->gpu_error; intel_wakeref_t wakeref; - mutex_lock(&error->wedge_mutex); - with_intel_runtime_pm(&i915->runtime_pm, wakeref) - __i915_gem_set_wedged(i915); - mutex_unlock(&error->wedge_mutex); + mutex_lock(>->reset.mutex); + with_intel_runtime_pm(>->i915->runtime_pm, wakeref) + __intel_gt_set_wedged(gt); + mutex_unlock(>->reset.mutex); } -static bool __i915_gem_unset_wedged(struct drm_i915_private *i915) +static bool __intel_gt_unset_wedged(struct intel_gt *gt) { - struct i915_gpu_error *error = &i915->gpu_error; - struct intel_gt_timelines *timelines = &i915->gt.timelines; + struct intel_gt_timelines *timelines = >->timelines; struct intel_timeline *tl; - if (!test_bit(I915_WEDGED, &error->flags)) + if (!test_bit(I915_WEDGED, >->reset.flags)) return true; - if (!i915->gt.scratch) /* Never full initialised, recovery impossible */ + if (!gt->scratch) /* Never full initialised, recovery impossible */ return false; GEM_TRACE("start\n"); @@ -905,7 +897,7 @@ static bool __i915_gem_unset_wedged(struct drm_i915_private *i915) } spin_unlock(&timelines->lock); - intel_gt_sanitize(&i915->gt, false); + intel_gt_sanitize(gt, false); /* * Undo nop_submit_request. We prevent all new i915 requests from @@ -916,53 +908,51 @@ static bool __i915_gem_unset_wedged(struct drm_i915_private *i915) * the nop_submit_request on reset, we can do this from normal * context and do not require stop_machine(). */ - intel_engines_reset_default_submission(i915); + intel_engines_reset_default_submission(gt); GEM_TRACE("end\n"); smp_mb__before_atomic(); /* complete takeover before enabling execbuf */ - clear_bit(I915_WEDGED, &i915->gpu_error.flags); + clear_bit(I915_WEDGED, >->reset.flags); return true; } -bool i915_gem_unset_wedged(struct drm_i915_private *i915) +bool intel_gt_unset_wedged(struct intel_gt *gt) { - struct i915_gpu_error *error = &i915->gpu_error; bool result; - mutex_lock(&error->wedge_mutex); - result = __i915_gem_unset_wedged(i915); - mutex_unlock(&error->wedge_mutex); + mutex_lock(>->reset.mutex); + result = __intel_gt_unset_wedged(gt); + mutex_unlock(>->reset.mutex); return result; } -static int do_reset(struct drm_i915_private *i915, - intel_engine_mask_t stalled_mask) +static int do_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask) { int err, i; - gt_revoke(i915); + gt_revoke(gt); - err = intel_gpu_reset(i915, ALL_ENGINES); + err = intel_gpu_reset(gt, ALL_ENGINES); for (i = 0; err && i < RESET_MAX_RETRIES; i++) { msleep(10 * (i + 1)); - err = intel_gpu_reset(i915, ALL_ENGINES); + err = intel_gpu_reset(gt, ALL_ENGINES); } if (err) return err; - return gt_reset(i915, stalled_mask); + return gt_reset(gt, stalled_mask); } -static int resume(struct drm_i915_private *i915) +static int resume(struct intel_gt *gt) { struct intel_engine_cs *engine; enum intel_engine_id id; int ret; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt->i915, id) { ret = engine->resume(engine); if (ret) return ret; @@ -972,8 +962,8 @@ static int resume(struct drm_i915_private *i915) } /** - * i915_reset - reset chip after a hang - * @i915: #drm_i915_private to reset + * intel_gt_reset - reset chip after a hang + * @gt: #intel_gt to reset * @stalled_mask: mask of the stalled engines with the guilty requests * @reason: user error message for why we are resetting * @@ -988,50 +978,50 @@ static int resume(struct drm_i915_private *i915) * - re-init interrupt state * - re-init display */ -void i915_reset(struct drm_i915_private *i915, - intel_engine_mask_t stalled_mask, - const char *reason) +void intel_gt_reset(struct intel_gt *gt, + intel_engine_mask_t stalled_mask, + const char *reason) { - struct i915_gpu_error *error = &i915->gpu_error; intel_engine_mask_t awake; int ret; - GEM_TRACE("flags=%lx\n", error->flags); + GEM_TRACE("flags=%lx\n", gt->reset.flags); might_sleep(); - GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags)); - mutex_lock(&error->wedge_mutex); + GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, >->reset.flags)); + mutex_lock(>->reset.mutex); /* Clear any previous failed attempts at recovery. Time to try again. */ - if (!__i915_gem_unset_wedged(i915)) + if (!__intel_gt_unset_wedged(gt)) goto unlock; if (reason) - dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason); - error->reset_count++; + dev_notice(gt->i915->drm.dev, + "Resetting chip for %s\n", reason); + atomic_inc(>->i915->gpu_error.reset_count); - awake = reset_prepare(i915); + awake = reset_prepare(gt); - if (!intel_has_gpu_reset(i915)) { + if (!intel_has_gpu_reset(gt->i915)) { if (i915_modparams.reset) - dev_err(i915->drm.dev, "GPU reset not supported\n"); + dev_err(gt->i915->drm.dev, "GPU reset not supported\n"); else DRM_DEBUG_DRIVER("GPU reset disabled\n"); goto error; } - if (INTEL_INFO(i915)->gpu_reset_clobbers_display) - intel_runtime_pm_disable_interrupts(i915); + if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display) + intel_runtime_pm_disable_interrupts(gt->i915); - if (do_reset(i915, stalled_mask)) { - dev_err(i915->drm.dev, "Failed to reset chip\n"); + if (do_reset(gt, stalled_mask)) { + dev_err(gt->i915->drm.dev, "Failed to reset chip\n"); goto taint; } - if (INTEL_INFO(i915)->gpu_reset_clobbers_display) - intel_runtime_pm_enable_interrupts(i915); + if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display) + intel_runtime_pm_enable_interrupts(gt->i915); - intel_overlay_reset(i915); + intel_overlay_reset(gt->i915); /* * Next we need to restore the context, but we don't use those @@ -1041,23 +1031,23 @@ void i915_reset(struct drm_i915_private *i915, * was running at the time of the reset (i.e. we weren't VT * switched away). */ - ret = i915_gem_init_hw(i915); + ret = i915_gem_init_hw(gt->i915); if (ret) { DRM_ERROR("Failed to initialise HW following reset (%d)\n", ret); goto taint; } - ret = resume(i915); + ret = resume(gt); if (ret) goto taint; - i915_queue_hangcheck(i915); + intel_gt_queue_hangcheck(gt); finish: - reset_finish(i915, awake); + reset_finish(gt, awake); unlock: - mutex_unlock(&error->wedge_mutex); + mutex_unlock(>->reset.mutex); return; taint: @@ -1075,18 +1065,17 @@ void i915_reset(struct drm_i915_private *i915, */ add_taint_for_CI(TAINT_WARN); error: - __i915_gem_set_wedged(i915); + __intel_gt_set_wedged(gt); goto finish; } -static inline int intel_gt_reset_engine(struct drm_i915_private *i915, - struct intel_engine_cs *engine) +static inline int intel_gt_reset_engine(struct intel_engine_cs *engine) { - return intel_gpu_reset(i915, engine->mask); + return intel_gpu_reset(engine->gt, engine->mask); } /** - * i915_reset_engine - reset GPU engine to recover from a hang + * intel_engine_reset - reset GPU engine to recover from a hang * @engine: engine to reset * @msg: reason for GPU reset; or NULL for no dev_notice() * @@ -1098,13 +1087,13 @@ static inline int intel_gt_reset_engine(struct drm_i915_private *i915, * - reset engine (which will force the engine to idle) * - re-init/configure engine */ -int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) +int intel_engine_reset(struct intel_engine_cs *engine, const char *msg) { - struct i915_gpu_error *error = &engine->i915->gpu_error; + struct intel_gt *gt = engine->gt; int ret; - GEM_TRACE("%s flags=%lx\n", engine->name, error->flags); - GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags)); + GEM_TRACE("%s flags=%lx\n", engine->name, gt->reset.flags); + GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, >->reset.flags)); if (!intel_engine_pm_get_if_awake(engine)) return 0; @@ -1114,10 +1103,10 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) if (msg) dev_notice(engine->i915->drm.dev, "Resetting %s for %s\n", engine->name, msg); - error->reset_engine_count[engine->id]++; + atomic_inc(&engine->i915->gpu_error.reset_engine_count[engine->uabi_class]); if (!engine->i915->guc.execbuf_client) - ret = intel_gt_reset_engine(engine->i915, engine); + ret = intel_gt_reset_engine(engine); else ret = intel_guc_reset_engine(&engine->i915->guc, engine); if (ret) { @@ -1133,7 +1122,7 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) * active request and can drop it, adjust head to skip the offending * request to resume executing remaining requests in the queue. */ - intel_engine_reset(engine, true); + __intel_engine_reset(engine, true); /* * The engine and its registers (and workarounds in case of render) @@ -1149,16 +1138,15 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) return ret; } -static void i915_reset_device(struct drm_i915_private *i915, - u32 engine_mask, - const char *reason) +static void intel_gt_reset_global(struct intel_gt *gt, + u32 engine_mask, + const char *reason) { - struct i915_gpu_error *error = &i915->gpu_error; - struct kobject *kobj = &i915->drm.primary->kdev->kobj; + struct kobject *kobj = >->i915->drm.primary->kdev->kobj; char *error_event[] = { I915_ERROR_UEVENT "=1", NULL }; char *reset_event[] = { I915_RESET_UEVENT "=1", NULL }; char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL }; - struct i915_wedge_me w; + struct intel_wedge_me w; kobject_uevent_env(kobj, KOBJ_CHANGE, error_event); @@ -1166,24 +1154,24 @@ static void i915_reset_device(struct drm_i915_private *i915, kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event); /* Use a watchdog to ensure that our reset completes */ - i915_wedge_on_timeout(&w, i915, 5 * HZ) { - intel_prepare_reset(i915); + intel_wedge_on_timeout(&w, gt, 5 * HZ) { + intel_prepare_reset(gt->i915); /* Flush everyone using a resource about to be clobbered */ - synchronize_srcu_expedited(&error->reset_backoff_srcu); + synchronize_srcu_expedited(>->reset.backoff_srcu); - i915_reset(i915, engine_mask, reason); + intel_gt_reset(gt, engine_mask, reason); - intel_finish_reset(i915); + intel_finish_reset(gt->i915); } - if (!test_bit(I915_WEDGED, &error->flags)) + if (!test_bit(I915_WEDGED, >->reset.flags)) kobject_uevent_env(kobj, KOBJ_CHANGE, reset_done_event); } /** - * i915_handle_error - handle a gpu error - * @i915: i915 device private + * intel_gt_handle_error - handle a gpu error + * @gt: the intel_gt * @engine_mask: mask representing engines that are hung * @flags: control flags * @fmt: Error message format string @@ -1194,12 +1182,11 @@ static void i915_reset_device(struct drm_i915_private *i915, * so userspace knows something bad happened (should trigger collection * of a ring dump etc.). */ -void i915_handle_error(struct drm_i915_private *i915, - intel_engine_mask_t engine_mask, - unsigned long flags, - const char *fmt, ...) +void intel_gt_handle_error(struct intel_gt *gt, + intel_engine_mask_t engine_mask, + unsigned long flags, + const char *fmt, ...) { - struct i915_gpu_error *error = &i915->gpu_error; struct intel_engine_cs *engine; intel_wakeref_t wakeref; intel_engine_mask_t tmp; @@ -1223,33 +1210,31 @@ void i915_handle_error(struct drm_i915_private *i915, * isn't the case at least when we get here by doing a * simulated reset via debugfs, so get an RPM reference. */ - wakeref = intel_runtime_pm_get(&i915->runtime_pm); + wakeref = intel_runtime_pm_get(>->i915->runtime_pm); - engine_mask &= INTEL_INFO(i915)->engine_mask; + engine_mask &= INTEL_INFO(gt->i915)->engine_mask; if (flags & I915_ERROR_CAPTURE) { - i915_capture_error_state(i915, engine_mask, msg); - intel_gt_clear_error_registers(&i915->gt, engine_mask); + i915_capture_error_state(gt->i915, engine_mask, msg); + intel_gt_clear_error_registers(gt, engine_mask); } /* * Try engine reset when available. We fall back to full reset if * single reset fails. */ - if (intel_has_reset_engine(i915) && !__i915_wedged(error)) { - for_each_engine_masked(engine, i915, engine_mask, tmp) { + if (intel_has_reset_engine(gt->i915) && !intel_gt_is_wedged(gt)) { + for_each_engine_masked(engine, gt->i915, engine_mask, tmp) { BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE); if (test_and_set_bit(I915_RESET_ENGINE + engine->id, - &error->flags)) + >->reset.flags)) continue; - if (i915_reset_engine(engine, msg) == 0) + if (intel_engine_reset(engine, msg) == 0) engine_mask &= ~engine->mask; - clear_bit(I915_RESET_ENGINE + engine->id, - &error->flags); - wake_up_bit(&error->flags, - I915_RESET_ENGINE + engine->id); + clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, + >->reset.flags); } } @@ -1257,9 +1242,9 @@ void i915_handle_error(struct drm_i915_private *i915, goto out; /* Full reset needs the mutex, stop any other user trying to do so. */ - if (test_and_set_bit(I915_RESET_BACKOFF, &error->flags)) { - wait_event(error->reset_queue, - !test_bit(I915_RESET_BACKOFF, &error->flags)); + if (test_and_set_bit(I915_RESET_BACKOFF, >->reset.flags)) { + wait_event(gt->reset.queue, + !test_bit(I915_RESET_BACKOFF, >->reset.flags)); goto out; /* piggy-back on the other reset */ } @@ -1267,113 +1252,119 @@ void i915_handle_error(struct drm_i915_private *i915, synchronize_rcu_expedited(); /* Prevent any other reset-engine attempt. */ - for_each_engine(engine, i915, tmp) { + for_each_engine(engine, gt->i915, tmp) { while (test_and_set_bit(I915_RESET_ENGINE + engine->id, - &error->flags)) - wait_on_bit(&error->flags, + >->reset.flags)) + wait_on_bit(>->reset.flags, I915_RESET_ENGINE + engine->id, TASK_UNINTERRUPTIBLE); } - i915_reset_device(i915, engine_mask, msg); + intel_gt_reset_global(gt, engine_mask, msg); - for_each_engine(engine, i915, tmp) { - clear_bit(I915_RESET_ENGINE + engine->id, - &error->flags); - } - - clear_bit(I915_RESET_BACKOFF, &error->flags); - wake_up_all(&error->reset_queue); + for_each_engine(engine, gt->i915, tmp) + clear_bit_unlock(I915_RESET_ENGINE + engine->id, + >->reset.flags); + clear_bit_unlock(I915_RESET_BACKOFF, >->reset.flags); + smp_mb__after_atomic(); + wake_up_all(>->reset.queue); out: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); + intel_runtime_pm_put(>->i915->runtime_pm, wakeref); } -int i915_reset_trylock(struct drm_i915_private *i915) +int intel_gt_reset_trylock(struct intel_gt *gt) { - struct i915_gpu_error *error = &i915->gpu_error; int srcu; - might_lock(&error->reset_backoff_srcu); + might_lock(>->reset.backoff_srcu); might_sleep(); rcu_read_lock(); - while (test_bit(I915_RESET_BACKOFF, &error->flags)) { + while (test_bit(I915_RESET_BACKOFF, >->reset.flags)) { rcu_read_unlock(); - if (wait_event_interruptible(error->reset_queue, + if (wait_event_interruptible(gt->reset.queue, !test_bit(I915_RESET_BACKOFF, - &error->flags))) + >->reset.flags))) return -EINTR; rcu_read_lock(); } - srcu = srcu_read_lock(&error->reset_backoff_srcu); + srcu = srcu_read_lock(>->reset.backoff_srcu); rcu_read_unlock(); return srcu; } -void i915_reset_unlock(struct drm_i915_private *i915, int tag) -__releases(&i915->gpu_error.reset_backoff_srcu) +void intel_gt_reset_unlock(struct intel_gt *gt, int tag) +__releases(>->reset.backoff_srcu) { - struct i915_gpu_error *error = &i915->gpu_error; - - srcu_read_unlock(&error->reset_backoff_srcu, tag); + srcu_read_unlock(>->reset.backoff_srcu, tag); } -int i915_terminally_wedged(struct drm_i915_private *i915) +int intel_gt_terminally_wedged(struct intel_gt *gt) { - struct i915_gpu_error *error = &i915->gpu_error; - might_sleep(); - if (!__i915_wedged(error)) + if (!intel_gt_is_wedged(gt)) return 0; /* Reset still in progress? Maybe we will recover? */ - if (!test_bit(I915_RESET_BACKOFF, &error->flags)) + if (!test_bit(I915_RESET_BACKOFF, >->reset.flags)) return -EIO; /* XXX intel_reset_finish() still takes struct_mutex!!! */ - if (mutex_is_locked(&i915->drm.struct_mutex)) + if (mutex_is_locked(>->i915->drm.struct_mutex)) return -EAGAIN; - if (wait_event_interruptible(error->reset_queue, + if (wait_event_interruptible(gt->reset.queue, !test_bit(I915_RESET_BACKOFF, - &error->flags))) + >->reset.flags))) return -EINTR; - return __i915_wedged(error) ? -EIO : 0; + return intel_gt_is_wedged(gt) ? -EIO : 0; +} + +void intel_gt_init_reset(struct intel_gt *gt) +{ + init_waitqueue_head(>->reset.queue); + mutex_init(>->reset.mutex); + init_srcu_struct(>->reset.backoff_srcu); +} + +void intel_gt_fini_reset(struct intel_gt *gt) +{ + cleanup_srcu_struct(>->reset.backoff_srcu); } -static void i915_wedge_me(struct work_struct *work) +static void intel_wedge_me(struct work_struct *work) { - struct i915_wedge_me *w = container_of(work, typeof(*w), work.work); + struct intel_wedge_me *w = container_of(work, typeof(*w), work.work); - dev_err(w->i915->drm.dev, + dev_err(w->gt->i915->drm.dev, "%s timed out, cancelling all in-flight rendering.\n", w->name); - i915_gem_set_wedged(w->i915); + intel_gt_set_wedged(w->gt); } -void __i915_init_wedge(struct i915_wedge_me *w, - struct drm_i915_private *i915, - long timeout, - const char *name) +void __intel_init_wedge(struct intel_wedge_me *w, + struct intel_gt *gt, + long timeout, + const char *name) { - w->i915 = i915; + w->gt = gt; w->name = name; - INIT_DELAYED_WORK_ONSTACK(&w->work, i915_wedge_me); + INIT_DELAYED_WORK_ONSTACK(&w->work, intel_wedge_me); schedule_delayed_work(&w->work, timeout); } -void __i915_fini_wedge(struct i915_wedge_me *w) +void __intel_fini_wedge(struct intel_wedge_me *w) { cancel_delayed_work_sync(&w->work); destroy_delayed_work_on_stack(&w->work); - w->i915 = NULL; + w->gt = NULL; } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h index 03fba0ab3868..62f6cb520f96 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.h +++ b/drivers/gpu/drm/i915/gt/intel_reset.h @@ -11,56 +11,67 @@ #include <linux/types.h> #include <linux/srcu.h> -#include "gt/intel_engine_types.h" +#include "intel_engine_types.h" +#include "intel_reset_types.h" struct drm_i915_private; struct i915_request; struct intel_engine_cs; +struct intel_gt; struct intel_guc; +void intel_gt_init_reset(struct intel_gt *gt); +void intel_gt_fini_reset(struct intel_gt *gt); + __printf(4, 5) -void i915_handle_error(struct drm_i915_private *i915, - intel_engine_mask_t engine_mask, - unsigned long flags, - const char *fmt, ...); +void intel_gt_handle_error(struct intel_gt *gt, + intel_engine_mask_t engine_mask, + unsigned long flags, + const char *fmt, ...); #define I915_ERROR_CAPTURE BIT(0) -void i915_reset(struct drm_i915_private *i915, - intel_engine_mask_t stalled_mask, - const char *reason); -int i915_reset_engine(struct intel_engine_cs *engine, - const char *reason); - -void i915_reset_request(struct i915_request *rq, bool guilty); +void intel_gt_reset(struct intel_gt *gt, + intel_engine_mask_t stalled_mask, + const char *reason); +int intel_engine_reset(struct intel_engine_cs *engine, + const char *reason); -int __must_check i915_reset_trylock(struct drm_i915_private *i915); -void i915_reset_unlock(struct drm_i915_private *i915, int tag); +void __i915_request_reset(struct i915_request *rq, bool guilty); -int i915_terminally_wedged(struct drm_i915_private *i915); +int __must_check intel_gt_reset_trylock(struct intel_gt *gt); +void intel_gt_reset_unlock(struct intel_gt *gt, int tag); -bool intel_has_gpu_reset(struct drm_i915_private *i915); -bool intel_has_reset_engine(struct drm_i915_private *i915); +void intel_gt_set_wedged(struct intel_gt *gt); +bool intel_gt_unset_wedged(struct intel_gt *gt); +int intel_gt_terminally_wedged(struct intel_gt *gt); -int intel_gpu_reset(struct drm_i915_private *i915, - intel_engine_mask_t engine_mask); +int intel_gpu_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask); -int intel_reset_guc(struct drm_i915_private *i915); +int intel_reset_guc(struct intel_gt *gt); -struct i915_wedge_me { +struct intel_wedge_me { struct delayed_work work; - struct drm_i915_private *i915; + struct intel_gt *gt; const char *name; }; -void __i915_init_wedge(struct i915_wedge_me *w, - struct drm_i915_private *i915, - long timeout, - const char *name); -void __i915_fini_wedge(struct i915_wedge_me *w); +void __intel_init_wedge(struct intel_wedge_me *w, + struct intel_gt *gt, + long timeout, + const char *name); +void __intel_fini_wedge(struct intel_wedge_me *w); -#define i915_wedge_on_timeout(W, DEV, TIMEOUT) \ - for (__i915_init_wedge((W), (DEV), (TIMEOUT), __func__); \ - (W)->i915; \ - __i915_fini_wedge((W))) +#define intel_wedge_on_timeout(W, GT, TIMEOUT) \ + for (__intel_init_wedge((W), (GT), (TIMEOUT), __func__); \ + (W)->gt; \ + __intel_fini_wedge((W))) + +static inline bool __intel_reset_failed(const struct intel_reset *reset) +{ + return unlikely(test_bit(I915_WEDGED, &reset->flags)); +} + +bool intel_has_gpu_reset(struct drm_i915_private *i915); +bool intel_has_reset_engine(struct drm_i915_private *i915); #endif /* I915_RESET_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_reset_types.h b/drivers/gpu/drm/i915/gt/intel_reset_types.h new file mode 100644 index 000000000000..31968356e0c0 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_reset_types.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __INTEL_RESET_TYPES_H_ +#define __INTEL_RESET_TYPES_H_ + +#include <linux/mutex.h> +#include <linux/wait.h> +#include <linux/srcu.h> + +struct intel_reset { + /** + * flags: Control various stages of the GPU reset + * + * #I915_RESET_BACKOFF - When we start a global reset, we need to + * serialise with any other users attempting to do the same, and + * any global resources that may be clobber by the reset (such as + * FENCE registers). + * + * #I915_RESET_ENGINE[num_engines] - Since the driver doesn't need to + * acquire the struct_mutex to reset an engine, we need an explicit + * flag to prevent two concurrent reset attempts in the same engine. + * As the number of engines continues to grow, allocate the flags from + * the most significant bits. + * + * #I915_WEDGED - If reset fails and we can no longer use the GPU, + * we set the #I915_WEDGED bit. Prior to command submission, e.g. + * i915_request_alloc(), this bit is checked and the sequence + * aborted (with -EIO reported to userspace) if set. + */ + unsigned long flags; +#define I915_RESET_BACKOFF 0 +#define I915_RESET_MODESET 1 +#define I915_RESET_ENGINE 2 +#define I915_WEDGED (BITS_PER_LONG - 1) + + struct mutex mutex; /* serialises wedging/unwedging */ + + /** + * Waitqueue to signal when the reset has completed. Used by clients + * that wait for dev_priv->mm.wedged to settle. + */ + wait_queue_head_t queue; + + struct srcu_struct backoff_srcu; +}; + +#endif /* _INTEL_RESET_TYPES_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c index e2b5de58913c..189b75074121 100644 --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c @@ -795,7 +795,7 @@ static void reset_ring(struct intel_engine_cs *engine, bool stalled) * If the request was innocent, we try to replay the request * with the restored context. */ - i915_reset_request(rq, stalled); + __i915_request_reset(rq, stalled); GEM_BUG_ON(rq->ring != engine->buffer); head = rq->head; diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 2d9cc3cd1f27..e2fa38a1ff0f 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -32,7 +32,6 @@ #include "selftests/i915_random.h" #include "selftests/igt_flush_test.h" #include "selftests/igt_reset.h" -#include "selftests/igt_wedge_me.h" #include "selftests/igt_atomic.h" #include "selftests/mock_drm.h" @@ -43,7 +42,6 @@ #define IGT_IDLE_TIMEOUT 50 /* ms; time to wait after flushing between tests */ struct hang { - struct drm_i915_private *i915; struct intel_gt *gt; struct drm_i915_gem_object *hws; struct drm_i915_gem_object *obj; @@ -52,27 +50,27 @@ struct hang { u32 *batch; }; -static int hang_init(struct hang *h, struct drm_i915_private *i915) +static int hang_init(struct hang *h, struct intel_gt *gt) { void *vaddr; int err; memset(h, 0, sizeof(*h)); - h->i915 = i915; + h->gt = gt; - h->ctx = kernel_context(i915); + h->ctx = kernel_context(gt->i915); if (IS_ERR(h->ctx)) return PTR_ERR(h->ctx); GEM_BUG_ON(i915_gem_context_is_bannable(h->ctx)); - h->hws = i915_gem_object_create_internal(i915, PAGE_SIZE); + h->hws = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); if (IS_ERR(h->hws)) { err = PTR_ERR(h->hws); goto err_ctx; } - h->obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + h->obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); if (IS_ERR(h->obj)) { err = PTR_ERR(h->obj); goto err_hws; @@ -87,7 +85,7 @@ static int hang_init(struct hang *h, struct drm_i915_private *i915) h->seqno = memset(vaddr, 0xff, PAGE_SIZE); vaddr = i915_gem_object_pin_map(h->obj, - i915_coherent_map_type(i915)); + i915_coherent_map_type(gt->i915)); if (IS_ERR(vaddr)) { err = PTR_ERR(vaddr); goto err_unpin_hws; @@ -129,7 +127,7 @@ static int move_to_active(struct i915_vma *vma, static struct i915_request * hang_create_request(struct hang *h, struct intel_engine_cs *engine) { - struct drm_i915_private *i915 = h->i915; + struct intel_gt *gt = h->gt; struct i915_address_space *vm = h->ctx->vm ?: &engine->gt->ggtt->vm; struct drm_i915_gem_object *obj; struct i915_request *rq = NULL; @@ -139,11 +137,11 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) u32 *batch; int err; - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); if (IS_ERR(obj)) return ERR_CAST(obj); - vaddr = i915_gem_object_pin_map(obj, i915_coherent_map_type(i915)); + vaddr = i915_gem_object_pin_map(obj, i915_coherent_map_type(gt->i915)); if (IS_ERR(vaddr)) { i915_gem_object_put(obj); return ERR_CAST(vaddr); @@ -186,7 +184,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) goto cancel_rq; batch = h->batch; - if (INTEL_GEN(i915) >= 8) { + if (INTEL_GEN(gt->i915) >= 8) { *batch++ = MI_STORE_DWORD_IMM_GEN4; *batch++ = lower_32_bits(hws_address(hws, rq)); *batch++ = upper_32_bits(hws_address(hws, rq)); @@ -200,7 +198,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) *batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1; *batch++ = lower_32_bits(vma->node.start); *batch++ = upper_32_bits(vma->node.start); - } else if (INTEL_GEN(i915) >= 6) { + } else if (INTEL_GEN(gt->i915) >= 6) { *batch++ = MI_STORE_DWORD_IMM_GEN4; *batch++ = 0; *batch++ = lower_32_bits(hws_address(hws, rq)); @@ -213,7 +211,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) *batch++ = MI_ARB_CHECK; *batch++ = MI_BATCH_BUFFER_START | 1 << 8; *batch++ = lower_32_bits(vma->node.start); - } else if (INTEL_GEN(i915) >= 4) { + } else if (INTEL_GEN(gt->i915) >= 4) { *batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; *batch++ = 0; *batch++ = lower_32_bits(hws_address(hws, rq)); @@ -249,7 +247,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) } flags = 0; - if (INTEL_GEN(i915) <= 5) + if (INTEL_GEN(gt->i915) <= 5) flags |= I915_DISPATCH_SECURE; err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags); @@ -286,7 +284,7 @@ static void hang_fini(struct hang *h) kernel_context_close(h->ctx); - igt_flush_test(h->i915, I915_WAIT_LOCKED); + igt_flush_test(h->gt->i915, I915_WAIT_LOCKED); } static bool wait_until_running(struct hang *h, struct i915_request *rq) @@ -301,7 +299,7 @@ static bool wait_until_running(struct hang *h, struct i915_request *rq) static int igt_hang_sanitycheck(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct i915_request *rq; struct intel_engine_cs *engine; enum intel_engine_id id; @@ -310,13 +308,13 @@ static int igt_hang_sanitycheck(void *arg) /* Basic check that we can execute our hanging batch */ - mutex_lock(&i915->drm.struct_mutex); - err = hang_init(&h, i915); + mutex_lock(>->i915->drm.struct_mutex); + err = hang_init(&h, gt); if (err) goto unlock; - for_each_engine(engine, i915, id) { - struct igt_wedge_me w; + for_each_engine(engine, gt->i915, id) { + struct intel_wedge_me w; long timeout; if (!intel_engine_can_store_dword(engine)) @@ -338,10 +336,10 @@ static int igt_hang_sanitycheck(void *arg) i915_request_add(rq); timeout = 0; - igt_wedge_on_timeout(&w, i915, HZ / 10 /* 100ms timeout*/) + intel_wedge_on_timeout(&w, gt, HZ / 10 /* 100ms */) timeout = i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); - if (i915_reset_failed(i915)) + if (intel_gt_is_wedged(gt)) timeout = -EIO; i915_request_put(rq); @@ -357,7 +355,7 @@ static int igt_hang_sanitycheck(void *arg) fini: hang_fini(&h); unlock: - mutex_unlock(&i915->drm.struct_mutex); + mutex_unlock(>->i915->drm.struct_mutex); return err; } @@ -368,7 +366,8 @@ static bool wait_for_idle(struct intel_engine_cs *engine) static int igt_reset_nop(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; + struct i915_gpu_error *global = >->i915->gpu_error; struct intel_engine_cs *engine; struct i915_gem_context *ctx; unsigned int reset_count, count; @@ -379,25 +378,25 @@ static int igt_reset_nop(void *arg) /* Check that we can reset during non-user portions of requests */ - file = mock_file(i915); + file = mock_file(gt->i915); if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - ctx = live_context(i915, file); - mutex_unlock(&i915->drm.struct_mutex); + mutex_lock(>->i915->drm.struct_mutex); + ctx = live_context(gt->i915, file); + mutex_unlock(>->i915->drm.struct_mutex); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto out; } i915_gem_context_clear_bannable(ctx); - reset_count = i915_reset_count(&i915->gpu_error); + reset_count = i915_reset_count(global); count = 0; do { - mutex_lock(&i915->drm.struct_mutex); + mutex_lock(>->i915->drm.struct_mutex); - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt->i915, id) { int i; for (i = 0; i < 16; i++) { @@ -413,43 +412,43 @@ static int igt_reset_nop(void *arg) } } - igt_global_reset_lock(i915); - i915_reset(i915, ALL_ENGINES, NULL); - igt_global_reset_unlock(i915); + igt_global_reset_lock(gt); + intel_gt_reset(gt, ALL_ENGINES, NULL); + igt_global_reset_unlock(gt); - mutex_unlock(&i915->drm.struct_mutex); - if (i915_reset_failed(i915)) { + mutex_unlock(>->i915->drm.struct_mutex); + if (intel_gt_is_wedged(gt)) { err = -EIO; break; } - if (i915_reset_count(&i915->gpu_error) != - reset_count + ++count) { + if (i915_reset_count(global) != reset_count + ++count) { pr_err("Full GPU reset not recorded!\n"); err = -EINVAL; break; } - err = igt_flush_test(i915, 0); + err = igt_flush_test(gt->i915, 0); if (err) break; } while (time_before(jiffies, end_time)); pr_info("%s: %d resets\n", __func__, count); - mutex_lock(&i915->drm.struct_mutex); - err = igt_flush_test(i915, I915_WAIT_LOCKED); - mutex_unlock(&i915->drm.struct_mutex); + mutex_lock(>->i915->drm.struct_mutex); + err = igt_flush_test(gt->i915, I915_WAIT_LOCKED); + mutex_unlock(>->i915->drm.struct_mutex); out: - mock_file_free(i915, file); - if (i915_reset_failed(i915)) + mock_file_free(gt->i915, file); + if (intel_gt_is_wedged(gt)) err = -EIO; return err; } static int igt_reset_nop_engine(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; + struct i915_gpu_error *global = >->i915->gpu_error; struct intel_engine_cs *engine; struct i915_gem_context *ctx; enum intel_engine_id id; @@ -458,33 +457,32 @@ static int igt_reset_nop_engine(void *arg) /* Check that we can engine-reset during non-user portions */ - if (!intel_has_reset_engine(i915)) + if (!intel_has_reset_engine(gt->i915)) return 0; - file = mock_file(i915); + file = mock_file(gt->i915); if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - ctx = live_context(i915, file); - mutex_unlock(&i915->drm.struct_mutex); + mutex_lock(>->i915->drm.struct_mutex); + ctx = live_context(gt->i915, file); + mutex_unlock(>->i915->drm.struct_mutex); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto out; } i915_gem_context_clear_bannable(ctx); - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt->i915, id) { unsigned int reset_count, reset_engine_count; unsigned int count; IGT_TIMEOUT(end_time); - reset_count = i915_reset_count(&i915->gpu_error); - reset_engine_count = i915_reset_engine_count(&i915->gpu_error, - engine); + reset_count = i915_reset_count(global); + reset_engine_count = i915_reset_engine_count(global, engine); count = 0; - set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); + set_bit(I915_RESET_ENGINE + id, >->reset.flags); do { int i; @@ -495,7 +493,7 @@ static int igt_reset_nop_engine(void *arg) break; } - mutex_lock(&i915->drm.struct_mutex); + mutex_lock(>->i915->drm.struct_mutex); for (i = 0; i < 16; i++) { struct i915_request *rq; @@ -507,20 +505,20 @@ static int igt_reset_nop_engine(void *arg) i915_request_add(rq); } - err = i915_reset_engine(engine, NULL); - mutex_unlock(&i915->drm.struct_mutex); + err = intel_engine_reset(engine, NULL); + mutex_unlock(>->i915->drm.struct_mutex); if (err) { pr_err("i915_reset_engine failed\n"); break; } - if (i915_reset_count(&i915->gpu_error) != reset_count) { + if (i915_reset_count(global) != reset_count) { pr_err("Full GPU reset recorded! (engine reset expected)\n"); err = -EINVAL; break; } - if (i915_reset_engine_count(&i915->gpu_error, engine) != + if (i915_reset_engine_count(global, engine) != reset_engine_count + ++count) { pr_err("%s engine reset not recorded!\n", engine->name); @@ -528,30 +526,31 @@ static int igt_reset_nop_engine(void *arg) break; } } while (time_before(jiffies, end_time)); - clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); + clear_bit(I915_RESET_ENGINE + id, >->reset.flags); pr_info("%s(%s): %d resets\n", __func__, engine->name, count); if (err) break; - err = igt_flush_test(i915, 0); + err = igt_flush_test(gt->i915, 0); if (err) break; } - mutex_lock(&i915->drm.struct_mutex); - err = igt_flush_test(i915, I915_WAIT_LOCKED); - mutex_unlock(&i915->drm.struct_mutex); + mutex_lock(>->i915->drm.struct_mutex); + err = igt_flush_test(gt->i915, I915_WAIT_LOCKED); + mutex_unlock(>->i915->drm.struct_mutex); out: - mock_file_free(i915, file); - if (i915_reset_failed(i915)) + mock_file_free(gt->i915, file); + if (intel_gt_is_wedged(gt)) err = -EIO; return err; } -static int __igt_reset_engine(struct drm_i915_private *i915, bool active) +static int __igt_reset_engine(struct intel_gt *gt, bool active) { + struct i915_gpu_error *global = >->i915->gpu_error; struct intel_engine_cs *engine; enum intel_engine_id id; struct hang h; @@ -559,18 +558,18 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) /* Check that we can issue an engine reset on an idle engine (no-op) */ - if (!intel_has_reset_engine(i915)) + if (!intel_has_reset_engine(gt->i915)) return 0; if (active) { - mutex_lock(&i915->drm.struct_mutex); - err = hang_init(&h, i915); - mutex_unlock(&i915->drm.struct_mutex); + mutex_lock(>->i915->drm.struct_mutex); + err = hang_init(&h, gt); + mutex_unlock(>->i915->drm.struct_mutex); if (err) return err; } - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt->i915, id) { unsigned int reset_count, reset_engine_count; IGT_TIMEOUT(end_time); @@ -584,30 +583,29 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) break; } - reset_count = i915_reset_count(&i915->gpu_error); - reset_engine_count = i915_reset_engine_count(&i915->gpu_error, - engine); + reset_count = i915_reset_count(global); + reset_engine_count = i915_reset_engine_count(global, engine); intel_engine_pm_get(engine); - set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); + set_bit(I915_RESET_ENGINE + id, >->reset.flags); do { if (active) { struct i915_request *rq; - mutex_lock(&i915->drm.struct_mutex); + mutex_lock(>->i915->drm.struct_mutex); rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); - mutex_unlock(&i915->drm.struct_mutex); + mutex_unlock(>->i915->drm.struct_mutex); break; } i915_request_get(rq); i915_request_add(rq); - mutex_unlock(&i915->drm.struct_mutex); + mutex_unlock(>->i915->drm.struct_mutex); if (!wait_until_running(&h, rq)) { - struct drm_printer p = drm_info_printer(i915->drm.dev); + struct drm_printer p = drm_info_printer(gt->i915->drm.dev); pr_err("%s: Failed to start request %llx, at %x\n", __func__, rq->fence.seqno, hws_seqno(&h, rq)); @@ -622,19 +620,19 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) i915_request_put(rq); } - err = i915_reset_engine(engine, NULL); + err = intel_engine_reset(engine, NULL); if (err) { pr_err("i915_reset_engine failed\n"); break; } - if (i915_reset_count(&i915->gpu_error) != reset_count) { + if (i915_reset_count(global) != reset_count) { pr_err("Full GPU reset recorded! (engine reset expected)\n"); err = -EINVAL; break; } - if (i915_reset_engine_count(&i915->gpu_error, engine) != + if (i915_reset_engine_count(global, engine) != ++reset_engine_count) { pr_err("%s engine reset not recorded!\n", engine->name); @@ -642,24 +640,24 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) break; } } while (time_before(jiffies, end_time)); - clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); + clear_bit(I915_RESET_ENGINE + id, >->reset.flags); intel_engine_pm_put(engine); if (err) break; - err = igt_flush_test(i915, 0); + err = igt_flush_test(gt->i915, 0); if (err) break; } - if (i915_reset_failed(i915)) + if (intel_gt_is_wedged(gt)) err = -EIO; if (active) { - mutex_lock(&i915->drm.struct_mutex); + mutex_lock(>->i915->drm.struct_mutex); hang_fini(&h); - mutex_unlock(&i915->drm.struct_mutex); + mutex_unlock(>->i915->drm.struct_mutex); } return err; @@ -701,7 +699,7 @@ static int active_request_put(struct i915_request *rq) rq->fence.seqno); GEM_TRACE_DUMP(); - i915_gem_set_wedged(rq->i915); + intel_gt_set_wedged(rq->engine->gt); err = -EIO; } @@ -778,10 +776,11 @@ static int active_engine(void *data) return err; } -static int __igt_reset_engines(struct drm_i915_private *i915, +static int __igt_reset_engines(struct intel_gt *gt, const char *test_name, unsigned int flags) { + struct i915_gpu_error *global = >->i915->gpu_error; struct intel_engine_cs *engine, *other; enum intel_engine_id id, tmp; struct hang h; @@ -791,13 +790,13 @@ static int __igt_reset_engines(struct drm_i915_private *i915, * with any other engine. */ - if (!intel_has_reset_engine(i915)) + if (!intel_has_reset_engine(gt->i915)) return 0; if (flags & TEST_ACTIVE) { - mutex_lock(&i915->drm.struct_mutex); - err = hang_init(&h, i915); - mutex_unlock(&i915->drm.struct_mutex); + mutex_lock(>->i915->drm.struct_mutex); + err = hang_init(&h, gt); + mutex_unlock(>->i915->drm.struct_mutex); if (err) return err; @@ -805,9 +804,9 @@ static int __igt_reset_engines(struct drm_i915_private *i915, h.ctx->sched.priority = 1024; } - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt->i915, id) { struct active_engine threads[I915_NUM_ENGINES] = {}; - unsigned long global = i915_reset_count(&i915->gpu_error); + unsigned long device = i915_reset_count(global); unsigned long count = 0, reported; IGT_TIMEOUT(end_time); @@ -823,12 +822,11 @@ static int __igt_reset_engines(struct drm_i915_private *i915, } memset(threads, 0, sizeof(threads)); - for_each_engine(other, i915, tmp) { + for_each_engine(other, gt->i915, tmp) { struct task_struct *tsk; threads[tmp].resets = - i915_reset_engine_count(&i915->gpu_error, - other); + i915_reset_engine_count(global, other); if (!(flags & TEST_OTHERS)) continue; @@ -851,25 +849,25 @@ static int __igt_reset_engines(struct drm_i915_private *i915, } intel_engine_pm_get(engine); - set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); + set_bit(I915_RESET_ENGINE + id, >->reset.flags); do { struct i915_request *rq = NULL; if (flags & TEST_ACTIVE) { - mutex_lock(&i915->drm.struct_mutex); + mutex_lock(>->i915->drm.struct_mutex); rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); - mutex_unlock(&i915->drm.struct_mutex); + mutex_unlock(>->i915->drm.struct_mutex); break; } i915_request_get(rq); i915_request_add(rq); - mutex_unlock(&i915->drm.struct_mutex); + mutex_unlock(>->i915->drm.struct_mutex); if (!wait_until_running(&h, rq)) { - struct drm_printer p = drm_info_printer(i915->drm.dev); + struct drm_printer p = drm_info_printer(gt->i915->drm.dev); pr_err("%s: Failed to start request %llx, at %x\n", __func__, rq->fence.seqno, hws_seqno(&h, rq)); @@ -882,7 +880,7 @@ static int __igt_reset_engines(struct drm_i915_private *i915, } } - err = i915_reset_engine(engine, NULL); + err = intel_engine_reset(engine, NULL); if (err) { pr_err("i915_reset_engine(%s:%s): failed, err=%d\n", engine->name, test_name, err); @@ -894,7 +892,7 @@ static int __igt_reset_engines(struct drm_i915_private *i915, if (rq) { if (i915_request_wait(rq, 0, HZ / 5) < 0) { struct drm_printer p = - drm_info_printer(i915->drm.dev); + drm_info_printer(gt->i915->drm.dev); pr_err("i915_reset_engine(%s:%s):" " failed to complete request after reset\n", @@ -904,7 +902,7 @@ static int __igt_reset_engines(struct drm_i915_private *i915, i915_request_put(rq); GEM_TRACE_DUMP(); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(gt); err = -EIO; break; } @@ -914,7 +912,7 @@ static int __igt_reset_engines(struct drm_i915_private *i915, if (!(flags & TEST_SELF) && !wait_for_idle(engine)) { struct drm_printer p = - drm_info_printer(i915->drm.dev); + drm_info_printer(gt->i915->drm.dev); pr_err("i915_reset_engine(%s:%s):" " failed to idle after reset\n", @@ -926,12 +924,12 @@ static int __igt_reset_engines(struct drm_i915_private *i915, break; } } while (time_before(jiffies, end_time)); - clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); + clear_bit(I915_RESET_ENGINE + id, >->reset.flags); intel_engine_pm_put(engine); pr_info("i915_reset_engine(%s:%s): %lu resets\n", engine->name, test_name, count); - reported = i915_reset_engine_count(&i915->gpu_error, engine); + reported = i915_reset_engine_count(global, engine); reported -= threads[engine->id].resets; if (reported != count) { pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu\n", @@ -941,7 +939,7 @@ static int __igt_reset_engines(struct drm_i915_private *i915, } unwind: - for_each_engine(other, i915, tmp) { + for_each_engine(other, gt->i915, tmp) { int ret; if (!threads[tmp].task) @@ -956,22 +954,21 @@ static int __igt_reset_engines(struct drm_i915_private *i915, } put_task_struct(threads[tmp].task); - if (other != engine && + if (other->uabi_class != engine->uabi_class && threads[tmp].resets != - i915_reset_engine_count(&i915->gpu_error, other)) { + i915_reset_engine_count(global, other)) { pr_err("Innocent engine %s was reset (count=%ld)\n", other->name, - i915_reset_engine_count(&i915->gpu_error, - other) - + i915_reset_engine_count(global, other) - threads[tmp].resets); if (!err) err = -EINVAL; } } - if (global != i915_reset_count(&i915->gpu_error)) { + if (device != i915_reset_count(global)) { pr_err("Global reset (count=%ld)!\n", - i915_reset_count(&i915->gpu_error) - global); + i915_reset_count(global) - device); if (!err) err = -EINVAL; } @@ -979,20 +976,20 @@ static int __igt_reset_engines(struct drm_i915_private *i915, if (err) break; - mutex_lock(&i915->drm.struct_mutex); - err = igt_flush_test(i915, I915_WAIT_LOCKED); - mutex_unlock(&i915->drm.struct_mutex); + mutex_lock(>->i915->drm.struct_mutex); + err = igt_flush_test(gt->i915, I915_WAIT_LOCKED); + mutex_unlock(>->i915->drm.struct_mutex); if (err) break; } - if (i915_reset_failed(i915)) + if (intel_gt_is_wedged(gt)) err = -EIO; if (flags & TEST_ACTIVE) { - mutex_lock(&i915->drm.struct_mutex); + mutex_lock(>->i915->drm.struct_mutex); hang_fini(&h); - mutex_unlock(&i915->drm.struct_mutex); + mutex_unlock(>->i915->drm.struct_mutex); } return err; @@ -1018,13 +1015,13 @@ static int igt_reset_engines(void *arg) }, { } }; - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; typeof(*phases) *p; int err; for (p = phases; p->name; p++) { if (p->flags & TEST_PRIORITY) { - if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY)) + if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY)) continue; } @@ -1036,38 +1033,39 @@ static int igt_reset_engines(void *arg) return 0; } -static u32 fake_hangcheck(struct drm_i915_private *i915, - intel_engine_mask_t mask) +static u32 fake_hangcheck(struct intel_gt *gt, intel_engine_mask_t mask) { - u32 count = i915_reset_count(&i915->gpu_error); + u32 count = i915_reset_count(>->i915->gpu_error); - i915_reset(i915, mask, NULL); + intel_gt_reset(gt, mask, NULL); return count; } static int igt_reset_wait(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; + struct i915_gpu_error *global = >->i915->gpu_error; + struct intel_engine_cs *engine = gt->i915->engine[RCS0]; struct i915_request *rq; unsigned int reset_count; struct hang h; long timeout; int err; - if (!intel_engine_can_store_dword(i915->engine[RCS0])) + if (!engine || !intel_engine_can_store_dword(engine)) return 0; /* Check that we detect a stuck waiter and issue a reset */ - igt_global_reset_lock(i915); + igt_global_reset_lock(gt); - mutex_lock(&i915->drm.struct_mutex); - err = hang_init(&h, i915); + mutex_lock(>->i915->drm.struct_mutex); + err = hang_init(&h, gt); if (err) goto unlock; - rq = hang_create_request(&h, i915->engine[RCS0]); + rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto fini; @@ -1077,19 +1075,19 @@ static int igt_reset_wait(void *arg) i915_request_add(rq); if (!wait_until_running(&h, rq)) { - struct drm_printer p = drm_info_printer(i915->drm.dev); + struct drm_printer p = drm_info_printer(gt->i915->drm.dev); pr_err("%s: Failed to start request %llx, at %x\n", __func__, rq->fence.seqno, hws_seqno(&h, rq)); intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(gt); err = -EIO; goto out_rq; } - reset_count = fake_hangcheck(i915, ALL_ENGINES); + reset_count = fake_hangcheck(gt, ALL_ENGINES); timeout = i915_request_wait(rq, 0, 10); if (timeout < 0) { @@ -1099,7 +1097,7 @@ static int igt_reset_wait(void *arg) goto out_rq; } - if (i915_reset_count(&i915->gpu_error) == reset_count) { + if (i915_reset_count(global) == reset_count) { pr_err("No GPU reset recorded!\n"); err = -EINVAL; goto out_rq; @@ -1110,10 +1108,10 @@ static int igt_reset_wait(void *arg) fini: hang_fini(&h); unlock: - mutex_unlock(&i915->drm.struct_mutex); - igt_global_reset_unlock(i915); + mutex_unlock(>->i915->drm.struct_mutex); + igt_global_reset_unlock(gt); - if (i915_reset_failed(i915)) + if (intel_gt_is_wedged(gt)) return -EIO; return err; @@ -1172,11 +1170,12 @@ static int evict_fence(void *data) return err; } -static int __igt_reset_evict_vma(struct drm_i915_private *i915, +static int __igt_reset_evict_vma(struct intel_gt *gt, struct i915_address_space *vm, int (*fn)(void *), unsigned int flags) { + struct intel_engine_cs *engine = gt->i915->engine[RCS0]; struct drm_i915_gem_object *obj; struct task_struct *tsk = NULL; struct i915_request *rq; @@ -1184,17 +1183,17 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915, struct hang h; int err; - if (!intel_engine_can_store_dword(i915->engine[RCS0])) + if (!engine || !intel_engine_can_store_dword(engine)) return 0; /* Check that we can recover an unbind stuck on a hanging request */ - mutex_lock(&i915->drm.struct_mutex); - err = hang_init(&h, i915); + mutex_lock(>->i915->drm.struct_mutex); + err = hang_init(&h, gt); if (err) goto unlock; - obj = i915_gem_object_create_internal(i915, SZ_1M); + obj = i915_gem_object_create_internal(gt->i915, SZ_1M); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto fini; @@ -1214,7 +1213,7 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915, goto out_obj; } - rq = hang_create_request(&h, i915->engine[RCS0]); + rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto out_obj; @@ -1252,16 +1251,16 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915, if (err) goto out_rq; - mutex_unlock(&i915->drm.struct_mutex); + mutex_unlock(>->i915->drm.struct_mutex); if (!wait_until_running(&h, rq)) { - struct drm_printer p = drm_info_printer(i915->drm.dev); + struct drm_printer p = drm_info_printer(gt->i915->drm.dev); pr_err("%s: Failed to start request %llx, at %x\n", __func__, rq->fence.seqno, hws_seqno(&h, rq)); intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(gt); goto out_reset; } @@ -1278,31 +1277,31 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915, wait_for_completion(&arg.completion); if (wait_for(!list_empty(&rq->fence.cb_list), 10)) { - struct drm_printer p = drm_info_printer(i915->drm.dev); + struct drm_printer p = drm_info_printer(gt->i915->drm.dev); pr_err("igt/evict_vma kthread did not wait\n"); intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(gt); goto out_reset; } out_reset: - igt_global_reset_lock(i915); - fake_hangcheck(rq->i915, rq->engine->mask); - igt_global_reset_unlock(i915); + igt_global_reset_lock(gt); + fake_hangcheck(gt, rq->engine->mask); + igt_global_reset_unlock(gt); if (tsk) { - struct igt_wedge_me w; + struct intel_wedge_me w; /* The reset, even indirectly, should take less than 10ms. */ - igt_wedge_on_timeout(&w, i915, HZ / 10 /* 100ms timeout*/) + intel_wedge_on_timeout(&w, gt, HZ / 10 /* 100ms */) err = kthread_stop(tsk); put_task_struct(tsk); } - mutex_lock(&i915->drm.struct_mutex); + mutex_lock(>->i915->drm.struct_mutex); out_rq: i915_request_put(rq); out_obj: @@ -1310,9 +1309,9 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915, fini: hang_fini(&h); unlock: - mutex_unlock(&i915->drm.struct_mutex); + mutex_unlock(>->i915->drm.struct_mutex); - if (i915_reset_failed(i915)) + if (intel_gt_is_wedged(gt)) return -EIO; return err; @@ -1320,26 +1319,26 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915, static int igt_reset_evict_ggtt(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; - return __igt_reset_evict_vma(i915, &i915->ggtt.vm, + return __igt_reset_evict_vma(gt, >->ggtt->vm, evict_vma, EXEC_OBJECT_WRITE); } static int igt_reset_evict_ppgtt(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct i915_gem_context *ctx; struct drm_file *file; int err; - file = mock_file(i915); + file = mock_file(gt->i915); if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - ctx = live_context(i915, file); - mutex_unlock(&i915->drm.struct_mutex); + mutex_lock(>->i915->drm.struct_mutex); + ctx = live_context(gt->i915, file); + mutex_unlock(>->i915->drm.struct_mutex); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto out; @@ -1347,29 +1346,29 @@ static int igt_reset_evict_ppgtt(void *arg) err = 0; if (ctx->vm) /* aliasing == global gtt locking, covered above */ - err = __igt_reset_evict_vma(i915, ctx->vm, + err = __igt_reset_evict_vma(gt, ctx->vm, evict_vma, EXEC_OBJECT_WRITE); out: - mock_file_free(i915, file); + mock_file_free(gt->i915, file); return err; } static int igt_reset_evict_fence(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; - return __igt_reset_evict_vma(i915, &i915->ggtt.vm, + return __igt_reset_evict_vma(gt, >->ggtt->vm, evict_fence, EXEC_OBJECT_NEEDS_FENCE); } -static int wait_for_others(struct drm_i915_private *i915, +static int wait_for_others(struct intel_gt *gt, struct intel_engine_cs *exclude) { struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt->i915, id) { if (engine == exclude) continue; @@ -1382,7 +1381,8 @@ static int wait_for_others(struct drm_i915_private *i915, static int igt_reset_queue(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; + struct i915_gpu_error *global = >->i915->gpu_error; struct intel_engine_cs *engine; enum intel_engine_id id; struct hang h; @@ -1390,14 +1390,14 @@ static int igt_reset_queue(void *arg) /* Check that we replay pending requests following a hang */ - igt_global_reset_lock(i915); + igt_global_reset_lock(gt); - mutex_lock(&i915->drm.struct_mutex); - err = hang_init(&h, i915); + mutex_lock(>->i915->drm.struct_mutex); + err = hang_init(&h, gt); if (err) goto unlock; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt->i915, id) { struct i915_request *prev; IGT_TIMEOUT(end_time); unsigned int count; @@ -1438,7 +1438,7 @@ static int igt_reset_queue(void *arg) * (hangcheck), or we focus on resetting just one * engine and so avoid repeatedly resetting innocents. */ - err = wait_for_others(i915, engine); + err = wait_for_others(gt, engine); if (err) { pr_err("%s(%s): Failed to idle other inactive engines after device reset\n", __func__, engine->name); @@ -1446,12 +1446,12 @@ static int igt_reset_queue(void *arg) i915_request_put(prev); GEM_TRACE_DUMP(); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(gt); goto fini; } if (!wait_until_running(&h, prev)) { - struct drm_printer p = drm_info_printer(i915->drm.dev); + struct drm_printer p = drm_info_printer(gt->i915->drm.dev); pr_err("%s(%s): Failed to start request %llx, at %x\n", __func__, engine->name, @@ -1462,13 +1462,13 @@ static int igt_reset_queue(void *arg) i915_request_put(rq); i915_request_put(prev); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(gt); err = -EIO; goto fini; } - reset_count = fake_hangcheck(i915, BIT(id)); + reset_count = fake_hangcheck(gt, BIT(id)); if (prev->fence.error != -EIO) { pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n", @@ -1488,7 +1488,7 @@ static int igt_reset_queue(void *arg) goto fini; } - if (i915_reset_count(&i915->gpu_error) == reset_count) { + if (i915_reset_count(global) == reset_count) { pr_err("No GPU reset recorded!\n"); i915_request_put(rq); i915_request_put(prev); @@ -1507,7 +1507,7 @@ static int igt_reset_queue(void *arg) i915_request_put(prev); - err = igt_flush_test(i915, I915_WAIT_LOCKED); + err = igt_flush_test(gt->i915, I915_WAIT_LOCKED); if (err) break; } @@ -1515,10 +1515,10 @@ static int igt_reset_queue(void *arg) fini: hang_fini(&h); unlock: - mutex_unlock(&i915->drm.struct_mutex); - igt_global_reset_unlock(i915); + mutex_unlock(>->i915->drm.struct_mutex); + igt_global_reset_unlock(gt); - if (i915_reset_failed(i915)) + if (intel_gt_is_wedged(gt)) return -EIO; return err; @@ -1526,8 +1526,9 @@ static int igt_reset_queue(void *arg) static int igt_handle_error(void *arg) { - struct drm_i915_private *i915 = arg; - struct intel_engine_cs *engine = i915->engine[RCS0]; + struct intel_gt *gt = arg; + struct i915_gpu_error *global = >->i915->gpu_error; + struct intel_engine_cs *engine = gt->i915->engine[RCS0]; struct hang h; struct i915_request *rq; struct i915_gpu_state *error; @@ -1535,15 +1536,15 @@ static int igt_handle_error(void *arg) /* Check that we can issue a global GPU and engine reset */ - if (!intel_has_reset_engine(i915)) + if (!intel_has_reset_engine(gt->i915)) return 0; if (!engine || !intel_engine_can_store_dword(engine)) return 0; - mutex_lock(&i915->drm.struct_mutex); + mutex_lock(>->i915->drm.struct_mutex); - err = hang_init(&h, i915); + err = hang_init(&h, gt); if (err) goto err_unlock; @@ -1557,28 +1558,28 @@ static int igt_handle_error(void *arg) i915_request_add(rq); if (!wait_until_running(&h, rq)) { - struct drm_printer p = drm_info_printer(i915->drm.dev); + struct drm_printer p = drm_info_printer(gt->i915->drm.dev); pr_err("%s: Failed to start request %llx, at %x\n", __func__, rq->fence.seqno, hws_seqno(&h, rq)); intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(gt); err = -EIO; goto err_request; } - mutex_unlock(&i915->drm.struct_mutex); + mutex_unlock(>->i915->drm.struct_mutex); /* Temporarily disable error capture */ - error = xchg(&i915->gpu_error.first_error, (void *)-1); + error = xchg(&global->first_error, (void *)-1); - i915_handle_error(i915, engine->mask, 0, NULL); + intel_gt_handle_error(gt, engine->mask, 0, NULL); - xchg(&i915->gpu_error.first_error, error); + xchg(&global->first_error, error); - mutex_lock(&i915->drm.struct_mutex); + mutex_lock(>->i915->drm.struct_mutex); if (rq->fence.error != -EIO) { pr_err("Guilty request not identified!\n"); @@ -1591,7 +1592,7 @@ static int igt_handle_error(void *arg) err_fini: hang_fini(&h); err_unlock: - mutex_unlock(&i915->drm.struct_mutex); + mutex_unlock(>->i915->drm.struct_mutex); return err; } @@ -1608,7 +1609,7 @@ static int __igt_atomic_reset_engine(struct intel_engine_cs *engine, tasklet_disable_nosync(t); p->critical_section_begin(); - err = i915_reset_engine(engine, NULL); + err = intel_engine_reset(engine, NULL); p->critical_section_end(); tasklet_enable(t); @@ -1623,7 +1624,6 @@ static int __igt_atomic_reset_engine(struct intel_engine_cs *engine, static int igt_atomic_reset_engine(struct intel_engine_cs *engine, const struct igt_atomic_section *p) { - struct drm_i915_private *i915 = engine->i915; struct i915_request *rq; struct hang h; int err; @@ -1632,7 +1632,7 @@ static int igt_atomic_reset_engine(struct intel_engine_cs *engine, if (err) return err; - err = hang_init(&h, i915); + err = hang_init(&h, engine->gt); if (err) return err; @@ -1651,16 +1651,16 @@ static int igt_atomic_reset_engine(struct intel_engine_cs *engine, pr_err("%s(%s): Failed to start request %llx, at %x\n", __func__, engine->name, rq->fence.seqno, hws_seqno(&h, rq)); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(engine->gt); err = -EIO; } if (err == 0) { - struct igt_wedge_me w; + struct intel_wedge_me w; - igt_wedge_on_timeout(&w, i915, HZ / 20 /* 50ms timeout*/) + intel_wedge_on_timeout(&w, engine->gt, HZ / 20 /* 50ms */) i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); - if (i915_reset_failed(i915)) + if (intel_gt_is_wedged(engine->gt)) err = -EIO; } @@ -1672,30 +1672,30 @@ static int igt_atomic_reset_engine(struct intel_engine_cs *engine, static int igt_reset_engines_atomic(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; const typeof(*igt_atomic_phases) *p; int err = 0; /* Check that the engines resets are usable from atomic context */ - if (!intel_has_reset_engine(i915)) + if (!intel_has_reset_engine(gt->i915)) return 0; - if (USES_GUC_SUBMISSION(i915)) + if (USES_GUC_SUBMISSION(gt->i915)) return 0; - igt_global_reset_lock(i915); - mutex_lock(&i915->drm.struct_mutex); + igt_global_reset_lock(gt); + mutex_lock(>->i915->drm.struct_mutex); /* Flush any requests before we get started and check basics */ - if (!igt_force_reset(i915)) + if (!igt_force_reset(gt)) goto unlock; for (p = igt_atomic_phases; p->name; p++) { struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt->i915, id) { err = igt_atomic_reset_engine(engine, p); if (err) goto out; @@ -1704,11 +1704,11 @@ static int igt_reset_engines_atomic(void *arg) out: /* As we poke around the guts, do a full reset before continuing. */ - igt_force_reset(i915); + igt_force_reset(gt); unlock: - mutex_unlock(&i915->drm.struct_mutex); - igt_global_reset_unlock(i915); + mutex_unlock(>->i915->drm.struct_mutex); + igt_global_reset_unlock(gt); return err; } @@ -1730,28 +1730,29 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_reset_evict_fence), SUBTEST(igt_handle_error), }; + struct intel_gt *gt = &i915->gt; intel_wakeref_t wakeref; bool saved_hangcheck; int err; - if (!intel_has_gpu_reset(i915)) + if (!intel_has_gpu_reset(gt->i915)) return 0; - if (i915_terminally_wedged(i915)) + if (intel_gt_is_wedged(gt)) return -EIO; /* we're long past hope of a successful reset */ - wakeref = intel_runtime_pm_get(&i915->runtime_pm); + wakeref = intel_runtime_pm_get(>->i915->runtime_pm); saved_hangcheck = fetch_and_zero(&i915_modparams.enable_hangcheck); - drain_delayed_work(&i915->gpu_error.hangcheck_work); /* flush param */ + drain_delayed_work(>->hangcheck.work); /* flush param */ - err = i915_live_subtests(tests, i915); + err = intel_gt_live_subtests(tests, gt); - mutex_lock(&i915->drm.struct_mutex); - igt_flush_test(i915, I915_WAIT_LOCKED); - mutex_unlock(&i915->drm.struct_mutex); + mutex_lock(>->i915->drm.struct_mutex); + igt_flush_test(gt->i915, I915_WAIT_LOCKED); + mutex_unlock(>->i915->drm.struct_mutex); i915_modparams.enable_hangcheck = saved_hangcheck; - intel_runtime_pm_put(&i915->runtime_pm, wakeref); + intel_runtime_pm_put(>->i915->runtime_pm, wakeref); return err; } diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index b9b881ab8e7c..678e9b2edf8d 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -55,7 +55,7 @@ static int live_sanitycheck(void *arg) if (!igt_wait_for_spinner(&spin, rq)) { GEM_TRACE("spinner failed to start\n"); GEM_TRACE_DUMP(); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(&i915->gt); err = -EIO; goto err_ctx; } @@ -211,7 +211,7 @@ slice_semaphore_queue(struct intel_engine_cs *outer, pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n", count, n); GEM_TRACE_DUMP(); - i915_gem_set_wedged(outer->i915); + intel_gt_set_wedged(outer->gt); err = -EIO; } @@ -445,7 +445,7 @@ static int live_busywait_preempt(void *arg) intel_engine_dump(engine, &p, "%s\n", engine->name); GEM_TRACE_DUMP(); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(&i915->gt); err = -EIO; goto err_vma; } @@ -534,7 +534,7 @@ static int live_preempt(void *arg) if (!igt_wait_for_spinner(&spin_lo, rq)) { GEM_TRACE("lo spinner failed to start\n"); GEM_TRACE_DUMP(); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(&i915->gt); err = -EIO; goto err_ctx_lo; } @@ -551,7 +551,7 @@ static int live_preempt(void *arg) if (!igt_wait_for_spinner(&spin_hi, rq)) { GEM_TRACE("hi spinner failed to start\n"); GEM_TRACE_DUMP(); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(&i915->gt); err = -EIO; goto err_ctx_lo; } @@ -688,7 +688,7 @@ static int live_late_preempt(void *arg) err_wedged: igt_spinner_end(&spin_hi); igt_spinner_end(&spin_lo); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(&i915->gt); err = -EIO; goto err_ctx_lo; } @@ -824,7 +824,7 @@ static int live_nopreempt(void *arg) err_wedged: igt_spinner_end(&b.spin); igt_spinner_end(&a.spin); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(&i915->gt); err = -EIO; goto err_client_b; } @@ -934,7 +934,7 @@ static int live_suppress_self_preempt(void *arg) err_wedged: igt_spinner_end(&b.spin); igt_spinner_end(&a.spin); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(&i915->gt); err = -EIO; goto err_client_b; } @@ -1105,7 +1105,7 @@ static int live_suppress_wait_preempt(void *arg) err_wedged: for (i = 0; i < ARRAY_SIZE(client); i++) igt_spinner_end(&client[i].spin); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(&i915->gt); err = -EIO; goto err_client_3; } @@ -1251,7 +1251,7 @@ static int live_chain_preempt(void *arg) err_wedged: igt_spinner_end(&hi.spin); igt_spinner_end(&lo.spin); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(&i915->gt); err = -EIO; goto err_client_lo; } @@ -1310,7 +1310,7 @@ static int live_preempt_hang(void *arg) if (!igt_wait_for_spinner(&spin_lo, rq)) { GEM_TRACE("lo spinner failed to start\n"); GEM_TRACE_DUMP(); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(&i915->gt); err = -EIO; goto err_ctx_lo; } @@ -1332,21 +1332,21 @@ static int live_preempt_hang(void *arg) HZ / 10)) { pr_err("Preemption did not occur within timeout!"); GEM_TRACE_DUMP(); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(&i915->gt); err = -EIO; goto err_ctx_lo; } - set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); - i915_reset_engine(engine, NULL); - clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); + set_bit(I915_RESET_ENGINE + id, &i915->gt.reset.flags); + intel_engine_reset(engine, NULL); + clear_bit(I915_RESET_ENGINE + id, &i915->gt.reset.flags); engine->execlists.preempt_hang.inject_hang = false; if (!igt_wait_for_spinner(&spin_hi, rq)) { GEM_TRACE("hi spinner failed to start\n"); GEM_TRACE_DUMP(); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(&i915->gt); err = -EIO; goto err_ctx_lo; } @@ -1726,7 +1726,7 @@ static int nop_virtual_engine(struct drm_i915_private *i915, request[nc]->fence.context, request[nc]->fence.seqno); GEM_TRACE_DUMP(); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(&i915->gt); break; } } @@ -1873,7 +1873,7 @@ static int mask_virtual_engine(struct drm_i915_private *i915, request[n]->fence.context, request[n]->fence.seqno); GEM_TRACE_DUMP(); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(&i915->gt); err = -EIO; goto out; } @@ -2150,7 +2150,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) if (!HAS_EXECLISTS(i915)) return 0; - if (i915_terminally_wedged(i915)) + if (intel_gt_is_wedged(&i915->gt)) return 0; return i915_live_subtests(tests, i915); diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c index 672e32e1ef95..813a9176c436 100644 --- a/drivers/gpu/drm/i915/gt/selftest_reset.c +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c @@ -9,26 +9,29 @@ static int igt_global_reset(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; unsigned int reset_count; + intel_wakeref_t wakeref; int err = 0; /* Check that we can issue a global GPU reset */ - igt_global_reset_lock(i915); + igt_global_reset_lock(gt); + wakeref = intel_runtime_pm_get(>->i915->runtime_pm); - reset_count = i915_reset_count(&i915->gpu_error); + reset_count = i915_reset_count(>->i915->gpu_error); - i915_reset(i915, ALL_ENGINES, NULL); + intel_gt_reset(gt, ALL_ENGINES, NULL); - if (i915_reset_count(&i915->gpu_error) == reset_count) { + if (i915_reset_count(>->i915->gpu_error) == reset_count) { pr_err("No GPU reset recorded!\n"); err = -EINVAL; } - igt_global_reset_unlock(i915); + intel_runtime_pm_put(>->i915->runtime_pm, wakeref); + igt_global_reset_unlock(gt); - if (i915_reset_failed(i915)) + if (intel_gt_is_wedged(gt)) err = -EIO; return err; @@ -36,38 +39,38 @@ static int igt_global_reset(void *arg) static int igt_wedged_reset(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; intel_wakeref_t wakeref; /* Check that we can recover a wedged device with a GPU reset */ - igt_global_reset_lock(i915); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); + igt_global_reset_lock(gt); + wakeref = intel_runtime_pm_get(>->i915->runtime_pm); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(gt); - GEM_BUG_ON(!i915_reset_failed(i915)); - i915_reset(i915, ALL_ENGINES, NULL); + GEM_BUG_ON(!intel_gt_is_wedged(gt)); + intel_gt_reset(gt, ALL_ENGINES, NULL); - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - igt_global_reset_unlock(i915); + intel_runtime_pm_put(>->i915->runtime_pm, wakeref); + igt_global_reset_unlock(gt); - return i915_reset_failed(i915) ? -EIO : 0; + return intel_gt_is_wedged(gt) ? -EIO : 0; } static int igt_atomic_reset(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; const typeof(*igt_atomic_phases) *p; int err = 0; /* Check that the resets are usable from atomic context */ - intel_gt_pm_get(&i915->gt); - igt_global_reset_lock(i915); + intel_gt_pm_get(gt); + igt_global_reset_lock(gt); /* Flush any requests before we get started and check basics */ - if (!igt_force_reset(i915)) + if (!igt_force_reset(gt)) goto unlock; for (p = igt_atomic_phases; p->name; p++) { @@ -75,13 +78,13 @@ static int igt_atomic_reset(void *arg) GEM_TRACE("intel_gpu_reset under %s\n", p->name); - awake = reset_prepare(i915); + awake = reset_prepare(gt); p->critical_section_begin(); - err = intel_gpu_reset(i915, ALL_ENGINES); + err = intel_gpu_reset(gt, ALL_ENGINES); p->critical_section_end(); - reset_finish(i915, awake); + reset_finish(gt, awake); if (err) { pr_err("intel_gpu_reset failed under %s\n", p->name); @@ -90,18 +93,18 @@ static int igt_atomic_reset(void *arg) } /* As we poke around the guts, do a full reset before continuing. */ - igt_force_reset(i915); + igt_force_reset(gt); unlock: - igt_global_reset_unlock(i915); - intel_gt_pm_put(&i915->gt); + igt_global_reset_unlock(gt); + intel_gt_pm_put(gt); return err; } static int igt_atomic_engine_reset(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; const typeof(*igt_atomic_phases) *p; struct intel_engine_cs *engine; enum intel_engine_id id; @@ -109,33 +112,33 @@ static int igt_atomic_engine_reset(void *arg) /* Check that the resets are usable from atomic context */ - if (!intel_has_reset_engine(i915)) + if (!intel_has_reset_engine(gt->i915)) return 0; - if (USES_GUC_SUBMISSION(i915)) + if (USES_GUC_SUBMISSION(gt->i915)) return 0; - intel_gt_pm_get(&i915->gt); - igt_global_reset_lock(i915); + intel_gt_pm_get(gt); + igt_global_reset_lock(gt); /* Flush any requests before we get started and check basics */ - if (!igt_force_reset(i915)) + if (!igt_force_reset(gt)) goto out_unlock; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt->i915, id) { tasklet_disable_nosync(&engine->execlists.tasklet); intel_engine_pm_get(engine); for (p = igt_atomic_phases; p->name; p++) { - GEM_TRACE("i915_reset_engine(%s) under %s\n", + GEM_TRACE("intel_engine_reset(%s) under %s\n", engine->name, p->name); p->critical_section_begin(); - err = i915_reset_engine(engine, NULL); + err = intel_engine_reset(engine, NULL); p->critical_section_end(); if (err) { - pr_err("i915_reset_engine(%s) failed under %s\n", + pr_err("intel_engine_reset(%s) failed under %s\n", engine->name, p->name); break; } @@ -148,11 +151,11 @@ static int igt_atomic_engine_reset(void *arg) } /* As we poke around the guts, do a full reset before continuing. */ - igt_force_reset(i915); + igt_force_reset(gt); out_unlock: - igt_global_reset_unlock(i915); - intel_gt_pm_put(&i915->gt); + igt_global_reset_unlock(gt); + intel_gt_pm_put(gt); return err; } @@ -165,17 +168,13 @@ int intel_reset_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_atomic_reset), SUBTEST(igt_atomic_engine_reset), }; - intel_wakeref_t wakeref; - int err = 0; + struct intel_gt *gt = &i915->gt; - if (!intel_has_gpu_reset(i915)) + if (!intel_has_gpu_reset(gt->i915)) return 0; - if (i915_terminally_wedged(i915)) + if (intel_gt_is_wedged(gt)) return -EIO; /* we're long past hope of a successful reset */ - with_intel_runtime_pm(&i915->runtime_pm, wakeref) - err = i915_subtests(tests, i915); - - return err; + return intel_gt_live_subtests(tests, gt); } diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c index 4d72cef506ae..321481403165 100644 --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c @@ -7,6 +7,7 @@ #include <linux/prime_numbers.h> #include "gem/i915_gem_pm.h" +#include "intel_gt.h" #include "../selftests/i915_random.h" #include "../i915_selftest.h" @@ -838,7 +839,7 @@ int intel_timeline_live_selftests(struct drm_i915_private *i915) SUBTEST(live_hwsp_wrap), }; - if (i915_terminally_wedged(i915)) + if (intel_gt_is_wedged(&i915->gt)) return 0; return i915_live_subtests(tests, i915); diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index fa01ea7855de..2978f435dcca 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -12,7 +12,6 @@ #include "selftests/igt_flush_test.h" #include "selftests/igt_reset.h" #include "selftests/igt_spinner.h" -#include "selftests/igt_wedge_me.h" #include "selftests/mock_drm.h" #include "gem/selftests/igt_gem_utils.h" @@ -185,7 +184,7 @@ static int check_whitelist(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { struct drm_i915_gem_object *results; - struct igt_wedge_me wedge; + struct intel_wedge_me wedge; u32 *vaddr; int err; int i; @@ -196,10 +195,10 @@ static int check_whitelist(struct i915_gem_context *ctx, err = 0; i915_gem_object_lock(results); - igt_wedge_on_timeout(&wedge, ctx->i915, HZ / 5) /* a safety net! */ + intel_wedge_on_timeout(&wedge, &ctx->i915->gt, HZ / 5) /* safety net! */ err = i915_gem_object_set_to_cpu_domain(results, false); i915_gem_object_unlock(results); - if (i915_terminally_wedged(ctx->i915)) + if (intel_gt_is_wedged(&ctx->i915->gt)) err = -EIO; if (err) goto out_put; @@ -232,13 +231,13 @@ static int check_whitelist(struct i915_gem_context *ctx, static int do_device_reset(struct intel_engine_cs *engine) { - i915_reset(engine->i915, engine->mask, "live_workarounds"); + intel_gt_reset(engine->gt, engine->mask, "live_workarounds"); return 0; } static int do_engine_reset(struct intel_engine_cs *engine) { - return i915_reset_engine(engine, "live_workarounds"); + return intel_engine_reset(engine, "live_workarounds"); } static int @@ -574,7 +573,7 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx, if (i915_request_wait(rq, 0, HZ / 5) < 0) { pr_err("%s: Futzing %x timedout; cancelling test\n", engine->name, reg); - i915_gem_set_wedged(ctx->i915); + intel_gt_set_wedged(&ctx->i915->gt); err = -EIO; goto out_batch; } @@ -711,7 +710,7 @@ static int live_reset_whitelist(void *arg) if (!engine || engine->whitelist.count == 0) return 0; - igt_global_reset_lock(i915); + igt_global_reset_lock(&i915->gt); if (intel_has_reset_engine(i915)) { err = check_whitelist_across_reset(engine, @@ -730,7 +729,7 @@ static int live_reset_whitelist(void *arg) } out: - igt_global_reset_unlock(i915); + igt_global_reset_unlock(&i915->gt); return err; } @@ -1098,7 +1097,7 @@ live_gpu_reset_workarounds(void *arg) pr_info("Verifying after GPU reset...\n"); - igt_global_reset_lock(i915); + igt_global_reset_lock(&i915->gt); wakeref = intel_runtime_pm_get(&i915->runtime_pm); reference_lists_init(i915, &lists); @@ -1107,7 +1106,7 @@ live_gpu_reset_workarounds(void *arg) if (!ok) goto out; - i915_reset(i915, ALL_ENGINES, "live_workarounds"); + intel_gt_reset(&i915->gt, ALL_ENGINES, "live_workarounds"); ok = verify_wa_lists(ctx, &lists, "after reset"); @@ -1115,7 +1114,7 @@ live_gpu_reset_workarounds(void *arg) kernel_context_close(ctx); reference_lists_fini(i915, &lists); intel_runtime_pm_put(&i915->runtime_pm, wakeref); - igt_global_reset_unlock(i915); + igt_global_reset_unlock(&i915->gt); return ok ? 0 : -ESRCH; } @@ -1140,7 +1139,7 @@ live_engine_reset_workarounds(void *arg) if (IS_ERR(ctx)) return PTR_ERR(ctx); - igt_global_reset_lock(i915); + igt_global_reset_lock(&i915->gt); wakeref = intel_runtime_pm_get(&i915->runtime_pm); reference_lists_init(i915, &lists); @@ -1156,7 +1155,7 @@ live_engine_reset_workarounds(void *arg) goto err; } - i915_reset_engine(engine, "live_workarounds"); + intel_engine_reset(engine, "live_workarounds"); ok = verify_wa_lists(ctx, &lists, "after idle reset"); if (!ok) { @@ -1184,7 +1183,7 @@ live_engine_reset_workarounds(void *arg) goto err; } - i915_reset_engine(engine, "live_workarounds"); + intel_engine_reset(engine, "live_workarounds"); igt_spinner_end(&spin); igt_spinner_fini(&spin); @@ -1199,7 +1198,7 @@ live_engine_reset_workarounds(void *arg) err: reference_lists_fini(i915, &lists); intel_runtime_pm_put(&i915->runtime_pm, wakeref); - igt_global_reset_unlock(i915); + igt_global_reset_unlock(&i915->gt); kernel_context_close(ctx); igt_flush_test(i915, I915_WAIT_LOCKED); @@ -1218,7 +1217,7 @@ int intel_workarounds_live_selftests(struct drm_i915_private *i915) }; int err; - if (i915_terminally_wedged(i915)) + if (intel_gt_is_wedged(&i915->gt)) return 0; mutex_lock(&i915->drm.struct_mutex); diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index bc5af2d2909a..3405d8cab2e4 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1000,15 +1000,16 @@ static void i915_instdone_info(struct drm_i915_private *dev_priv, static int i915_hangcheck_info(struct seq_file *m, void *unused) { - struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct drm_i915_private *i915 = node_to_i915(m->private); + struct intel_gt *gt = &i915->gt; struct intel_engine_cs *engine; intel_wakeref_t wakeref; enum intel_engine_id id; - seq_printf(m, "Reset flags: %lx\n", dev_priv->gpu_error.flags); - if (test_bit(I915_WEDGED, &dev_priv->gpu_error.flags)) + seq_printf(m, "Reset flags: %lx\n", gt->reset.flags); + if (test_bit(I915_WEDGED, >->reset.flags)) seq_puts(m, "\tWedged\n"); - if (test_bit(I915_RESET_BACKOFF, &dev_priv->gpu_error.flags)) + if (test_bit(I915_RESET_BACKOFF, >->reset.flags)) seq_puts(m, "\tDevice (global) reset in progress\n"); if (!i915_modparams.enable_hangcheck) { @@ -1016,19 +1017,19 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) return 0; } - if (timer_pending(&dev_priv->gpu_error.hangcheck_work.timer)) + if (timer_pending(>->hangcheck.work.timer)) seq_printf(m, "Hangcheck active, timer fires in %dms\n", - jiffies_to_msecs(dev_priv->gpu_error.hangcheck_work.timer.expires - + jiffies_to_msecs(gt->hangcheck.work.timer.expires - jiffies)); - else if (delayed_work_pending(&dev_priv->gpu_error.hangcheck_work)) + else if (delayed_work_pending(>->hangcheck.work)) seq_puts(m, "Hangcheck active, work pending\n"); else seq_puts(m, "Hangcheck inactive\n"); - seq_printf(m, "GT active? %s\n", yesno(dev_priv->gt.awake)); + seq_printf(m, "GT active? %s\n", yesno(gt->awake)); - with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) { - for_each_engine(engine, dev_priv, id) { + with_intel_runtime_pm(&i915->runtime_pm, wakeref) { + for_each_engine(engine, i915, id) { struct intel_instdone instdone; seq_printf(m, "%s: %d ms ago\n", @@ -1043,10 +1044,10 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) intel_engine_get_instdone(engine, &instdone); seq_puts(m, "\tinstdone read =\n"); - i915_instdone_info(dev_priv, m, &instdone); + i915_instdone_info(i915, m, &instdone); seq_puts(m, "\tinstdone accu =\n"); - i915_instdone_info(dev_priv, m, + i915_instdone_info(i915, m, &engine->hangcheck.instdone); } } @@ -1054,23 +1055,6 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) return 0; } -static int i915_reset_info(struct seq_file *m, void *unused) -{ - struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct i915_gpu_error *error = &dev_priv->gpu_error; - struct intel_engine_cs *engine; - enum intel_engine_id id; - - seq_printf(m, "full gpu reset = %u\n", i915_reset_count(error)); - - for_each_engine(engine, dev_priv, id) { - seq_printf(m, "%s = %u\n", engine->name, - i915_reset_engine_count(error, engine)); - } - - return 0; -} - static int ironlake_drpc_info(struct seq_file *m) { struct drm_i915_private *i915 = node_to_i915(m->private); @@ -3547,7 +3531,8 @@ static const struct file_operations i915_cur_wm_latency_fops = { static int i915_wedged_get(void *data, u64 *val) { - int ret = i915_terminally_wedged(data); + struct drm_i915_private *i915 = data; + int ret = intel_gt_terminally_wedged(&i915->gt); switch (ret) { case -EIO: @@ -3567,11 +3552,11 @@ i915_wedged_set(void *data, u64 val) struct drm_i915_private *i915 = data; /* Flush any previous reset before applying for a new one */ - wait_event(i915->gpu_error.reset_queue, - !test_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags)); + wait_event(i915->gt.reset.queue, + !test_bit(I915_RESET_BACKOFF, &i915->gt.reset.flags)); - i915_handle_error(i915, val, I915_ERROR_CAPTURE, - "Manually set wedged engine mask = %llx", val); + intel_gt_handle_error(&i915->gt, val, I915_ERROR_CAPTURE, + "Manually set wedged engine mask = %llx", val); return 0; } @@ -3614,8 +3599,9 @@ i915_drop_caches_set(void *data, u64 val) val, val & DROP_ALL); if (val & DROP_RESET_ACTIVE && - wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) - i915_gem_set_wedged(i915); + wait_for(intel_engines_are_idle(&i915->gt), + I915_IDLE_ENGINES_TIMEOUT)) + intel_gt_set_wedged(&i915->gt); /* No need to check and wait for gpu resets, only libdrm auto-restarts * on ioctls on -EAGAIN. */ @@ -3650,8 +3636,8 @@ i915_drop_caches_set(void *data, u64 val) mutex_unlock(&i915->drm.struct_mutex); } - if (val & DROP_RESET_ACTIVE && i915_terminally_wedged(i915)) - i915_handle_error(i915, ALL_ENGINES, 0, NULL); + if (val & DROP_RESET_ACTIVE && intel_gt_terminally_wedged(&i915->gt)) + intel_gt_handle_error(&i915->gt, ALL_ENGINES, 0, NULL); fs_reclaim_acquire(GFP_KERNEL); if (val & DROP_BOUND) @@ -4305,7 +4291,6 @@ static const struct drm_info_list i915_debugfs_list[] = { {"i915_huc_load_status", i915_huc_load_status_info, 0}, {"i915_frequency_info", i915_frequency_info, 0}, {"i915_hangcheck_info", i915_hangcheck_info, 0}, - {"i915_reset_info", i915_reset_info, 0}, {"i915_drpc_info", i915_drpc_info, 0}, {"i915_emon_status", i915_emon_status, 0}, {"i915_ring_freq_table", i915_ring_freq_table, 0}, diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 794c6814a6d0..4b9860986a93 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -941,7 +941,6 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv) if (ret < 0) goto err_uc; intel_irq_init(dev_priv); - intel_hangcheck_init(dev_priv); intel_init_display_hooks(dev_priv); intel_init_clock_gating_hooks(dev_priv); intel_init_audio_hooks(dev_priv); @@ -1960,7 +1959,7 @@ void i915_driver_unload(struct drm_device *dev) * all in-flight requests so that we can quickly unbind the active * resources. */ - i915_gem_set_wedged(dev_priv); + intel_gt_set_wedged(&dev_priv->gt); /* Flush any external code that still may be under the RCU lock */ synchronize_rcu(); @@ -1981,7 +1980,7 @@ void i915_driver_unload(struct drm_device *dev) intel_csr_ucode_fini(dev_priv); /* Free error state after interrupts are fully disabled. */ - cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); + cancel_delayed_work_sync(&dev_priv->gt.hangcheck.work); i915_reset_error_state(dev_priv); i915_gem_fini_hw(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 64d8de4d7716..1352293a368f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2400,28 +2400,10 @@ extern int i915_driver_load(struct pci_dev *pdev, extern void i915_driver_unload(struct drm_device *dev); extern void intel_engine_init_hangcheck(struct intel_engine_cs *engine); -extern void intel_hangcheck_init(struct drm_i915_private *dev_priv); int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on); u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv); -static inline void i915_queue_hangcheck(struct drm_i915_private *dev_priv) -{ - unsigned long delay; - - if (unlikely(!i915_modparams.enable_hangcheck)) - return; - - /* Don't continually defer the hangcheck so that it is always run at - * least once after work has been scheduled on any ring. Otherwise, - * we will ignore a hung ring if a second ring is kept busy. - */ - - delay = round_jiffies_up_relative(DRM_I915_HANGCHECK_JIFFIES); - queue_delayed_work(system_long_wq, - &dev_priv->gpu_error.hangcheck_work, delay); -} - static inline bool intel_gvt_active(struct drm_i915_private *dev_priv) { return dev_priv->gvt; @@ -2506,30 +2488,17 @@ int i915_gem_mmap_gtt_version(void); int __must_check i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno); -static inline bool __i915_wedged(struct i915_gpu_error *error) -{ - return unlikely(test_bit(I915_WEDGED, &error->flags)); -} - -static inline bool i915_reset_failed(struct drm_i915_private *i915) -{ - return __i915_wedged(&i915->gpu_error); -} - static inline u32 i915_reset_count(struct i915_gpu_error *error) { - return READ_ONCE(error->reset_count); + return atomic_read(&error->reset_count); } static inline u32 i915_reset_engine_count(struct i915_gpu_error *error, struct intel_engine_cs *engine) { - return READ_ONCE(error->reset_engine_count[engine->id]); + return atomic_read(&error->reset_engine_count[engine->uabi_class]); } -void i915_gem_set_wedged(struct drm_i915_private *dev_priv); -bool i915_gem_unset_wedged(struct drm_i915_private *dev_priv); - void i915_gem_init_mmio(struct drm_i915_private *i915); int __must_check i915_gem_init(struct drm_i915_private *dev_priv); int __must_check i915_gem_init_hw(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c42f2146675c..9c359d437bf5 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -896,13 +896,13 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915) } } -static int wait_for_engines(struct drm_i915_private *i915) +static int wait_for_engines(struct intel_gt *gt) { - if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) { - dev_err(i915->drm.dev, + if (wait_for(intel_engines_are_idle(gt), I915_IDLE_ENGINES_TIMEOUT)) { + dev_err(gt->i915->drm.dev, "Failed to idle engines, declaring wedged!\n"); GEM_TRACE_DUMP(); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(gt); return -EIO; } @@ -973,7 +973,7 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, lockdep_assert_held(&i915->drm.struct_mutex); - err = wait_for_engines(i915); + err = wait_for_engines(&i915->gt); if (err) return err; @@ -1151,8 +1151,8 @@ void i915_gem_sanitize(struct drm_i915_private *i915) * back to defaults, recovering from whatever wedged state we left it * in and so worth trying to use the device once more. */ - if (i915_terminally_wedged(i915)) - i915_gem_unset_wedged(i915); + if (intel_gt_is_wedged(&i915->gt)) + intel_gt_unset_wedged(&i915->gt); /* * If we inherit context state from the BIOS or earlier occupants @@ -1204,7 +1204,7 @@ int i915_gem_init_hw(struct drm_i915_private *i915) int ret; BUG_ON(!i915->kernel_context); - ret = i915_terminally_wedged(i915); + ret = intel_gt_terminally_wedged(gt); if (ret) return ret; @@ -1386,7 +1386,7 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) * and ready to be torn-down. The quickest way we can accomplish * this is by declaring ourselves wedged. */ - i915_gem_set_wedged(i915); + intel_gt_set_wedged(&i915->gt); goto out_ctx; } @@ -1541,7 +1541,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv) err_gt: mutex_unlock(&dev_priv->drm.struct_mutex); - i915_gem_set_wedged(dev_priv); + intel_gt_set_wedged(&dev_priv->gt); i915_gem_suspend(dev_priv); i915_gem_suspend_late(dev_priv); @@ -1583,10 +1583,10 @@ int i915_gem_init(struct drm_i915_private *dev_priv) * wedged. But we only want to do this where the GPU is angry, * for all other failure, such as an allocation failure, bail. */ - if (!i915_reset_failed(dev_priv)) { + if (!intel_gt_is_wedged(&dev_priv->gt)) { i915_load_error(dev_priv, "Failed to initialize GPU, declaring it wedged!\n"); - i915_gem_set_wedged(dev_priv); + intel_gt_set_wedged(&dev_priv->gt); } /* Minimal basic recovery for KMS */ @@ -1668,11 +1668,6 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv) i915_gem_init__mm(dev_priv); i915_gem_init__pm(dev_priv); - init_waitqueue_head(&dev_priv->gpu_error.wait_queue); - init_waitqueue_head(&dev_priv->gpu_error.reset_queue); - mutex_init(&dev_priv->gpu_error.wedge_mutex); - init_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu); - atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0); spin_lock_init(&dev_priv->fb_tracking.lock); @@ -1691,7 +1686,7 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv) GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count)); WARN_ON(dev_priv->mm.shrink_count); - cleanup_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu); + intel_gt_cleanup_early(&dev_priv->gt); i915_gemfs_fini(dev_priv); } diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index ed719c8a5bd6..5cc9dfea1e9e 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -7,6 +7,7 @@ #ifndef _I915_GPU_ERROR_H_ #define _I915_GPU_ERROR_H_ +#include <linux/atomic.h> #include <linux/kref.h> #include <linux/ktime.h> #include <linux/sched.h> @@ -163,12 +164,6 @@ struct i915_gpu_state { }; struct i915_gpu_error { - /* For hangcheck timer */ -#define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */ -#define DRM_I915_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD) - - struct delayed_work hangcheck_work; - /* For reset and error_state handling. */ spinlock_t lock; /* Protected by the above dev->gpu_error.lock. */ @@ -176,52 +171,11 @@ struct i915_gpu_error { atomic_t pending_fb_pin; - /** - * flags: Control various stages of the GPU reset - * - * #I915_RESET_BACKOFF - When we start a global reset, we need to - * serialise with any other users attempting to do the same, and - * any global resources that may be clobber by the reset (such as - * FENCE registers). - * - * #I915_RESET_ENGINE[num_engines] - Since the driver doesn't need to - * acquire the struct_mutex to reset an engine, we need an explicit - * flag to prevent two concurrent reset attempts in the same engine. - * As the number of engines continues to grow, allocate the flags from - * the most significant bits. - * - * #I915_WEDGED - If reset fails and we can no longer use the GPU, - * we set the #I915_WEDGED bit. Prior to command submission, e.g. - * i915_request_alloc(), this bit is checked and the sequence - * aborted (with -EIO reported to userspace) if set. - */ - unsigned long flags; -#define I915_RESET_BACKOFF 0 -#define I915_RESET_MODESET 1 -#define I915_RESET_ENGINE 2 -#define I915_WEDGED (BITS_PER_LONG - 1) - /** Number of times the device has been reset (global) */ - u32 reset_count; + atomic_t reset_count; /** Number of times an engine has been reset */ - u32 reset_engine_count[I915_NUM_ENGINES]; - - struct mutex wedge_mutex; /* serialises wedging/unwedging */ - - /** - * Waitqueue to signal when a hang is detected. Used to for waiters - * to release the struct_mutex for the reset to procede. - */ - wait_queue_head_t wait_queue; - - /** - * Waitqueue to signal when the reset has completed. Used by clients - * that wait for dev_priv->mm.wedged to settle. - */ - wait_queue_head_t reset_queue; - - struct srcu_struct reset_backoff_srcu; + atomic_t reset_engine_count[I915_NUM_ENGINES]; }; struct drm_i915_error_state_buf { diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 18568738fa30..92313a59563c 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1369,8 +1369,7 @@ long i915_request_wait(struct i915_request *rq, * serialise wait/reset with an explicit lock, we do want * lockdep to detect potential dependency cycles. */ - mutex_acquire(&rq->i915->gpu_error.wedge_mutex.dep_map, - 0, 0, _THIS_IP_); + mutex_acquire(&rq->engine->gt->reset.mutex.dep_map, 0, 0, _THIS_IP_); /* * Optimistic spin before touching IRQs. @@ -1448,7 +1447,7 @@ long i915_request_wait(struct i915_request *rq, dma_fence_remove_callback(&rq->fence, &wait.cb); out: - mutex_release(&rq->i915->gpu_error.wedge_mutex.dep_map, 0, _THIS_IP_); + mutex_release(&rq->engine->gt->reset.mutex.dep_map, 0, _THIS_IP_); trace_i915_request_wait_end(rq); return timeout; } diff --git a/drivers/gpu/drm/i915/i915_selftest.h b/drivers/gpu/drm/i915/i915_selftest.h index d9b17b9e6993..acdf6eb9e262 100644 --- a/drivers/gpu/drm/i915/i915_selftest.h +++ b/drivers/gpu/drm/i915/i915_selftest.h @@ -72,6 +72,9 @@ int __i915_nop_teardown(int err, void *data); int __i915_live_setup(void *data); int __i915_live_teardown(int err, void *data); +int __intel_gt_live_setup(void *data); +int __intel_gt_live_teardown(int err, void *data); + int __i915_subtests(const char *caller, int (*setup)(void *data), int (*teardown)(int err, void *data), @@ -88,6 +91,12 @@ int __i915_subtests(const char *caller, __i915_live_setup, __i915_live_teardown, \ T, ARRAY_SIZE(T), data); \ }) +#define intel_gt_live_subtests(T, data) ({ \ + typecheck(struct intel_gt *, data); \ + __i915_subtests(__func__, \ + __intel_gt_live_setup, __intel_gt_live_teardown, \ + T, ARRAY_SIZE(T), data); \ +}) #define SUBTEST(x) { x, #x } diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index f104b94c14ef..434ea3ff7e4c 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -887,7 +887,7 @@ static void guc_reset(struct intel_engine_cs *engine, bool stalled) if (!i915_request_started(rq)) stalled = false; - i915_reset_request(rq, stalled); + __i915_request_reset(rq, stalled); intel_lr_context_reset(engine, rq->hw_context, rq->head, stalled); out_unlock: diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index fdf00f1ebb57..b0e83e91ea6f 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -38,7 +38,7 @@ static int __intel_uc_reset_hw(struct drm_i915_private *dev_priv) int ret; u32 guc_status; - ret = intel_reset_guc(dev_priv); + ret = intel_reset_guc(&dev_priv->gt); if (ret) { DRM_ERROR("Failed to reset GuC, ret = %d\n", ret); return ret; diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c index 29603c5173c4..77d844ac8b71 100644 --- a/drivers/gpu/drm/i915/selftests/i915_active.c +++ b/drivers/gpu/drm/i915/selftests/i915_active.c @@ -7,6 +7,7 @@ #include <linux/kref.h> #include "gem/i915_gem_pm.h" +#include "gt/intel_gt.h" #include "i915_selftest.h" @@ -220,7 +221,7 @@ int i915_active_live_selftests(struct drm_i915_private *i915) SUBTEST(live_active_retire), }; - if (i915_terminally_wedged(i915)) + if (intel_gt_is_wedged(&i915->gt)) return 0; return i915_subtests(tests, i915); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c index b8ffae481730..bb6dd54a6ff3 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c @@ -8,6 +8,7 @@ #include "gem/selftests/igt_gem_utils.h" #include "gem/selftests/mock_context.h" +#include "gt/intel_gt.h" #include "i915_selftest.h" @@ -206,7 +207,7 @@ int i915_gem_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_gem_hibernate), }; - if (i915_terminally_wedged(i915)) + if (intel_gt_is_wedged(&i915->gt)) return 0; return i915_live_subtests(tests, i915); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index a3cb0aade6f1..b6449d0a8c17 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -25,6 +25,7 @@ #include "gem/i915_gem_pm.h" #include "gem/selftests/igt_gem_utils.h" #include "gem/selftests/mock_context.h" +#include "gt/intel_gt.h" #include "i915_selftest.h" @@ -557,7 +558,7 @@ int i915_gem_evict_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_evict_contexts), }; - if (i915_terminally_wedged(i915)) + if (intel_gt_is_wedged(&i915->gt)) return 0; return i915_subtests(tests, i915); diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 1bbfc43d4a9e..86c299663934 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -380,7 +380,7 @@ static int __igt_breadcrumbs_smoketest(void *arg) t->engine->name); GEM_TRACE_DUMP(); - i915_gem_set_wedged(t->engine->i915); + intel_gt_set_wedged(t->engine->gt); GEM_BUG_ON(!i915_request_completed(rq)); i915_sw_fence_wait(wait); err = -EIO; @@ -1234,7 +1234,7 @@ int i915_request_live_selftests(struct drm_i915_private *i915) SUBTEST(live_breadcrumbs_smoketest), }; - if (i915_terminally_wedged(i915)) + if (intel_gt_is_wedged(&i915->gt)) return 0; return i915_subtests(tests, i915); diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c index f46ccf817ad5..db9c645bbdfe 100644 --- a/drivers/gpu/drm/i915/selftests/i915_selftest.c +++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c @@ -256,7 +256,7 @@ int __i915_live_setup(void *data) { struct drm_i915_private *i915 = data; - return i915_terminally_wedged(i915); + return intel_gt_terminally_wedged(&i915->gt); } int __i915_live_teardown(int err, void *data) @@ -273,6 +273,27 @@ int __i915_live_teardown(int err, void *data) return err; } +int __intel_gt_live_setup(void *data) +{ + struct intel_gt *gt = data; + + return intel_gt_terminally_wedged(gt); +} + +int __intel_gt_live_teardown(int err, void *data) +{ + struct intel_gt *gt = data; + + mutex_lock(>->i915->drm.struct_mutex); + if (igt_flush_test(gt->i915, I915_WAIT_LOCKED)) + err = -EIO; + mutex_unlock(>->i915->drm.struct_mutex); + + i915_gem_drain_freed_objects(gt->i915); + + return err; +} + int __i915_subtests(const char *caller, int (*setup)(void *data), int (*teardown)(int err, void *data), diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c index 5bfd1b2626a2..d3b5eb402d33 100644 --- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c @@ -5,6 +5,7 @@ */ #include "gem/i915_gem_context.h" +#include "gt/intel_gt.h" #include "i915_drv.h" #include "i915_selftest.h" @@ -13,7 +14,7 @@ int igt_flush_test(struct drm_i915_private *i915, unsigned int flags) { - int ret = i915_terminally_wedged(i915) ? -EIO : 0; + int ret = intel_gt_is_wedged(&i915->gt) ? -EIO : 0; int repeat = !!(flags & I915_WAIT_LOCKED); cond_resched(); @@ -27,7 +28,7 @@ int igt_flush_test(struct drm_i915_private *i915, unsigned int flags) __builtin_return_address(0)); GEM_TRACE_DUMP(); - i915_gem_set_wedged(i915); + intel_gt_set_wedged(&i915->gt); repeat = 0; ret = -EIO; } diff --git a/drivers/gpu/drm/i915/selftests/igt_reset.c b/drivers/gpu/drm/i915/selftests/igt_reset.c index 587df6fd4ffe..7ec8f8b049c6 100644 --- a/drivers/gpu/drm/i915/selftests/igt_reset.c +++ b/drivers/gpu/drm/i915/selftests/igt_reset.c @@ -7,47 +7,45 @@ #include "igt_reset.h" #include "gt/intel_engine.h" +#include "gt/intel_gt.h" #include "../i915_drv.h" -void igt_global_reset_lock(struct drm_i915_private *i915) +void igt_global_reset_lock(struct intel_gt *gt) { struct intel_engine_cs *engine; enum intel_engine_id id; - pr_debug("%s: current gpu_error=%08lx\n", - __func__, i915->gpu_error.flags); + pr_debug("%s: current gpu_error=%08lx\n", __func__, gt->reset.flags); - while (test_and_set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags)) - wait_event(i915->gpu_error.reset_queue, - !test_bit(I915_RESET_BACKOFF, - &i915->gpu_error.flags)); + while (test_and_set_bit(I915_RESET_BACKOFF, >->reset.flags)) + wait_event(gt->reset.queue, + !test_bit(I915_RESET_BACKOFF, >->reset.flags)); - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt->i915, id) { while (test_and_set_bit(I915_RESET_ENGINE + id, - &i915->gpu_error.flags)) - wait_on_bit(&i915->gpu_error.flags, - I915_RESET_ENGINE + id, + >->reset.flags)) + wait_on_bit(>->reset.flags, I915_RESET_ENGINE + id, TASK_UNINTERRUPTIBLE); } } -void igt_global_reset_unlock(struct drm_i915_private *i915) +void igt_global_reset_unlock(struct intel_gt *gt) { struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, i915, id) - clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); + for_each_engine(engine, gt->i915, id) + clear_bit(I915_RESET_ENGINE + id, >->reset.flags); - clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags); - wake_up_all(&i915->gpu_error.reset_queue); + clear_bit(I915_RESET_BACKOFF, >->reset.flags); + wake_up_all(>->reset.queue); } -bool igt_force_reset(struct drm_i915_private *i915) +bool igt_force_reset(struct intel_gt *gt) { - i915_gem_set_wedged(i915); - i915_reset(i915, 0, NULL); + intel_gt_set_wedged(gt); + intel_gt_reset(gt, 0, NULL); - return !i915_reset_failed(i915); + return !intel_gt_is_wedged(gt); } diff --git a/drivers/gpu/drm/i915/selftests/igt_reset.h b/drivers/gpu/drm/i915/selftests/igt_reset.h index 363bd853e50f..851873b67ab3 100644 --- a/drivers/gpu/drm/i915/selftests/igt_reset.h +++ b/drivers/gpu/drm/i915/selftests/igt_reset.h @@ -7,10 +7,12 @@ #ifndef __I915_SELFTESTS_IGT_RESET_H__ #define __I915_SELFTESTS_IGT_RESET_H__ -#include "../i915_drv.h" +#include <linux/types.h> -void igt_global_reset_lock(struct drm_i915_private *i915); -void igt_global_reset_unlock(struct drm_i915_private *i915); -bool igt_force_reset(struct drm_i915_private *i915); +struct intel_gt; + +void igt_global_reset_lock(struct intel_gt *gt); +void igt_global_reset_unlock(struct intel_gt *gt); +bool igt_force_reset(struct intel_gt *gt); #endif diff --git a/drivers/gpu/drm/i915/selftests/igt_wedge_me.h b/drivers/gpu/drm/i915/selftests/igt_wedge_me.h deleted file mode 100644 index 08e5ff11bbd9..000000000000 --- a/drivers/gpu/drm/i915/selftests/igt_wedge_me.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2018 Intel Corporation - */ - -#ifndef IGT_WEDGE_ME_H -#define IGT_WEDGE_ME_H - -#include <linux/workqueue.h> - -#include "../i915_gem.h" - -struct drm_i915_private; - -struct igt_wedge_me { - struct delayed_work work; - struct drm_i915_private *i915; - const char *name; -}; - -static void __igt_wedge_me(struct work_struct *work) -{ - struct igt_wedge_me *w = container_of(work, typeof(*w), work.work); - - pr_err("%s timed out, cancelling test.\n", w->name); - - GEM_TRACE("%s timed out.\n", w->name); - GEM_TRACE_DUMP(); - - i915_gem_set_wedged(w->i915); -} - -static void __igt_init_wedge(struct igt_wedge_me *w, - struct drm_i915_private *i915, - long timeout, - const char *name) -{ - w->i915 = i915; - w->name = name; - - INIT_DELAYED_WORK_ONSTACK(&w->work, __igt_wedge_me); - schedule_delayed_work(&w->work, timeout); -} - -static void __igt_fini_wedge(struct igt_wedge_me *w) -{ - cancel_delayed_work_sync(&w->work); - destroy_delayed_work_on_stack(&w->work); - w->i915 = NULL; -} - -#define igt_wedge_on_timeout(W, DEV, TIMEOUT) \ - for (__igt_init_wedge((W), (DEV), (TIMEOUT), __func__); \ - (W)->i915; \ - __igt_fini_wedge((W))) - -#endif /* IGT_WEDGE_ME_H */ diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 2741805b56c2..fd4cc4809eb8 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -183,11 +183,6 @@ struct drm_i915_private *mock_gem_device(void) intel_gt_init_early(&i915->gt, i915); atomic_inc(&i915->gt.wakeref.count); /* disable; no hw support */ - init_waitqueue_head(&i915->gpu_error.wait_queue); - init_waitqueue_head(&i915->gpu_error.reset_queue); - init_srcu_struct(&i915->gpu_error.reset_backoff_srcu); - mutex_init(&i915->gpu_error.wedge_mutex); - i915->wq = alloc_ordered_workqueue("mock", 0); if (!i915->wq) goto err_drv; -- 2.22.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx