Since the reset path wants to recover the engines itself, it only wants to reinitialise the hardware using i915_gem_init_hw(). Pull the call to intel_engines_resume() to the module init/resume path so we can avoid it during reset. Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/gem/i915_gem_pm.c | 7 +- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 24 --- drivers/gpu/drm/i915/gt/intel_engine_pm.h | 2 - drivers/gpu/drm/i915/gt/intel_gt_pm.c | 21 ++- drivers/gpu/drm/i915/gt/intel_gt_pm.h | 2 +- drivers/gpu/drm/i915/gt/intel_reset.c | 21 ++- drivers/gpu/drm/i915/i915_gem.c | 173 +++++++++------------- 7 files changed, 116 insertions(+), 134 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 6b730bd4d72f..4d774376f5b8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -254,14 +254,15 @@ void i915_gem_resume(struct drm_i915_private *i915) i915_gem_restore_gtt_mappings(i915); i915_gem_restore_fences(i915); + if (i915_gem_init_hw(i915)) + goto err_wedged; + /* * As we didn't flush the kernel context before suspend, we cannot * guarantee that the context image is complete. So let's just reset * it and start again. */ - intel_gt_resume(&i915->gt); - - if (i915_gem_init_hw(i915)) + if (intel_gt_resume(&i915->gt)) goto err_wedged; intel_uc_resume(i915); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 5253c382034d..84e432abe8e0 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -142,27 +142,3 @@ void intel_engine_init__pm(struct intel_engine_cs *engine) { intel_wakeref_init(&engine->wakeref); } - -int intel_engines_resume(struct drm_i915_private *i915) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - int err = 0; - - intel_gt_pm_get(&i915->gt); - for_each_engine(engine, i915, id) { - intel_engine_pm_get(engine); - engine->serial++; /* kernel context lost */ - err = engine->resume(engine); - intel_engine_pm_put(engine); - if (err) { - dev_err(i915->drm.dev, - "Failed to restart %s (%d)\n", - engine->name, err); - break; - } - } - intel_gt_pm_put(&i915->gt); - - return err; -} diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h index 7d057cdcd919..015ac72d7ad0 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h @@ -31,6 +31,4 @@ void intel_engine_park(struct intel_engine_cs *engine); void intel_engine_init__pm(struct intel_engine_cs *engine); -int intel_engines_resume(struct drm_i915_private *i915); - #endif /* INTEL_ENGINE_PM_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index ec6b69d014b6..36ba80e6a0b7 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -5,6 +5,7 @@ */ #include "i915_drv.h" +#include "intel_engine_pm.h" #include "intel_gt_pm.h" #include "intel_pm.h" #include "intel_wakeref.h" @@ -122,10 +123,11 @@ void intel_gt_sanitize(struct intel_gt *gt, bool force) intel_engine_reset(engine, false); } -void intel_gt_resume(struct intel_gt *gt) +int intel_gt_resume(struct intel_gt *gt) { struct intel_engine_cs *engine; enum intel_engine_id id; + int err = 0; /* * After resume, we may need to poke into the pinned kernel @@ -133,9 +135,12 @@ void intel_gt_resume(struct intel_gt *gt) * Only the kernel contexts should remain pinned over suspend, * allowing us to fixup the user contexts on their first pin. */ + intel_gt_pm_get(gt); for_each_engine(engine, gt->i915, id) { struct intel_context *ce; + intel_engine_pm_get(engine); + ce = engine->kernel_context; if (ce) ce->ops->reset(ce); @@ -143,5 +148,19 @@ void intel_gt_resume(struct intel_gt *gt) ce = engine->preempt_context; if (ce) ce->ops->reset(ce); + + engine->serial++; /* kernel context lost */ + err = engine->resume(engine); + + intel_engine_pm_put(engine); + if (err) { + dev_err(gt->i915->drm.dev, + "Failed to restart %s (%d)\n", + engine->name, err); + break; + } } + intel_gt_pm_put(gt); + + return err; } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h index 4dbb92cf58d7..ba960e1fc209 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h @@ -22,6 +22,6 @@ void intel_gt_pm_put(struct intel_gt *gt); void intel_gt_pm_init_early(struct intel_gt *gt); void intel_gt_sanitize(struct intel_gt *gt, bool force); -void intel_gt_resume(struct intel_gt *gt); +int intel_gt_resume(struct intel_gt *gt); #endif /* INTEL_GT_PM_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index e7cbd9cf85c1..adfdb908587f 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -949,6 +949,21 @@ static int do_reset(struct drm_i915_private *i915, return gt_reset(i915, stalled_mask); } +static int resume(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + int ret; + + for_each_engine(engine, i915, id) { + ret = engine->resume(engine); + if (ret) + return ret; + } + + return 0; +} + /** * i915_reset - reset chip after a hang * @i915: #drm_i915_private to reset @@ -1023,9 +1038,13 @@ void i915_reset(struct drm_i915_private *i915, if (ret) { DRM_ERROR("Failed to initialise HW following reset (%d)\n", ret); - goto error; + goto taint; } + ret = resume(i915); + if (ret) + goto taint; + i915_queue_hangcheck(i915); finish: diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index deecbe128e5b..5cc3a75d521a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -46,7 +46,6 @@ #include "gem/i915_gem_ioctls.h" #include "gem/i915_gem_pm.h" #include "gem/i915_gemfs.h" -#include "gt/intel_engine_pm.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" #include "gt/intel_mocs.h" @@ -1192,12 +1191,17 @@ static void init_unused_rings(struct intel_gt *gt) } } -static int init_hw(struct intel_gt *gt) +int i915_gem_init_hw(struct drm_i915_private *i915) { - struct drm_i915_private *i915 = gt->i915; - struct intel_uncore *uncore = gt->uncore; + struct intel_uncore *uncore = &i915->uncore; + struct intel_gt *gt = &i915->gt; int ret; + BUG_ON(!i915->kernel_context); + ret = i915_terminally_wedged(i915); + if (ret) + return ret; + gt->last_init_time = ktime_get(); /* Double layer security blanket, see i915_gem_init() */ @@ -1248,51 +1252,10 @@ static int init_hw(struct intel_gt *gt) intel_mocs_init_l3cc_table(gt); - intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); - - return 0; - -out: - intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); - - return ret; -} - -int i915_gem_init_hw(struct drm_i915_private *i915) -{ - struct intel_uncore *uncore = &i915->uncore; - int ret; - - BUG_ON(!i915->kernel_context); - ret = i915_terminally_wedged(i915); - if (ret) - return ret; - - /* Double layer security blanket, see i915_gem_init() */ - intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); - - ret = init_hw(&i915->gt); - if (ret) - goto err_init; - - /* Only when the HW is re-initialised, can we replay the requests */ - ret = intel_engines_resume(i915); - if (ret) - goto err_engines; - - intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); - intel_engines_set_scheduler_caps(i915); - return 0; - -err_engines: - intel_uc_fini_hw(i915); -err_init: +out: intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); - - intel_engines_set_scheduler_caps(i915); - return ret; } @@ -1449,28 +1412,28 @@ static int intel_engines_verify_workarounds(struct drm_i915_private *i915) return err; } -int i915_gem_init(struct drm_i915_private *dev_priv) +int i915_gem_init(struct drm_i915_private *i915) { int ret; /* We need to fallback to 4K pages if host doesn't support huge gtt. */ - if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_huge_gtt(dev_priv)) - mkwrite_device_info(dev_priv)->page_sizes = + if (intel_vgpu_active(i915) && !intel_vgpu_has_huge_gtt(i915)) + mkwrite_device_info(i915)->page_sizes = I915_GTT_PAGE_SIZE_4K; - dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1); + i915->mm.unordered_timeline = dma_fence_context_alloc(1); - intel_timelines_init(dev_priv); + intel_timelines_init(i915); - ret = i915_gem_init_userptr(dev_priv); + ret = i915_gem_init_userptr(i915); if (ret) return ret; - ret = intel_uc_init_misc(dev_priv); + ret = intel_uc_init_misc(i915); if (ret) return ret; - ret = intel_wopcm_init(&dev_priv->wopcm); + ret = intel_wopcm_init(&i915->wopcm); if (ret) goto err_uc_misc; @@ -1480,50 +1443,55 @@ int i915_gem_init(struct drm_i915_private *dev_priv) * we hold the forcewake during initialisation these problems * just magically go away. */ - mutex_lock(&dev_priv->drm.struct_mutex); - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); + mutex_lock(&i915->drm.struct_mutex); + intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL); - ret = i915_init_ggtt(dev_priv); + ret = i915_init_ggtt(i915); if (ret) { GEM_BUG_ON(ret == -EIO); goto err_unlock; } - ret = i915_gem_init_scratch(dev_priv, - IS_GEN(dev_priv, 2) ? SZ_256K : PAGE_SIZE); + ret = i915_gem_init_scratch(i915, + IS_GEN(i915, 2) ? SZ_256K : PAGE_SIZE); if (ret) { GEM_BUG_ON(ret == -EIO); goto err_ggtt; } - ret = intel_engines_setup(dev_priv); + ret = intel_engines_setup(i915); if (ret) { GEM_BUG_ON(ret == -EIO); goto err_unlock; } - ret = i915_gem_contexts_init(dev_priv); + ret = i915_gem_contexts_init(i915); if (ret) { GEM_BUG_ON(ret == -EIO); goto err_scratch; } - ret = intel_engines_init(dev_priv); + ret = intel_engines_init(i915); if (ret) { GEM_BUG_ON(ret == -EIO); goto err_context; } - intel_init_gt_powersave(dev_priv); + intel_init_gt_powersave(i915); - ret = intel_uc_init(dev_priv); + ret = intel_uc_init(i915); if (ret) goto err_pm; - ret = i915_gem_init_hw(dev_priv); + ret = i915_gem_init_hw(i915); if (ret) goto err_uc_init; + /* Only when the HW is re-initialised, can we replay the requests */ + ret = intel_gt_resume(&i915->gt); + if (ret) + goto err_init_hw; + /* * Despite its name intel_init_clock_gating applies both display * clock gating workarounds; GT mmio workarounds and the occasional @@ -1533,28 +1501,28 @@ int i915_gem_init(struct drm_i915_private *dev_priv) * * FIXME: break up the workarounds and apply them at the right time! */ - intel_init_clock_gating(dev_priv); + intel_init_clock_gating(i915); - ret = intel_engines_verify_workarounds(dev_priv); + ret = intel_engines_verify_workarounds(i915); if (ret) - goto err_init_hw; + goto err_gt; - ret = __intel_engines_record_defaults(dev_priv); + ret = __intel_engines_record_defaults(i915); if (ret) - goto err_init_hw; + goto err_gt; if (i915_inject_load_failure()) { ret = -ENODEV; - goto err_init_hw; + goto err_gt; } if (i915_inject_load_failure()) { ret = -EIO; - goto err_init_hw; + goto err_gt; } - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); - mutex_unlock(&dev_priv->drm.struct_mutex); + intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); + mutex_unlock(&i915->drm.struct_mutex); return 0; @@ -1564,66 +1532,67 @@ int i915_gem_init(struct drm_i915_private *dev_priv) * HW as irrevisibly wedged, but keep enough state around that the * driver doesn't explode during runtime. */ -err_init_hw: - mutex_unlock(&dev_priv->drm.struct_mutex); +err_gt: + mutex_unlock(&i915->drm.struct_mutex); - i915_gem_set_wedged(dev_priv); - i915_gem_suspend(dev_priv); - i915_gem_suspend_late(dev_priv); + i915_gem_set_wedged(i915); + i915_gem_suspend(i915); + i915_gem_suspend_late(i915); - i915_gem_drain_workqueue(dev_priv); + i915_gem_drain_workqueue(i915); - mutex_lock(&dev_priv->drm.struct_mutex); - intel_uc_fini_hw(dev_priv); + mutex_lock(&i915->drm.struct_mutex); +err_init_hw: + intel_uc_fini_hw(i915); err_uc_init: - intel_uc_fini(dev_priv); + intel_uc_fini(i915); err_pm: if (ret != -EIO) { - intel_cleanup_gt_powersave(dev_priv); - intel_engines_cleanup(dev_priv); + intel_cleanup_gt_powersave(i915); + intel_engines_cleanup(i915); } err_context: if (ret != -EIO) - i915_gem_contexts_fini(dev_priv); + i915_gem_contexts_fini(i915); err_scratch: - i915_gem_fini_scratch(dev_priv); + i915_gem_fini_scratch(i915); err_ggtt: err_unlock: - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); - mutex_unlock(&dev_priv->drm.struct_mutex); + intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); + mutex_unlock(&i915->drm.struct_mutex); err_uc_misc: - intel_uc_fini_misc(dev_priv); + intel_uc_fini_misc(i915); if (ret != -EIO) { - i915_gem_cleanup_userptr(dev_priv); - intel_timelines_fini(dev_priv); + i915_gem_cleanup_userptr(i915); + intel_timelines_fini(i915); } if (ret == -EIO) { - mutex_lock(&dev_priv->drm.struct_mutex); + mutex_lock(&i915->drm.struct_mutex); /* * Allow engine initialisation to fail by marking the GPU as * wedged. But we only want to do this where the GPU is angry, * for all other failure, such as an allocation failure, bail. */ - if (!i915_reset_failed(dev_priv)) { - i915_load_error(dev_priv, + if (!i915_reset_failed(i915)) { + i915_load_error(i915, "Failed to initialize GPU, declaring it wedged!\n"); - i915_gem_set_wedged(dev_priv); + i915_gem_set_wedged(i915); } /* Minimal basic recovery for KMS */ - ret = i915_ggtt_enable_hw(dev_priv); - i915_gem_restore_gtt_mappings(dev_priv); - i915_gem_restore_fences(dev_priv); - intel_init_clock_gating(dev_priv); + ret = i915_ggtt_enable_hw(i915); + i915_gem_restore_gtt_mappings(i915); + i915_gem_restore_fences(i915); + intel_init_clock_gating(i915); - mutex_unlock(&dev_priv->drm.struct_mutex); + mutex_unlock(&i915->drm.struct_mutex); } - i915_gem_drain_freed_objects(dev_priv); + i915_gem_drain_freed_objects(i915); return ret; } -- 2.20.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx