Inject probe errors into intel_uc_init_hw to make sure we correctly handle any uC initialization failure. To avoid complains from CI about injected errors use i915_probe_error to lower message level. v2: _sanitize instead _reset to correctly handle Gen9 retries v3: reorder fw status codes as failed fw is still available add more failure points Signed-off-by: Michal Wajdeczko <michal.wajdeczko@xxxxxxxxx> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@xxxxxxxxx> Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Reviewed-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> #v1 --- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 4 ++++ drivers/gpu/drm/i915/gt/uc/intel_huc.c | 8 +++++--- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 20 +++++++++++++++---- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 18 +++++++++++------ drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h | 14 ++++++------- drivers/gpu/drm/i915/i915_gem.c | 2 +- 6 files changed, 45 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index b4b508f19a1c..412892096daa 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1123,6 +1123,10 @@ int intel_guc_submission_enable(struct intel_guc *guc) enum intel_engine_id id; int err; + err = i915_inject_load_error(gt->i915, -ENXIO); + if (err) + return err; + /* * We're using GuC work items for submitting work through GuC. Since * we're coalescing multiple requests from a single context into a diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index d642b167a389..ef54053c5ef9 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -139,6 +139,10 @@ int intel_huc_auth(struct intel_huc *huc) GEM_BUG_ON(!intel_uc_fw_is_loaded(&huc->fw)); GEM_BUG_ON(intel_huc_is_authenticated(huc)); + ret = i915_inject_load_error(gt->i915, -ENXIO); + if (ret) + goto fail; + ret = intel_guc_auth_huc(guc, intel_guc_ggtt_offset(guc, huc->rsa_data)); if (ret) { @@ -158,13 +162,11 @@ int intel_huc_auth(struct intel_huc *huc) } huc->fw.status = INTEL_UC_FIRMWARE_RUNNING; - return 0; fail: + i915_probe_error(gt->i915, "HuC: Authentication failed %d\n", ret); huc->fw.status = INTEL_UC_FIRMWARE_FAIL; - - DRM_ERROR("HuC: Authentication failed %d\n", ret); return ret; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index d1b08b28b1ad..1d21c2646831 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -41,6 +41,10 @@ static int __intel_uc_reset_hw(struct intel_uc *uc) int ret; u32 guc_status; + ret = i915_inject_load_error(gt->i915, -ENXIO); + if (ret) + return ret; + ret = intel_reset_guc(gt); if (ret) { DRM_ERROR("Failed to reset GuC, ret = %d\n", ret); @@ -209,6 +213,10 @@ static int guc_enable_communication(struct intel_guc *guc) GEM_BUG_ON(guc_communication_enabled(guc)); + ret = i915_inject_load_error(i915, -ENXIO); + if (ret) + return ret; + ret = intel_guc_ct_enable(&guc->ct); if (ret) return ret; @@ -340,7 +348,7 @@ void intel_uc_fini(struct intel_uc *uc) intel_guc_fini(guc); } -static void __uc_sanitize(struct intel_uc *uc) +static int __uc_sanitize(struct intel_uc *uc) { struct intel_guc *guc = &uc->guc; struct intel_huc *huc = &uc->huc; @@ -350,7 +358,7 @@ static void __uc_sanitize(struct intel_uc *uc) intel_huc_sanitize(huc); intel_guc_sanitize(guc); - __intel_uc_reset_hw(uc); + return __intel_uc_reset_hw(uc); } void intel_uc_sanitize(struct intel_uc *uc) @@ -378,6 +386,10 @@ static int uc_init_wopcm(struct intel_uc *uc) GEM_BUG_ON(!(size & GUC_WOPCM_SIZE_MASK)); GEM_BUG_ON(size & ~GUC_WOPCM_SIZE_MASK); + err = i915_inject_load_error(gt->i915, -ENXIO); + if (err) + return err; + mask = GUC_WOPCM_SIZE_MASK | GUC_WOPCM_SIZE_LOCKED; err = intel_uncore_write_and_verify(uncore, GUC_WOPCM_SIZE, size, mask, size | GUC_WOPCM_SIZE_LOCKED); @@ -434,7 +446,7 @@ int intel_uc_init_hw(struct intel_uc *uc) * Always reset the GuC just before (re)loading, so * that the state and timing are fairly predictable */ - ret = __intel_uc_reset_hw(uc); + ret = __uc_sanitize(uc); if (ret) goto err_out; @@ -504,7 +516,7 @@ int intel_uc_init_hw(struct intel_uc *uc) if (GEM_WARN_ON(ret == -EIO)) ret = -EINVAL; - dev_err(i915->drm.dev, "GuC initialization failed %d\n", ret); + i915_probe_error(i915, "GuC initialization failed %d\n", ret); return ret; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 650ad6037b74..a3a22a26016c 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -383,6 +383,10 @@ static int uc_fw_xfer(struct intel_uc_fw *uc_fw, struct intel_gt *gt, u64 offset; int ret; + ret = i915_inject_load_error(gt->i915, -ETIMEDOUT); + if (ret) + return ret; + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); /* Set the source address for the uCode */ @@ -443,8 +447,13 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, struct intel_gt *gt, /* make sure the status was cleared the last time we reset the uc */ GEM_BUG_ON(intel_uc_fw_is_loaded(uc_fw)); + err = i915_inject_load_error(gt->i915, -ENOEXEC); + if (err) + return err; + if (!intel_uc_fw_is_available(uc_fw)) return -ENOEXEC; + /* Call custom loader */ intel_uc_fw_ggtt_bind(uc_fw, gt); err = uc_fw_xfer(uc_fw, gt, wopcm_offset, dma_flags); @@ -464,13 +473,10 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, struct intel_gt *gt, return 0; fail: + i915_probe_error(gt->i915, "Failed to load %s firmware %s (%d)\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, + err); uc_fw->status = INTEL_UC_FIRMWARE_FAIL; - DRM_DEBUG_DRIVER("%s fw load failed\n", - intel_uc_fw_type_repr(uc_fw->type)); - - DRM_WARN("%s: Failed to load firmware %s (error %d)\n", - intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, err); - return err; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h index 6b64b8073703..bfe3614613b7 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h @@ -37,12 +37,12 @@ struct intel_gt; #define INTEL_UC_FIRMWARE_URL "https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git/tree/i915" enum intel_uc_fw_status { - INTEL_UC_FIRMWARE_FAIL = -3, /* failed to xfer or init/auth the fw */ - INTEL_UC_FIRMWARE_MISSING = -2, /* blob not found on the system */ - INTEL_UC_FIRMWARE_NOT_SUPPORTED = -1, /* no uc HW */ + INTEL_UC_FIRMWARE_NOT_SUPPORTED = -1, /* no uc HW or disabled */ INTEL_UC_FIRMWARE_UNINITIALIZED = 0, /* used to catch checks done too early */ INTEL_UC_FIRMWARE_SELECTED, /* selected the blob we want to load */ + INTEL_UC_FIRMWARE_MISSING, /* blob not found on the system */ INTEL_UC_FIRMWARE_AVAILABLE, /* blob found and copied in mem */ + INTEL_UC_FIRMWARE_FAIL, /* failed to xfer or init/auth the fw */ INTEL_UC_FIRMWARE_TRANSFERRED, /* dma xfer done */ INTEL_UC_FIRMWARE_RUNNING /* init/auth done */ }; @@ -83,18 +83,18 @@ static inline const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status) { switch (status) { - case INTEL_UC_FIRMWARE_FAIL: - return "FAIL"; - case INTEL_UC_FIRMWARE_MISSING: - return "MISSING"; case INTEL_UC_FIRMWARE_NOT_SUPPORTED: return "N/A"; case INTEL_UC_FIRMWARE_UNINITIALIZED: return "UNINITIALIZED"; case INTEL_UC_FIRMWARE_SELECTED: return "SELECTED"; + case INTEL_UC_FIRMWARE_MISSING: + return "MISSING"; case INTEL_UC_FIRMWARE_AVAILABLE: return "AVAILABLE"; + case INTEL_UC_FIRMWARE_FAIL: + return "FAIL"; case INTEL_UC_FIRMWARE_TRANSFERRED: return "TRANSFERRED"; case INTEL_UC_FIRMWARE_RUNNING: diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 5e87acc4b770..2436cd598e6e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1243,7 +1243,7 @@ int i915_gem_init_hw(struct drm_i915_private *i915) /* We can't enable contexts until all firmware is loaded */ ret = intel_uc_init_hw(>->uc); if (ret) { - DRM_ERROR("Enabling uc failed (%d)\n", ret); + i915_probe_error(i915, "Enabling uc failed (%d)\n", ret); goto out; } -- 2.19.2 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx