Signed-off-by: John Harrison <John.C.Harrison@xxxxxxxxx>
---
drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c | 10 ++--
drivers/gpu/drm/i915/gt/uc/intel_huc.c | 64 ++++++++++++++++++++---
2 files changed, 63 insertions(+), 11 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
index 0f79cb6585182..52332bb143395 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
@@ -184,7 +184,7 @@ static int guc_wait_ucode(struct intel_guc *guc)
* in the seconds range. However, there is a limit on how long an
* individual wait_for() can wait. So wrap it in a loop.
*/
- before_freq = intel_rps_read_actual_frequency(&uncore->gt->rps);
+ before_freq = intel_rps_read_actual_frequency(>->rps);
before = ktime_get();
for (count = 0; count < GUC_LOAD_RETRY_LIMIT; count++) {
ret = wait_for(guc_load_done(uncore, &status, &success),
1000);
@@ -192,7 +192,7 @@ static int guc_wait_ucode(struct intel_guc *guc)
break;
guc_dbg(guc, "load still in progress, count = %d, freq =
%dMHz, status = 0x%08X [0x%02X/%02X]\n",
- count,
intel_rps_read_actual_frequency(&uncore->gt->rps), status,
+ count, intel_rps_read_actual_frequency(>->rps), status,
REG_FIELD_GET(GS_BOOTROM_MASK, status),
REG_FIELD_GET(GS_UKERNEL_MASK, status));
}
@@ -204,7 +204,7 @@ static int guc_wait_ucode(struct intel_guc *guc)
u32 bootrom = REG_FIELD_GET(GS_BOOTROM_MASK, status);
guc_info(guc, "load failed: status = 0x%08X, time =
%lldms, freq = %dMHz, ret = %d\n",
- status, delta_ms,
intel_rps_read_actual_frequency(&uncore->gt->rps), ret);
+ status, delta_ms,
intel_rps_read_actual_frequency(>->rps), ret);
guc_info(guc, "load failed: status: Reset = %d, BootROM =
0x%02X, UKernel = 0x%02X, MIA = 0x%02X, Auth = 0x%02X\n",
REG_FIELD_GET(GS_MIA_IN_RESET, status),
bootrom, ukernel,
@@ -254,11 +254,11 @@ static int guc_wait_ucode(struct intel_guc *guc)
guc_warn(guc, "excessive init time: %lldms! [status =
0x%08X, count = %d, ret = %d]\n",
delta_ms, status, count, ret);
guc_warn(guc, "excessive init time: [freq = %dMHz, before =
%dMHz, perf_limit_reasons = 0x%08X]\n",
- intel_rps_read_actual_frequency(&uncore->gt->rps), before_freq,
+ intel_rps_read_actual_frequency(>->rps), before_freq,
intel_uncore_read(uncore,
intel_gt_perf_limit_reasons_reg(gt)));
} else {
guc_dbg(guc, "init took %lldms, freq = %dMHz, before =
%dMHz, status = 0x%08X, count = %d, ret = %d\n",
- delta_ms,
intel_rps_read_actual_frequency(&uncore->gt->rps),
+ delta_ms, intel_rps_read_actual_frequency(>->rps),
before_freq, status, count, ret);
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
index ba9e07fc2b577..9ccec7de9628a 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
@@ -6,6 +6,7 @@
#include <linux/types.h>
#include "gt/intel_gt.h"
+#include "gt/intel_rps.h"
#include "intel_guc_reg.h"
#include "intel_huc.h"
#include "intel_huc_print.h"
@@ -447,17 +448,68 @@ static const char *auth_mode_string(struct
intel_huc *huc,
return partial ? "clear media" : "all workloads";
}
+/*
+ * Use a longer timeout for debug builds so that problems can be
detected
+ * and analysed. But a shorter timeout for releases so that user's
don't
+ * wait forever to find out there is a problem. Note that the only
reason
+ * an end user should hit the timeout is in case of extreme thermal
throttling.
+ * And a system that is that hot during boot is probably dead anyway!
+ */
+#if defined(CONFIG_DRM_I915_DEBUG_GEM)
+#define HUC_LOAD_RETRY_LIMIT 20
+#else
+#define HUC_LOAD_RETRY_LIMIT 3
+#endif
+
int intel_huc_wait_for_auth_complete(struct intel_huc *huc,
enum intel_huc_authentication_type type)
{
struct intel_gt *gt = huc_to_gt(huc);
- int ret;
+ struct intel_uncore *uncore = gt->uncore;
+ ktime_t before, after, delta;
+ int ret, count;
+ u64 delta_ms;
+ u32 before_freq;
- ret = __intel_wait_for_register(gt->uncore,
- huc->status[type].reg,
- huc->status[type].mask,
- huc->status[type].value,
- 2, 50, NULL);
+ /*
+ * The KMD requests maximum frequency during driver load,
however thermal
+ * throttling can force the frequency down to minimum (although
the board
+ * really should never get that hot in real life!). IFWI issues
have been
+ * seen to cause sporadic failures to grant the higher
frequency. And at
+ * minimum frequency, the load time can be in the seconds range.
Note that