We're currently deleting the GuC logs if the FW fails to load, but those are still useful to understand why the loading failed. Instead of deleting them, taking a snapshot allows us to access them after driver load is completed. Cc: Oscar Mateo <oscar.mateo@xxxxxxxxx> Cc: Michal Wajdeczko <michal.wajdeczko@xxxxxxxxx> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_debugfs.c | 36 ++++++++++++++++++++--------------- drivers/gpu/drm/i915/i915_drv.c | 3 +++ drivers/gpu/drm/i915/i915_drv.h | 6 ++++++ drivers/gpu/drm/i915/i915_gpu_error.c | 36 +++++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_guc_fwif.h | 14 +++++++++++--- drivers/gpu/drm/i915/intel_guc_log.c | 10 ++-------- drivers/gpu/drm/i915/intel_uc.c | 7 +++++-- 7 files changed, 84 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 870c470..4ff20fc 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2543,26 +2543,32 @@ static int i915_guc_info(struct seq_file *m, void *data) static int i915_guc_log_dump(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct drm_i915_gem_object *obj; - int i = 0, pg; - - if (!dev_priv->guc.log.vma) + u32 *log; + int i = 0; + + if (dev_priv->guc.log.vma) { + log = i915_gem_object_pin_map(dev_priv->guc.log.vma->obj, + I915_MAP_WC); + if (IS_ERR(log)) { + DRM_ERROR("Failed to pin guc_log vma\n"); + return -ENOMEM; + } + } else if (dev_priv->gpu_error.guc_load_fail_log) { + log = dev_priv->gpu_error.guc_load_fail_log; + } else { return 0; - - obj = dev_priv->guc.log.vma->obj; - for (pg = 0; pg < obj->base.size / PAGE_SIZE; pg++) { - u32 *log = kmap_atomic(i915_gem_object_get_page(obj, pg)); - - for (i = 0; i < PAGE_SIZE / sizeof(u32); i += 4) - seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n", - *(log + i), *(log + i + 1), - *(log + i + 2), *(log + i + 3)); - - 
kunmap_atomic(log); } + for (i = 0; i < GUC_LOG_SIZE / sizeof(u32); i += 4) + seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n", + *(log + i), *(log + i + 1), + *(log + i + 2), *(log + i + 3)); + seq_putc(m, '\n'); + if (dev_priv->guc.log.vma) + i915_gem_object_unpin_map(dev_priv->guc.log.vma->obj); + return 0; } diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 452c265..c7cb36c 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1354,6 +1354,9 @@ void i915_driver_unload(struct drm_device *dev) cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); i915_reset_error_state(dev_priv); + /* release GuC error log (if any) */ + i915_guc_load_error_log_free(dev_priv); + /* Flush any outstanding unpin_work. */ drain_workqueue(dev_priv->wq); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 4588b3e..761c663 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1555,6 +1555,9 @@ struct i915_gpu_error { /* Protected by the above dev->gpu_error.lock. 
*/ struct i915_gpu_state *first_error; + /* Log snapshot if GuC errors during load */ + void *guc_load_fail_log; + unsigned long missed_irq_rings; /** @@ -3687,6 +3690,9 @@ static inline void i915_reset_error_state(struct drm_i915_private *i915) #endif +void i915_guc_load_error_log_capture(struct drm_i915_private *i915); +void i915_guc_load_error_log_free(struct drm_i915_private *i915); + const char *i915_cache_level_str(struct drm_i915_private *i915, int type); /* i915_cmd_parser.c */ diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index ec526d9..44a873b 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1809,3 +1809,39 @@ void i915_reset_error_state(struct drm_i915_private *i915) i915_gpu_state_put(error); } + +void i915_guc_load_error_log_capture(struct drm_i915_private *i915) +{ + void *log, *buf; + struct i915_vma *vma = i915->guc.log.vma; + + if (i915->gpu_error.guc_load_fail_log || !vma) + return; + + /* + * the vma should be already pinned and mapped for log runtime + * management but let's play safe + */ + log = i915_gem_object_pin_map(vma->obj, I915_MAP_WC); + if (IS_ERR(log)) { + DRM_ERROR("Failed to pin guc_log vma\n"); + return; + } + + buf = kzalloc(GUC_LOG_SIZE, GFP_KERNEL); + if (buf) { + memcpy(buf, log, GUC_LOG_SIZE); + i915->gpu_error.guc_load_fail_log = buf; + } else { + DRM_ERROR("Failed to copy guc log\n"); + } + + i915_gem_object_unpin_map(vma->obj); + return; +} + +void i915_guc_load_error_log_free(struct drm_i915_private *i915) +{ + if (i915->gpu_error.guc_load_fail_log) + kfree(i915->gpu_error.guc_load_fail_log); +} diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h index 6156845..be83be0 100644 --- a/drivers/gpu/drm/i915/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/intel_guc_fwif.h @@ -77,6 +77,17 @@ #define GUC_STAGE_DESC_ATTR_PCH BIT(6) #define GUC_STAGE_DESC_ATTR_TERMINATED BIT(7) +/* + * GuC log buffer 
size. The first page is to save log buffer state. Allocate one + * extra page for others in case of overlap + */ +#define GUC_LOG_STATE_PAGE 1 +#define GUC_LOG_CRASH_PAGES 2 /* 1 + 1 extra */ +#define GUC_LOG_DPC_PAGES 8 /* 7 + 1 extra */ +#define GUC_LOG_ISR_PAGES 8 /* 7 + 1 extra */ +#define GUC_LOG_SIZE ((GUC_LOG_STATE_PAGE + GUC_LOG_DPC_PAGES + \ + GUC_LOG_ISR_PAGES + GUC_LOG_CRASH_PAGES) << PAGE_SHIFT) + /* The guc control data is 10 DWORDs */ #define GUC_CTL_CTXINFO 0 #define GUC_CTL_CTXNUM_IN16_SHIFT 0 @@ -93,11 +104,8 @@ #define GUC_LOG_VALID (1 << 0) #define GUC_LOG_NOTIFY_ON_HALF_FULL (1 << 1) #define GUC_LOG_ALLOC_IN_MEGABYTE (1 << 3) -#define GUC_LOG_CRASH_PAGES 1 #define GUC_LOG_CRASH_SHIFT 4 -#define GUC_LOG_DPC_PAGES 7 #define GUC_LOG_DPC_SHIFT 6 -#define GUC_LOG_ISR_PAGES 7 #define GUC_LOG_ISR_SHIFT 9 #define GUC_LOG_BUF_ADDR_SHIFT 12 diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c index 16d3b87..b357da3 100644 --- a/drivers/gpu/drm/i915/intel_guc_log.c +++ b/drivers/gpu/drm/i915/intel_guc_log.c @@ -524,7 +524,7 @@ int intel_guc_log_create(struct intel_guc *guc) { struct i915_vma *vma; unsigned long offset; - uint32_t size, flags; + uint32_t flags; int ret; GEM_BUG_ON(guc->log.vma); @@ -532,12 +532,6 @@ int intel_guc_log_create(struct intel_guc *guc) if (i915.guc_log_level > GUC_LOG_VERBOSITY_MAX) i915.guc_log_level = GUC_LOG_VERBOSITY_MAX; - /* The first page is to save log buffer state. Allocate one - * extra page for others in case for overlap */ - size = (1 + GUC_LOG_DPC_PAGES + 1 + - GUC_LOG_ISR_PAGES + 1 + - GUC_LOG_CRASH_PAGES + 1) << PAGE_SHIFT; - /* We require SSE 4.1 for fast reads from the GuC log buffer and * it should be present on the chipsets supporting GuC based * submisssions. 
@@ -547,7 +541,7 @@ int intel_guc_log_create(struct intel_guc *guc) goto err; } - vma = intel_guc_allocate_vma(guc, size); + vma = intel_guc_allocate_vma(guc, GUC_LOG_SIZE); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto err; diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index 7fd75ca..64f2314 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -274,6 +274,7 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) guc_disable_communication(guc); gen9_reset_guc_interrupts(dev_priv); + i915_guc_load_error_log_free(dev_priv); /* We need to notify the guc whenever we change the GGTT */ i915_ggtt_enable_guc(dev_priv); @@ -320,11 +321,11 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) /* Did we succeded or run out of retries? */ if (ret) - goto err_submission; + goto log_capture; ret = guc_enable_communication(guc); if (ret) - goto err_submission; + goto log_capture; intel_guc_auth_huc(dev_priv); if (i915.enable_guc_submission) { @@ -350,6 +351,8 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) err_interrupts: guc_disable_communication(guc); gen9_disable_guc_interrupts(dev_priv); +log_capture: + i915_guc_load_error_log_capture(dev_priv); err_submission: if (i915.enable_guc_submission) i915_guc_submission_fini(dev_priv); -- 1.9.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx