>-----Original Message----- >From: Intel-gfx [mailto:intel-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx] On Behalf Of >Daniele Ceraolo Spurio >Sent: Thursday, May 4, 2017 11:52 AM >To: intel-gfx@xxxxxxxxxxxxxxxxxxxxx >Subject: [RFC] drm/i915/guc: capture GuC logs if FW fails to load > >We're currently deleting the GuC logs if the FW fails to load, but those are still >useful to understand why the loading failed. Instead of deleting them, taking a >snapshot allows us to access them after driver load is completed. Hi Daniele, I like the idea. But just to confirm: we are still going to get the fetch and load status (e.g. PENDING or FAIL) as before, while the reason for the failure is going to be available in debugfs. Correct? Anusha >Cc: Oscar Mateo <oscar.mateo@xxxxxxxxx> >Cc: Michal Wajdeczko <michal.wajdeczko@xxxxxxxxx> >Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@xxxxxxxxx> >--- > drivers/gpu/drm/i915/i915_debugfs.c | 36 ++++++++++++++++++++--------------- > drivers/gpu/drm/i915/i915_drv.c | 3 +++ > drivers/gpu/drm/i915/i915_drv.h | 6 ++++++ > drivers/gpu/drm/i915/i915_gpu_error.c | 36 >+++++++++++++++++++++++++++++++++++ > drivers/gpu/drm/i915/intel_guc_fwif.h | 14 +++++++++++--- >drivers/gpu/drm/i915/intel_guc_log.c | 10 ++-------- > drivers/gpu/drm/i915/intel_uc.c | 7 +++++-- > 7 files changed, 84 insertions(+), 28 deletions(-) > >diff --git a/drivers/gpu/drm/i915/i915_debugfs.c >b/drivers/gpu/drm/i915/i915_debugfs.c >index 870c470..4ff20fc 100644 >--- a/drivers/gpu/drm/i915/i915_debugfs.c >+++ b/drivers/gpu/drm/i915/i915_debugfs.c >@@ -2543,26 +2543,32 @@ static int i915_guc_info(struct seq_file *m, void >*data) static int i915_guc_log_dump(struct seq_file *m, void *data) { > struct drm_i915_private *dev_priv = node_to_i915(m->private); >- struct drm_i915_gem_object *obj; >- int i = 0, pg; >- >- if (!dev_priv->guc.log.vma) >+ u32 *log; >+ int i = 0; >+ >+ if (dev_priv->guc.log.vma) { >+ log = i915_gem_object_pin_map(dev_priv->guc.log.vma->obj, >+ I915_MAP_WC); >+ if 
(IS_ERR(log)) { >+ DRM_ERROR("Failed to pin guc_log vma\n"); >+ return -ENOMEM; >+ } >+ } else if (dev_priv->gpu_error.guc_load_fail_log) { >+ log = dev_priv->gpu_error.guc_load_fail_log; >+ } else { > return 0; >- >- obj = dev_priv->guc.log.vma->obj; >- for (pg = 0; pg < obj->base.size / PAGE_SIZE; pg++) { >- u32 *log = kmap_atomic(i915_gem_object_get_page(obj, pg)); >- >- for (i = 0; i < PAGE_SIZE / sizeof(u32); i += 4) >- seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n", >- *(log + i), *(log + i + 1), >- *(log + i + 2), *(log + i + 3)); >- >- kunmap_atomic(log); > } > >+ for (i = 0; i < GUC_LOG_SIZE / sizeof(u32); i += 4) >+ seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n", >+ *(log + i), *(log + i + 1), >+ *(log + i + 2), *(log + i + 3)); >+ > seq_putc(m, '\n'); > >+ if (dev_priv->guc.log.vma) >+ i915_gem_object_unpin_map(dev_priv->guc.log.vma->obj); >+ > return 0; > } > >diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c >index 452c265..c7cb36c 100644 >--- a/drivers/gpu/drm/i915/i915_drv.c >+++ b/drivers/gpu/drm/i915/i915_drv.c >@@ -1354,6 +1354,9 @@ void i915_driver_unload(struct drm_device *dev) > cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); > i915_reset_error_state(dev_priv); > >+ /* release GuC error log (if any) */ >+ i915_guc_load_error_log_free(dev_priv); >+ > /* Flush any outstanding unpin_work. */ > drain_workqueue(dev_priv->wq); > >diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h >index 4588b3e..761c663 100644 >--- a/drivers/gpu/drm/i915/i915_drv.h >+++ b/drivers/gpu/drm/i915/i915_drv.h >@@ -1555,6 +1555,9 @@ struct i915_gpu_error { > /* Protected by the above dev->gpu_error.lock. 
*/ > struct i915_gpu_state *first_error; > >+ /* Log snapshot if GuC errors during load */ >+ void *guc_load_fail_log; >+ > unsigned long missed_irq_rings; > > /** >@@ -3687,6 +3690,9 @@ static inline void i915_reset_error_state(struct >drm_i915_private *i915) > > #endif > >+void i915_guc_load_error_log_capture(struct drm_i915_private *i915); >+void i915_guc_load_error_log_free(struct drm_i915_private *i915); >+ > const char *i915_cache_level_str(struct drm_i915_private *i915, int type); > > /* i915_cmd_parser.c */ >diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c >b/drivers/gpu/drm/i915/i915_gpu_error.c >index ec526d9..44a873b 100644 >--- a/drivers/gpu/drm/i915/i915_gpu_error.c >+++ b/drivers/gpu/drm/i915/i915_gpu_error.c >@@ -1809,3 +1809,39 @@ void i915_reset_error_state(struct drm_i915_private >*i915) > > i915_gpu_state_put(error); > } >+ >+void i915_guc_load_error_log_capture(struct drm_i915_private *i915) { >+ void *log, *buf; >+ struct i915_vma *vma = i915->guc.log.vma; >+ >+ if (i915->gpu_error.guc_load_fail_log || !vma) >+ return; >+ >+ /* >+ * the vma should be already pinned and mapped for log runtime >+ * management but let's play safe >+ */ >+ log = i915_gem_object_pin_map(vma->obj, I915_MAP_WC); >+ if (IS_ERR(log)) { >+ DRM_ERROR("Failed to pin guc_log vma\n"); >+ return; >+ } >+ >+ buf = kzalloc(GUC_LOG_SIZE, GFP_KERNEL); >+ if (buf) { >+ memcpy(buf, log, GUC_LOG_SIZE); >+ i915->gpu_error.guc_load_fail_log = buf; >+ } else { >+ DRM_ERROR("Failed to copy guc log\n"); >+ } >+ >+ i915_gem_object_unpin_map(vma->obj); >+ return; >+} >+ >+void i915_guc_load_error_log_free(struct drm_i915_private *i915) { >+ if (i915->gpu_error.guc_load_fail_log) >+ kfree(i915->gpu_error.guc_load_fail_log); >+} >diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h >b/drivers/gpu/drm/i915/intel_guc_fwif.h >index 6156845..be83be0 100644 >--- a/drivers/gpu/drm/i915/intel_guc_fwif.h >+++ b/drivers/gpu/drm/i915/intel_guc_fwif.h >@@ -77,6 +77,17 @@ > #define 
GUC_STAGE_DESC_ATTR_PCH BIT(6) > #define GUC_STAGE_DESC_ATTR_TERMINATED BIT(7) > >+/* >+ * GuC log buffer size. The first page is to save log buffer state. >+Allocate one >+ * extra page for others in case for overlap */ >+#define GUC_LOG_STATE_PAGE 1 >+#define GUC_LOG_CRASH_PAGES 2 /* 1 + 1 extra */ >+#define GUC_LOG_DPC_PAGES 8 /* 7 + 1 extra */ >+#define GUC_LOG_ISR_PAGES 8 /* 7 + 1 extra */ >+#define GUC_LOG_SIZE ((GUC_LOG_STATE_PAGE + GUC_LOG_DPC_PAGES + \ >+ GUC_LOG_ISR_PAGES + GUC_LOG_CRASH_PAGES) << >PAGE_SHIFT) >+ > /* The guc control data is 10 DWORDs */ > #define GUC_CTL_CTXINFO 0 > #define GUC_CTL_CTXNUM_IN16_SHIFT 0 >@@ -93,11 +104,8 @@ > #define GUC_LOG_VALID (1 << 0) > #define GUC_LOG_NOTIFY_ON_HALF_FULL (1 << 1) > #define GUC_LOG_ALLOC_IN_MEGABYTE (1 << 3) >-#define GUC_LOG_CRASH_PAGES 1 > #define GUC_LOG_CRASH_SHIFT 4 >-#define GUC_LOG_DPC_PAGES 7 > #define GUC_LOG_DPC_SHIFT 6 >-#define GUC_LOG_ISR_PAGES 7 > #define GUC_LOG_ISR_SHIFT 9 > #define GUC_LOG_BUF_ADDR_SHIFT 12 > >diff --git a/drivers/gpu/drm/i915/intel_guc_log.c >b/drivers/gpu/drm/i915/intel_guc_log.c >index 16d3b87..b357da3 100644 >--- a/drivers/gpu/drm/i915/intel_guc_log.c >+++ b/drivers/gpu/drm/i915/intel_guc_log.c >@@ -524,7 +524,7 @@ int intel_guc_log_create(struct intel_guc *guc) { > struct i915_vma *vma; > unsigned long offset; >- uint32_t size, flags; >+ uint32_t flags; > int ret; > > GEM_BUG_ON(guc->log.vma); >@@ -532,12 +532,6 @@ int intel_guc_log_create(struct intel_guc *guc) > if (i915.guc_log_level > GUC_LOG_VERBOSITY_MAX) > i915.guc_log_level = GUC_LOG_VERBOSITY_MAX; > >- /* The first page is to save log buffer state. Allocate one >- * extra page for others in case for overlap */ >- size = (1 + GUC_LOG_DPC_PAGES + 1 + >- GUC_LOG_ISR_PAGES + 1 + >- GUC_LOG_CRASH_PAGES + 1) << PAGE_SHIFT; >- > /* We require SSE 4.1 for fast reads from the GuC log buffer and > * it should be present on the chipsets supporting GuC based > * submisssions. 
>@@ -547,7 +541,7 @@ int intel_guc_log_create(struct intel_guc *guc) > goto err; > } > >- vma = intel_guc_allocate_vma(guc, size); >+ vma = intel_guc_allocate_vma(guc, GUC_LOG_SIZE); > if (IS_ERR(vma)) { > ret = PTR_ERR(vma); > goto err; >diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c >index 7fd75ca..64f2314 100644 >--- a/drivers/gpu/drm/i915/intel_uc.c >+++ b/drivers/gpu/drm/i915/intel_uc.c >@@ -274,6 +274,7 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) > > guc_disable_communication(guc); > gen9_reset_guc_interrupts(dev_priv); >+ i915_guc_load_error_log_free(dev_priv); > > /* We need to notify the guc whenever we change the GGTT */ > i915_ggtt_enable_guc(dev_priv); >@@ -320,11 +321,11 @@ int intel_uc_init_hw(struct drm_i915_private >*dev_priv) > > /* Did we succeded or run out of retries? */ > if (ret) >- goto err_submission; >+ goto log_capture; > > ret = guc_enable_communication(guc); > if (ret) >- goto err_submission; >+ goto log_capture; > > intel_guc_auth_huc(dev_priv); > if (i915.enable_guc_submission) { >@@ -350,6 +351,8 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) > err_interrupts: > guc_disable_communication(guc); > gen9_disable_guc_interrupts(dev_priv); >+log_capture: >+ i915_guc_load_error_log_capture(dev_priv); > err_submission: > if (i915.enable_guc_submission) > i915_guc_submission_fini(dev_priv); >-- >1.9.1 > >_______________________________________________ >Intel-gfx mailing list >Intel-gfx@xxxxxxxxxxxxxxxxxxxxx >https://lists.freedesktop.org/mailman/listinfo/intel-gfx _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx