The current error state harks back to the era of just a single VM. For
full-ppgtt, we capture every bo on every VM. It behoves us to then print
every bo for every VM, which we currently fail to do and so miss vital
information in the error state.

Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_drv.h       |  4 +++
 drivers/gpu/drm/i915/i915_gpu_error.c | 66 ++++++++++++++++++++++++++---------
 2 files changed, 54 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 8f0ef8645ea9..673aaee527eb 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -331,6 +331,7 @@ struct drm_i915_error_state {
         u32 faddr[I915_NUM_RINGS];
         u64 fence[I915_MAX_NUM_FENCES];
         struct timeval time;
+
         struct drm_i915_error_ring {
                 struct drm_i915_error_object {
                         int page_count;
@@ -344,6 +345,7 @@ struct drm_i915_error_state {
                 } *requests;
                 int num_requests;
         } ring[I915_NUM_RINGS];
+
         struct drm_i915_error_buffer {
                 u32 size;
                 u32 name;
@@ -361,6 +363,8 @@ struct drm_i915_error_state {
                 u32 cache_level:3;
         } **active_bo, **pinned_bo;
         u32 *active_bo_count, *pinned_bo_count;
+        u32 vm_count;
+
         struct intel_overlay_error_state *overlay;
         struct intel_display_error_state *display;
         int hangcheck_score[I915_NUM_RINGS];
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index cce9f559e3d7..b7ead8ce6628 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -188,10 +188,10 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
                                 struct drm_i915_error_buffer *err,
                                 int count)
 {
-        err_printf(m, "%s [%d]:\n", name, count);
+        err_printf(m, "  %s [%d]:\n", name, count);
 
         while (count--) {
-                err_printf(m, "  %08x %8u %02x %02x %x %x",
+                err_printf(m, "    %08x %8u %02x %02x %x %x",
                            err->gtt_offset,
                            err->size,
                            err->read_domains,
@@ -332,15 +332,17 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
         for_each_ring(ring, dev_priv, i)
                 i915_ring_error_state(m, dev, error, i);
 
-        if (error->active_bo)
+        for (i = 0; i < error->vm_count; i++) {
+                err_printf(m, "vm[%d]\n", i);
+
                 print_error_buffers(m, "Active",
-                                    error->active_bo[0],
-                                    error->active_bo_count[0]);
+                                    error->active_bo[i],
+                                    error->active_bo_count[i]);
 
-        if (error->pinned_bo)
                 print_error_buffers(m, "Pinned",
-                                    error->pinned_bo[0],
-                                    error->pinned_bo_count[0]);
+                                    error->pinned_bo[i],
+                                    error->pinned_bo_count[i]);
+        }
 
         for (i = 0; i < ARRAY_SIZE(error->ring); i++) {
                 struct drm_i915_error_object *obj;
@@ -606,13 +608,23 @@ static u32 capture_active_bo(struct drm_i915_error_buffer *err,
 }
 
 static u32 capture_pinned_bo(struct drm_i915_error_buffer *err,
-                             int count, struct list_head *head)
+                             int count, struct list_head *head,
+                             struct i915_address_space *vm)
 {
         struct drm_i915_gem_object *obj;
         int i = 0;
 
         list_for_each_entry(obj, head, global_list) {
-                if (!i915_gem_obj_is_pinned(obj))
+                struct i915_vma *vma;
+                bool bound = false;
+
+                list_for_each_entry(vma, &obj->vma_list, vma_link)
+                        if (vma->vm == vm && vma->pin_count > 0) {
+                                bound = true;
+                                break;
+                        }
+
+                if (!bound)
                         continue;
 
                 capture_bo(err++, obj);
@@ -876,9 +888,14 @@ static void i915_gem_capture_vm(struct drm_i915_private *dev_priv,
         list_for_each_entry(vma, &vm->active_list, mm_list)
                 i++;
         error->active_bo_count[ndx] = i;
-        list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
-                if (i915_gem_obj_is_pinned(obj))
-                        i++;
+
+        list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
+                list_for_each_entry(vma, &obj->vma_list, vma_link)
+                        if (vma->vm == vm && vma->pin_count > 0) {
+                                i++;
+                                break;
+                        }
+        }
         error->pinned_bo_count[ndx] = i - error->active_bo_count[ndx];
 
         if (i) {
@@ -897,7 +914,7 @@ static void i915_gem_capture_vm(struct drm_i915_private *dev_priv,
                 error->pinned_bo_count[ndx] =
                         capture_pinned_bo(pinned_bo,
                                           error->pinned_bo_count[ndx],
-                                          &dev_priv->mm.bound_list);
+                                          &dev_priv->mm.bound_list, vm);
         error->active_bo[ndx] = active_bo;
         error->pinned_bo[ndx] = pinned_bo;
 }
@@ -918,8 +935,25 @@ static void i915_gem_capture_buffers(struct drm_i915_private *dev_priv,
         error->pinned_bo_count = kcalloc(cnt, sizeof(*error->pinned_bo_count),
                                          GFP_ATOMIC);
 
-        list_for_each_entry(vm, &dev_priv->vm_list, global_link)
-                i915_gem_capture_vm(dev_priv, error, vm, i++);
+        if (error->active_bo == NULL ||
+            error->pinned_bo == NULL ||
+            error->active_bo_count == NULL ||
+            error->pinned_bo_count == NULL) {
+                kfree(error->active_bo);
+                kfree(error->active_bo_count);
+                kfree(error->pinned_bo);
+                kfree(error->pinned_bo_count);
+
+                error->active_bo = NULL;
+                error->active_bo_count = NULL;
+                error->pinned_bo = NULL;
+                error->pinned_bo_count = NULL;
+        } else {
+                list_for_each_entry(vm, &dev_priv->vm_list, global_link)
+                        i915_gem_capture_vm(dev_priv, error, vm, i++);
+
+                error->vm_count = cnt;
+        }
 }
 
 /**
-- 
1.8.5.2
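
[Editor's illustration, not part of the patch: a minimal user-space sketch of the
per-VM pin test the diff adds to capture_pinned_bo() and i915_gem_capture_vm().
The struct and field names below (vm, vma, gem_object, obj_pinned_in_vm) are
simplified hypothetical stand-ins for the i915 types, not the real kernel
definitions. The point being modelled: an object is only counted against a given
address space if one of its bindings targets that VM and has a non-zero pin count.]

/* Standalone model, not kernel code: simplified stand-ins for
 * i915_address_space, i915_vma and drm_i915_gem_object. */
#include <stdbool.h>
#include <stdio.h>

struct vm { const char *name; };

struct vma {                    /* one binding of an object into a VM */
        struct vm *vm;
        int pin_count;
        struct vma *next;       /* next binding of the same object */
};

struct gem_object {
        struct vma *vma_list;   /* all bindings of this object */
};

/* Mirrors the walk the patch introduces: pinned with respect to this VM only,
 * rather than pinned anywhere (the old i915_gem_obj_is_pinned() check). */
static bool obj_pinned_in_vm(const struct gem_object *obj, const struct vm *vm)
{
        const struct vma *vma;

        for (vma = obj->vma_list; vma; vma = vma->next)
                if (vma->vm == vm && vma->pin_count > 0)
                        return true;
        return false;
}

int main(void)
{
        struct vm ggtt = { "ggtt" }, ppgtt = { "ppgtt" };
        struct vma in_ppgtt = { &ppgtt, 0, NULL };      /* bound, not pinned */
        struct vma in_ggtt = { &ggtt, 1, &in_ppgtt };   /* pinned in the GGTT */
        struct gem_object obj = { &in_ggtt };

        printf("pinned in %s: %d\n", ggtt.name, obj_pinned_in_vm(&obj, &ggtt));
        printf("pinned in %s: %d\n", ppgtt.name, obj_pinned_in_vm(&obj, &ppgtt));
        return 0;
}

[With full-ppgtt this distinction matters: an object pinned into the global GTT
is reported under that VM's buffer list only, instead of being counted as
pinned in every per-process address space that merely has it bound.]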