-
static void error_print_instdone(struct drm_i915_error_state_buf *m,
const struct drm_i915_error_engine *ee)
{
@@ -734,33 +779,6 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
error_print_engine(m, &error->engine[i], error->epoch);
}
- for (i = 0; i < ARRAY_SIZE(error->active_vm); i++) {
- char buf[128];
- int len, first = 1;
-
- if (!error->active_vm[i])
- break;
-
- len = scnprintf(buf, sizeof(buf), "Active (");
- for (j = 0; j < ARRAY_SIZE(error->engine); j++) {
- if (error->engine[j].vm != error->active_vm[i])
- continue;
-
- len += scnprintf(buf + len, sizeof(buf), "%s%s",
- first ? "" : ", ",
- m->i915->engine[j]->name);
- first = 0;
- }
- scnprintf(buf + len, sizeof(buf), ")");
- print_error_buffers(m, buf,
- error->active_bo[i],
- error->active_bo_count[i]);
- }
-
- print_error_buffers(m, "Pinned (global)",
- error->pinned_bo,
- error->pinned_bo_count);
-
for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
const struct drm_i915_error_engine *ee = &error->engine[i];
@@ -974,10 +992,6 @@ void __i915_gpu_state_free(struct kref *error_ref)
kfree(ee->requests);
}
- for (i = 0; i < ARRAY_SIZE(error->active_bo); i++)
- kfree(error->active_bo[i]);
- kfree(error->pinned_bo);
-
kfree(error->overlay);
kfree(error->display);
@@ -990,12 +1004,12 @@ void __i915_gpu_state_free(struct kref *error_ref)
static struct drm_i915_error_object *
i915_error_object_create(struct drm_i915_private *i915,
- struct i915_vma *vma)
+ struct i915_vma *vma,
+ struct compress *compress)
{
struct i915_ggtt *ggtt = &i915->ggtt;
const u64 slot = ggtt->error_capture.start;
struct drm_i915_error_object *dst;
- struct compress compress;
unsigned long num_pages;
struct sgt_iter iter;
dma_addr_t dma;
@@ -1006,22 +1020,21 @@ i915_error_object_create(struct drm_i915_private *i915,
num_pages = min_t(u64, vma->size, vma->obj->base.size) >> PAGE_SHIFT;
num_pages = DIV_ROUND_UP(10 * num_pages, 8); /* worstcase zlib growth */
- dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *),
- GFP_ATOMIC | __GFP_NOWARN);
+ dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *), ATOMIC_MAYFAIL);
if (!dst)
return NULL;
+ if (!compress_start(compress)) {
+ kfree(dst);
+ return NULL;
+ }
+
dst->gtt_offset = vma->node.start;
dst->gtt_size = vma->node.size;
dst->num_pages = num_pages;
dst->page_count = 0;
dst->unused = 0;
- if (!compress_init(&compress)) {
- kfree(dst);
- return NULL;
- }
-
ret = -EINVAL;
for_each_sgt_dma(dma, iter, vma->pages) {
void __iomem *s;
@@ -1029,69 +1042,23 @@ i915_error_object_create(struct drm_i915_private *i915,
ggtt->vm.insert_page(&ggtt->vm, dma, slot, I915_CACHE_NONE, 0);
s = io_mapping_map_atomic_wc(&ggtt->iomap, slot);
- ret = compress_page(&compress, (void __force *)s, dst);
+ ret = compress_page(compress, (void __force *)s, dst);
io_mapping_unmap_atomic(s);
if (ret)
break;
}
- if (ret || compress_flush(&compress, dst)) {
+ if (ret || compress_flush(compress, dst)) {
while (dst->page_count--)
- free_page((unsigned long)dst->pages[dst->page_count]);
+ pool_free(&compress->pool, dst->pages[dst->page_count]);
kfree(dst);
dst = NULL;
}
+ compress_finish(compress);
- compress_fini(&compress, dst);
return dst;
}
-static void capture_bo(struct drm_i915_error_buffer *err,
- struct i915_vma *vma)
-{
- struct drm_i915_gem_object *obj = vma->obj;
-
- err->size = obj->base.size;
- err->name = obj->base.name;
-
- err->gtt_offset = vma->node.start;
- err->read_domains = obj->read_domains;
- err->write_domain = obj->write_domain;
- err->fence_reg = vma->fence ? vma->fence->id : -1;
- err->tiling = i915_gem_object_get_tiling(obj);
- err->dirty = obj->mm.dirty;
- err->purgeable = obj->mm.madv != I915_MADV_WILLNEED;
- err->userptr = obj->userptr.mm != NULL;
- err->cache_level = obj->cache_level;
-}
-
-static u32 capture_error_bo(struct drm_i915_error_buffer *err,
- int count, struct list_head *head,
- unsigned int flags)
-#define ACTIVE_ONLY BIT(0)
-#define PINNED_ONLY BIT(1)
-{
- struct i915_vma *vma;
- int i = 0;
-
- list_for_each_entry(vma, head, vm_link) {
- if (!vma->obj)
- continue;
-
- if (flags & ACTIVE_ONLY && !i915_vma_is_active(vma))
- continue;
-
- if (flags & PINNED_ONLY && !i915_vma_is_pinned(vma))
- continue;
-
- capture_bo(err++, vma);
- if (++i == count)
- break;
- }
-
- return i;
-}
-
/*
* Generate a semi-unique error code. The code is not meant to have meaning, The
* code's only purpose is to try to prevent false duplicated bug reports by
@@ -1281,7 +1248,7 @@ static void engine_record_requests(struct intel_engine_cs *engine,
if (!count)
return;
- ee->requests = kcalloc(count, sizeof(*ee->requests), GFP_ATOMIC);
+ ee->requests = kcalloc(count, sizeof(*ee->requests), ATOMIC_MAYFAIL);
if (!ee->requests)
return;
@@ -1349,8 +1316,10 @@ static void record_context(struct drm_i915_error_context *e,
e->active = atomic_read(&ctx->active_count);
}
-static void request_record_user_bo(struct i915_request *request,
- struct drm_i915_error_engine *ee)
+static void
+request_record_user_bo(struct i915_request *request,
+ struct drm_i915_error_engine *ee,
+ struct compress *compress)
{
struct i915_capture_list *c;
struct drm_i915_error_object **bo;
@@ -1362,18 +1331,20 @@ static void request_record_user_bo(struct i915_request *request,
if (!max)
return;
- bo = kmalloc_array(max, sizeof(*bo), GFP_ATOMIC);
+ bo = kmalloc_array(max, sizeof(*bo), ATOMIC_MAYFAIL);
if (!bo) {
/* If we can't capture everything, try to capture something. */
max = min_t(long, max, PAGE_SIZE / sizeof(*bo));
- bo = kmalloc_array(max, sizeof(*bo), GFP_ATOMIC);
+ bo = kmalloc_array(max, sizeof(*bo), ATOMIC_MAYFAIL);
}
if (!bo)
return;
count = 0;
for (c = request->capture_list; c; c = c->next) {
- bo[count] = i915_error_object_create(request->i915, c->vma);
+ bo[count] = i915_error_object_create(request->i915,
+ c->vma,
+ compress);
if (!bo[count])
break;
if (++count == max)
@@ -1386,7 +1357,8 @@ static void request_record_user_bo(struct i915_request *request,
static struct drm_i915_error_object *
capture_object(struct drm_i915_private *dev_priv,
- struct drm_i915_gem_object *obj)
+ struct drm_i915_gem_object *obj,
+ struct compress *compress)
{
if (obj && i915_gem_object_has_pages(obj)) {
struct i915_vma fake = {
@@ -1396,13 +1368,14 @@ capture_object(struct drm_i915_private *dev_priv,
.obj = obj,
};
- return i915_error_object_create(dev_priv, &fake);
+ return i915_error_object_create(dev_priv, &fake, compress);
} else {
return NULL;
}
}
-static void gem_record_rings(struct i915_gpu_state *error)
+static void
+gem_record_rings(struct i915_gpu_state *error, struct compress *compress)
{
struct drm_i915_private *i915 = error->i915;
int i;
@@ -1420,6 +1393,9 @@ static void gem_record_rings(struct i915_gpu_state *error)
ee->engine_id = i;
+ /* Refill our page pool before entering atomic section */
+ pool_refill(&compress->pool, ALLOW_FAIL);
+
error_record_engine_registers(error, engine, ee);
error_record_engine_execlists(engine, ee);
@@ -1429,8 +1405,6 @@ static void gem_record_rings(struct i915_gpu_state *error)
struct i915_gem_context *ctx = request->gem_context;
struct intel_ring *ring = request->ring;
- ee->vm = ctx->vm ?: &engine->gt->ggtt->vm;
-
record_context(&ee->context, ctx);
/* We need to copy these to an anonymous buffer
@@ -1438,17 +1412,21 @@ static void gem_record_rings(struct i915_gpu_state *error)
* by userspace.
*/
ee->batchbuffer =
- i915_error_object_create(i915, request->batch);
+ i915_error_object_create(i915,
+ request->batch,
+ compress);
if (HAS_BROKEN_CS_TLB(i915))
ee->wa_batchbuffer =
i915_error_object_create(i915,
- engine->gt->scratch);
- request_record_user_bo(request, ee);
+ engine->gt->scratch,
+ compress);
+ request_record_user_bo(request, ee, compress);
ee->ctx =
i915_error_object_create(i915,
- request->hw_context->state);
+ request->hw_context->state,
+ compress);
error->simulated |=
i915_gem_context_no_error_capture(ctx);
@@ -1460,7 +1438,9 @@ static void gem_record_rings(struct i915_gpu_state *error)
ee->cpu_ring_head = ring->head;
ee->cpu_ring_tail = ring->tail;
ee->ringbuffer =
- i915_error_object_create(i915, ring->vma);
+ i915_error_object_create(i915,
+ ring->vma,
+ compress);
engine_record_requests(engine, request, ee);
}
@@ -1468,89 +1448,21 @@ static void gem_record_rings(struct i915_gpu_state *error)
ee->hws_page =
i915_error_object_create(i915,
- engine->status_page.vma);
-
- ee->wa_ctx = i915_error_object_create(i915, engine->wa_ctx.vma);
-
- ee->default_state = capture_object(i915, engine->default_state);
- }
-}
-
-static void gem_capture_vm(struct i915_gpu_state *error,
- struct i915_address_space *vm,
- int idx)
-{
- struct drm_i915_error_buffer *active_bo;
- struct i915_vma *vma;
- int count;
-
- count = 0;
- list_for_each_entry(vma, &vm->bound_list, vm_link)
- if (i915_vma_is_active(vma))
- count++;
-
- active_bo = NULL;
- if (count)
- active_bo = kcalloc(count, sizeof(*active_bo), GFP_ATOMIC);
- if (active_bo)
- count = capture_error_bo(active_bo,
- count, &vm->bound_list,
- ACTIVE_ONLY);
- else
- count = 0;
+ engine->status_page.vma,
+ compress);
- error->active_vm[idx] = vm;
- error->active_bo[idx] = active_bo;
- error->active_bo_count[idx] = count;
-}
-
-static void capture_active_buffers(struct i915_gpu_state *error)
-{
- int cnt = 0, i, j;
-
- BUILD_BUG_ON(ARRAY_SIZE(error->engine) > ARRAY_SIZE(error->active_bo));
- BUILD_BUG_ON(ARRAY_SIZE(error->active_bo) != ARRAY_SIZE(error->active_vm));
- BUILD_BUG_ON(ARRAY_SIZE(error->active_bo) != ARRAY_SIZE(error->active_bo_count));
-
- /* Scan each engine looking for unique active contexts/vm */
- for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
- struct drm_i915_error_engine *ee = &error->engine[i];
- bool found;
-
- if (!ee->vm)
- continue;
+ ee->wa_ctx =
+ i915_error_object_create(i915,
+ engine->wa_ctx.vma,
+ compress);
- found = false;
- for (j = 0; j < i && !found; j++)
- found = error->engine[j].vm == ee->vm;
- if (!found)
- gem_capture_vm(error, ee->vm, cnt++);
+ ee->default_state =
+ capture_object(i915, engine->default_state, compress);
}
}
-static void capture_pinned_buffers(struct i915_gpu_state *error)
-{
- struct i915_address_space *vm = &error->i915->ggtt.vm;
- struct drm_i915_error_buffer *bo;
- struct i915_vma *vma;
- int count;
-
- count = 0;
- list_for_each_entry(vma, &vm->bound_list, vm_link)
- count++;
-
- bo = NULL;
- if (count)
- bo = kcalloc(count, sizeof(*bo), GFP_ATOMIC);
- if (!bo)
- return;
-
- error->pinned_bo_count =
- capture_error_bo(bo, count, &vm->bound_list, PINNED_ONLY);
- error->pinned_bo = bo;
-}
-
-static void capture_uc_state(struct i915_gpu_state *error)
+static void
+capture_uc_state(struct i915_gpu_state *error, struct compress *compress)
{
struct drm_i915_private *i915 = error->i915;
struct i915_error_uc *error_uc = &error->uc;
@@ -1567,9 +1479,11 @@ static void capture_uc_state(struct i915_gpu_state *error)
* As modparams are generally accesible from the userspace make
* explicit copies of the firmware paths.
*/
- error_uc->guc_fw.path = kstrdup(uc->guc.fw.path, GFP_ATOMIC);
- error_uc->huc_fw.path = kstrdup(uc->huc.fw.path, GFP_ATOMIC);
- error_uc->guc_log = i915_error_object_create(i915, uc->guc.log.vma);
+ error_uc->guc_fw.path = kstrdup(uc->guc.fw.path, ALLOW_FAIL);
+ error_uc->huc_fw.path = kstrdup(uc->huc.fw.path, ALLOW_FAIL);
+ error_uc->guc_log = i915_error_object_create(i915,
+ uc->guc.log.vma,
+ compress);
}
/* Capture all registers which don't fit into another category. */
@@ -1753,56 +1667,53 @@ static void capture_finish(struct i915_gpu_state *error)
ggtt->vm.clear_range(&ggtt->vm, slot, PAGE_SIZE);
}
-static int capture(void *data)
-{
- struct i915_gpu_state *error = data;
-
- error->time = ktime_get_real();
- error->boottime = ktime_get_boottime();
- error->uptime = ktime_sub(ktime_get(),
- error->i915->gt.last_init_time);
- error->capture = jiffies;
-
- capture_params(error);
- capture_gen_state(error);
- capture_uc_state(error);
- capture_reg_state(error);
- gem_record_fences(error);
- gem_record_rings(error);
- capture_active_buffers(error);
- capture_pinned_buffers(error);
-
- error->overlay = intel_overlay_capture_error_state(error->i915);
- error->display = intel_display_capture_error_state(error->i915);
-
- error->epoch = capture_find_epoch(error);
-
- capture_finish(error);
- return 0;
-}
-
#define DAY_AS_SECONDS(x) (24 * 60 * 60 * (x))
struct i915_gpu_state *
i915_capture_gpu_state(struct drm_i915_private *i915)
{
struct i915_gpu_state *error;
+ struct compress compress;
/* Check if GPU capture has been disabled */
error = READ_ONCE(i915->gpu_error.first_error);
if (IS_ERR(error))
return error;
- error = kzalloc(sizeof(*error), GFP_ATOMIC);
+ error = kzalloc(sizeof(*error), ALLOW_FAIL);
if (!error) {
i915_disable_error_state(i915, -ENOMEM);
return ERR_PTR(-ENOMEM);
}
+ if (!compress_init(&compress)) {
+ kfree(error);
+ i915_disable_error_state(i915, -ENOMEM);
+ return ERR_PTR(-ENOMEM);
+ }
+
kref_init(&error->ref);
error->i915 = i915;
- stop_machine(capture, error, NULL);
+ error->time = ktime_get_real();
+ error->boottime = ktime_get_boottime();
+ error->uptime = ktime_sub(ktime_get(), i915->gt.last_init_time);
+ error->capture = jiffies;
+
+ capture_params(error);
+ capture_gen_state(error);
+ capture_uc_state(error, &compress);
+ capture_reg_state(error);
+ gem_record_fences(error);
+ gem_record_rings(error, &compress);
+
+ error->overlay = intel_overlay_capture_error_state(i915);
+ error->display = intel_display_capture_error_state(i915);
+
+ error->epoch = capture_find_epoch(error);
+
+ capture_finish(error);
+ compress_fini(&compress);
return error;
}
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 85f06bc5da05..a24c35107d16 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -85,7 +85,6 @@ struct i915_gpu_state {
/* Software tracked state */
bool idle;
unsigned long hangcheck_timestamp;
- struct i915_address_space *vm;
int num_requests;
u32 reset_count;
@@ -161,22 +160,6 @@ struct i915_gpu_state {
} vm_info;
} engine[I915_NUM_ENGINES];
- struct drm_i915_error_buffer {
- u32 size;
- u32 name;
- u64 gtt_offset;
- u32 read_domains;
- u32 write_domain;
- s32 fence_reg:I915_MAX_NUM_FENCE_BITS;
- u32 tiling:2;
- u32 dirty:1;
- u32 purgeable:1;
- u32 userptr:1;
- u32 cache_level:3;
- } *active_bo[I915_NUM_ENGINES], *pinned_bo;
- u32 active_bo_count[I915_NUM_ENGINES], pinned_bo_count;
- struct i915_address_space *active_vm[I915_NUM_ENGINES];
-
struct scatterlist *sgl, *fit;
};