We use MI_STORE_DWORD_IMM internally (e.g. for gpu relocations) and so require that its writes are flushed to memory on demand. Verify this with a selftest. v2: Use variable lengths of submission queues as the delay between submit and checking is also crucially important for error detection. v3: Keep a ref to avoid the shrinker stealing our objects Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> --- .../drm/i915/selftests/i915_gem_coherency.c | 467 ++++++++++++++++++ 1 file changed, 467 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c index f7392c1ffe75..791cdbffae05 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c @@ -26,6 +26,7 @@ #include "../i915_selftest.h" #include "i915_random.h" +#include "igt_flush_test.h" static int cpu_set(struct drm_i915_gem_object *obj, unsigned long offset, @@ -386,10 +387,476 @@ static int igt_gem_coherency(void *arg) goto unlock; } +#define DW_PER_PAGE (PAGE_SIZE / sizeof(u32)) + +struct live_test { + struct drm_i915_private *i915; + const char *func; + const char *name; + + unsigned int reset_global; + unsigned int reset_engine[I915_NUM_ENGINES]; +}; + +static int begin_live_test(struct live_test *t, + struct drm_i915_private *i915, + const char *func, + const char *name) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err; + + t->i915 = i915; + t->func = func; + t->name = name; + + err = i915_gem_wait_for_idle(i915, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + if (err) { + pr_err("%s(%s): failed to idle before, with err=%d!", + func, name, err); + return err; + } + + i915->gpu_error.missed_irq_rings = 0; + t->reset_global = i915_reset_count(&i915->gpu_error); + + for_each_engine(engine, i915, id) + t->reset_engine[id] = + i915_reset_engine_count(&i915->gpu_error, engine); + + return 0; +} + +static int end_live_test(struct live_test *t) +{ + 
struct drm_i915_private *i915 = t->i915; + struct intel_engine_cs *engine; + enum intel_engine_id id; + + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + return -EIO; + + if (t->reset_global != i915_reset_count(&i915->gpu_error)) { + pr_err("%s(%s): GPU was reset %d times!\n", + t->func, t->name, + i915_reset_count(&i915->gpu_error) - t->reset_global); + return -EIO; + } + + for_each_engine(engine, i915, id) { + if (t->reset_engine[id] == + i915_reset_engine_count(&i915->gpu_error, engine)) + continue; + + pr_err("%s(%s): engine '%s' was reset %d times!\n", + t->func, t->name, engine->name, + i915_reset_engine_count(&i915->gpu_error, engine) - + t->reset_engine[id]); + return -EIO; + } + + if (i915->gpu_error.missed_irq_rings) { + pr_err("%s(%s): Missed interrupts on engines %lx\n", + t->func, t->name, i915->gpu_error.missed_irq_rings); + return -EIO; + } + + return 0; +} + +static int cpu_fill(struct drm_i915_gem_object *obj, u32 value) +{ + const bool has_llc = HAS_LLC(to_i915(obj->base.dev)); + unsigned int n, need_flush; + int err; + + err = i915_gem_obj_prepare_shmem_write(obj, &need_flush); + if (err) + return err; + + for (n = 0; n < obj->base.size >> PAGE_SHIFT; n++) { + u32 *map; + + map = kmap_atomic(i915_gem_object_get_page(obj, n)); + memset32(map, value, DW_PER_PAGE); + if (!has_llc) + drm_clflush_virt_range(map, PAGE_SIZE); + kunmap_atomic(map); + } + + i915_gem_obj_finish_shmem_access(obj); + obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU; + obj->write_domain = 0; + return 0; +} + +static int file_add_object(struct drm_file *file, + struct drm_i915_gem_object *obj) +{ + int err; + + GEM_BUG_ON(obj->base.handle_count); + + /* tie the object to the drm_file for easy reaping */ + err = idr_alloc(&file->object_idr, &obj->base, 1, 0, GFP_KERNEL); + if (err < 0) + return err; + + i915_gem_object_get(obj); + obj->base.handle_count++; + return 0; +} + +static struct drm_i915_gem_object * +create_test_object(struct drm_i915_private *i915, + 
unsigned int num_pages, + struct drm_file *file, + struct list_head *objects) +{ + struct drm_i915_gem_object *obj; + int err; + + obj = i915_gem_object_create_internal(i915, num_pages << PAGE_SHIFT); + if (IS_ERR(obj)) + return obj; + + err = file_add_object(file, obj); + if (err) + goto err_put; + + err = cpu_fill(obj, STACK_MAGIC); + if (err) + goto err_put; + + err = i915_gem_object_set_to_gtt_domain(obj, false); + if (err) + goto err_put; + + list_add_tail(&obj->st_link, objects); + return obj; + +err_put: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +static struct i915_vma * +gpu_fill_dw(struct i915_vma *vma, u64 offset, unsigned long count, u32 value) +{ + struct drm_i915_gem_object *obj; + const int gen = INTEL_GEN(vma->vm->i915); + unsigned long n, size; + u32 *cmd; + int err; + + size = (4 * count + 1) * sizeof(u32); + size = round_up(size, PAGE_SIZE); + obj = i915_gem_object_create_internal(vma->vm->i915, size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto err; + } + + GEM_BUG_ON(offset + (count - 1) * PAGE_SIZE > vma->node.size); + offset += vma->node.start; + + for (n = 0; n < count; n++) { + if (gen >= 8) { + *cmd++ = MI_STORE_DWORD_IMM_GEN4; + *cmd++ = lower_32_bits(offset); + *cmd++ = upper_32_bits(offset); + *cmd++ = value; + } else if (gen >= 4) { + *cmd++ = MI_STORE_DWORD_IMM_GEN4 | + (gen < 6 ? 
MI_USE_GGTT : 0); + *cmd++ = 0; + *cmd++ = offset; + *cmd++ = value; + } else { + *cmd++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; + *cmd++ = offset; + *cmd++ = value; + } + offset += PAGE_SIZE; + } + *cmd = MI_BATCH_BUFFER_END; + i915_gem_object_unpin_map(obj); + + err = i915_gem_object_set_to_gtt_domain(obj, false); + if (err) + goto err; + + vma = i915_vma_instance(obj, vma->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto err; + + return vma; + +err: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +static int gpu_fill(struct drm_i915_gem_object *obj, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + unsigned int dw) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_address_space *vm = + ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm; + struct i915_request *rq; + struct i915_vma *vma; + struct i915_vma *batch; + unsigned int flags; + int err; + + GEM_BUG_ON(obj->base.size > vm->total); + GEM_BUG_ON(!intel_engine_can_store_dword(engine)); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + return err; + + batch = gpu_fill_dw(vma, + dw * sizeof(u32), + obj->base.size >> PAGE_SHIFT, + engine->id << 16 | dw); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto err_vma; + } + + rq = i915_request_alloc(engine, ctx); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_batch; + } + + flags = 0; + if (INTEL_GEN(vm->i915) <= 5) + flags |= I915_DISPATCH_SECURE; + + err = engine->emit_bb_start(rq, + batch->node.start, batch->node.size, + flags); + if (err) + goto err_request; + + err = i915_vma_move_to_active(batch, rq, 0); + if (err) + goto skip_request; + + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + if (err) + goto skip_request; + + i915_gem_chipset_flush(vm->i915); + i915_request_add(rq); + + 
i915_gem_object_set_active_reference(batch->obj); + i915_vma_unpin(batch); + i915_vma_close(batch); + + i915_vma_unpin(vma); + + return 0; + +skip_request: + i915_request_skip(rq, err); +err_request: + i915_request_add(rq); +err_batch: + i915_vma_unpin(batch); + i915_vma_put(batch); +err_vma: + i915_vma_unpin(vma); + return err; +} + +static int coherency_check(struct drm_i915_gem_object *obj, + unsigned int idx, unsigned int max) +{ + unsigned int n, m, needs_flush; + int err; + + err = i915_gem_obj_prepare_shmem_read(obj, &needs_flush); + if (err) + return err; + + for (n = 0; n < obj->base.size >> PAGE_SHIFT; n++) { + u32 *map; + + map = kmap_atomic(i915_gem_object_get_page(obj, n)); + if (needs_flush & CLFLUSH_BEFORE) + drm_clflush_virt_range(map, PAGE_SIZE); + + for (m = 0; m < max; m++) { + u32 x = map[m]; + + if ((x & 0xffff) != m) { + pr_err("Invalid value at page %d:%d, offset %d: found %x expected %x\n", + idx, n, m, x, m); + err = -EINVAL; + goto out_unmap; + } + } + + for (; m < DW_PER_PAGE; m++) { + u32 x = map[m]; + + if (x != STACK_MAGIC) { + pr_err("Invalid value at page %d:%d, offset %d: found %x expected %x\n", + idx, n, m, x, STACK_MAGIC); + err = -EINVAL; + goto out_unmap; + } + } + +out_unmap: + kunmap_atomic(map); + if (err) + break; + } + + i915_gem_obj_finish_shmem_access(obj); + return err; +} + +static int igt_mi_store_dw(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_gem_context *ctx; + struct drm_file *file; + unsigned long timeout; + unsigned long npages; + struct live_test t; + int err = 0; + + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + mutex_lock(&i915->drm.struct_mutex); + + ctx = live_context(i915, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out_unlock; + } + + npages = 0; + for (timeout = 1; + !err && timeout < i915_selftest.timeout_jiffies; + timeout = next_prime_number(2 * timeout)) { + unsigned long end_time = jiffies + timeout; + struct drm_i915_gem_object *obj = 
NULL; + struct intel_engine_cs *engine; + unsigned long ndwords, width, dw, id; + LIST_HEAD(objects); + + err = begin_live_test(&t, i915, __func__, ""); + if (err) + break; + + dw = 0; + width = 0; + ndwords = 0; + while (!time_after(jiffies, end_time)) { + for_each_engine(engine, i915, id) { + if (!intel_engine_can_store_dword(engine)) + continue; + + if (!obj) { + struct i915_address_space *vm = + ctx->ppgtt ? + &ctx->ppgtt->vm : + &i915->ggtt.vm; + + npages = next_prime_number(2 * npages); + if (npages > vm->total >> PAGE_SHIFT) + goto done; + + obj = create_test_object(i915, npages, + file, &objects); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto free; + } + } + + intel_runtime_pm_get(i915); + err = gpu_fill(obj, ctx, engine, dw); + intel_runtime_pm_put(i915); + if (err) { + pr_err("Failed to fill dword %lu [%lu] with gpu (%s), err=%d\n", + ndwords, dw, engine->name, err); + goto free; + } + + if (++dw == DW_PER_PAGE) { + obj = NULL; + dw = 0; + } + + ndwords += npages; + width++; + } + } +done: + dw = 0; + for_each_engine(engine, i915, id) + dw += intel_engine_can_store_dword(engine); + pr_info("Submitted %lu/%lu dwords (across %lu engines) in %lu jiffies\n", ndwords, width, dw, timeout); + +free: + dw = 0; + id = 0; + list_for_each_entry(obj, &objects, st_link) { + unsigned int num_writes = + min_t(unsigned int, width - dw, DW_PER_PAGE); + + if (err == 0) + err = coherency_check(obj, id++, num_writes); + i915_gem_object_put(obj); + + dw += num_writes; + } + + if (end_live_test(&t)) + err = -EIO; + i915_retire_requests(i915); + } + +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + + mock_file_free(i915, file); + return err; +} + int i915_gem_coherency_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_gem_coherency), + SUBTEST(igt_mi_store_dw), }; return i915_subtests(tests, i915); -- 2.19.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx 
https://lists.freedesktop.org/mailman/listinfo/intel-gfx