Periodically check, for example when idling and upon closing user contexts, whether or not some client has written into unallocated PTE in their ppGTT. Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Reviewed-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx> --- .../drm/i915/gem/selftests/i915_gem_context.c | 24 +++++++--- .../drm/i915/gem/selftests/i915_gem_mman.c | 32 ++++++++++++- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 31 +------------ drivers/gpu/drm/i915/gt/intel_gt_pm.c | 10 +++++ drivers/gpu/drm/i915/gt/intel_gtt.c | 45 +++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_gtt.h | 1 + drivers/gpu/drm/i915/i915_scheduler.c | 33 +------------- drivers/gpu/drm/i915/i915_utils.c | 29 ++++++++++++ drivers/gpu/drm/i915/i915_utils.h | 3 ++ 9 files changed, 141 insertions(+), 67 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index df949320f2b5..5a9128dd3979 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -1737,7 +1737,7 @@ static int read_from_scratch(struct i915_gem_context *ctx, return err; } -static int check_scratch_page(struct i915_gem_context *ctx, u32 *out) +static int check_ctx_scratch(struct i915_gem_context *ctx, u32 *out) { struct i915_address_space *vm; struct page *page; @@ -1770,6 +1770,17 @@ static int check_scratch_page(struct i915_gem_context *ctx, u32 *out) return err; } +static void reset_ctx_scratch(struct i915_gem_context *ctx, u32 value) +{ + struct i915_address_space *vm = ctx_vm(ctx); + struct page *page = __px_page(vm->scratch[0]); + u32 *vaddr; + + vaddr = kmap(page); + memset32(vaddr, value, PAGE_SIZE / sizeof(value)); + kunmap(page); +} + static int igt_vm_isolation(void *arg) { struct drm_i915_private *i915 = arg; @@ -1816,11 +1827,11 @@ static int igt_vm_isolation(void *arg) goto out_file; /* Read the initial state of the scratch page */ - err = 
check_scratch_page(ctx_a, &expected); + err = check_ctx_scratch(ctx_a, &expected); if (err) goto out_file; - err = check_scratch_page(ctx_b, &expected); + err = check_ctx_scratch(ctx_b, &expected); if (err) goto out_file; @@ -1855,7 +1866,7 @@ static int igt_vm_isolation(void *arg) err = read_from_scratch(ctx_b, engine, offset, &value); if (err) - goto out_file; + goto out_scratch; if (value != expected) { pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n", @@ -1864,7 +1875,7 @@ static int igt_vm_isolation(void *arg) lower_32_bits(offset), this); err = -EINVAL; - goto out_file; + goto out_scratch; } this++; @@ -1875,6 +1886,9 @@ static int igt_vm_isolation(void *arg) pr_info("Checked %lu scratch offsets across %lu engines\n", count, num_engines); +out_scratch: + /* As we deliberately write into scratch, cover up our tracks */ + reset_ctx_scratch(ctx_a, expected); out_file: if (igt_live_test_end(&t)) err = -EIO; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index df558ce95a94..b7f41f230c8f 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -155,6 +155,18 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj, drm_clflush_virt_range(cpu, sizeof(*cpu)); kunmap(p); + if (check_scratch_page(vma->vm)) { + pr_err("Partial view for %lu [%u] (offset=%llu, size=%u [%llu, row size %u], fence=%d, tiling=%d, stride=%d) overwrote scratch\n", + page, n, + view.partial.offset, + view.partial.size, + vma->size >> PAGE_SHIFT, + tile->tiling ? tile_row_pages(obj) : 0, + vma->fence ? 
vma->fence->id : -1, + tile->tiling, tile->stride); + err = -EIO; + } + out: __i915_vma_put(vma); return err; @@ -250,6 +262,9 @@ static int check_partial_mappings(struct drm_i915_gem_object *obj, return -EINTR; } + if (check_scratch_page(&to_i915(obj->base.dev)->ggtt.vm)) + return -EIO; + return 0; } @@ -399,7 +414,11 @@ static int igt_partial_tiling(void *arg) } } -next_tiling: ; +next_tiling: + if (check_scratch_page(&i915->ggtt.vm)) { + err = -EIO; + break; + } } out_unlock: @@ -496,6 +515,8 @@ static int igt_smoke_tiling(void *arg) } while (!__igt_timeout(end, NULL)); pr_info("%s: Completed %lu trials\n", __func__, count); + if (check_scratch_page(&i915->ggtt.vm)) + err = -EIO; intel_runtime_pm_put(&i915->runtime_pm, wakeref); i915_gem_object_unpin_pages(obj); @@ -703,6 +724,8 @@ static int igt_mmap_offset_exhaustion(void *arg) } } + if (check_scratch_page(&i915->ggtt.vm)) + err = -EIO; out: mmap_offset_lock(i915); out_park: @@ -904,6 +927,9 @@ static int __igt_mmap(struct drm_i915_private *i915, err = wc_check(obj); if (err == -ENXIO) err = gtt_check(obj); + + if (check_scratch_page(&i915->ggtt.vm)) + err = -EIO; out_unmap: vm_munmap(addr, obj->base.size); return err; @@ -1175,6 +1201,8 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915, goto out_unmap; } + if (check_scratch_page(&i915->ggtt.vm)) + err = -EIO; out_unmap: vm_munmap(addr, obj->base.size); return err; @@ -1316,6 +1344,8 @@ static int __igt_mmap_revoke(struct drm_i915_private *i915, goto out_unmap; } + if (check_scratch_page(&i915->ggtt.vm)) + err = -EIO; out_unmap: vm_munmap(addr, obj->base.size); return err; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 577ebd4a324f..8443794df3ee 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1265,35 +1265,6 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine) } } -static void hexdump(struct drm_printer *m, const void 
*buf, size_t len) -{ - const size_t rowsize = 8 * sizeof(u32); - const void *prev = NULL; - bool skip = false; - size_t pos; - - for (pos = 0; pos < len; pos += rowsize) { - char line[128]; - - if (prev && !memcmp(prev, buf + pos, rowsize)) { - if (!skip) { - drm_printf(m, "*\n"); - skip = true; - } - continue; - } - - WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos, - rowsize, sizeof(u32), - line, sizeof(line), - false) >= sizeof(line)); - drm_printf(m, "[%04zx] %s\n", pos, line); - - prev = buf + pos; - skip = false; - } -} - static void intel_engine_print_registers(struct intel_engine_cs *engine, struct drm_printer *m) { @@ -1450,7 +1421,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, } drm_printf(m, "HWSP:\n"); - hexdump(m, engine->status_page.addr, PAGE_SIZE); + i915_hexdump(m, engine->status_page.addr, PAGE_SIZE); drm_printf(m, "Idle? %s\n", yesno(intel_engine_is_idle(engine))); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index f41612faa269..ca7c6613662e 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -11,6 +11,7 @@ #include "intel_context.h" #include "intel_engine_pm.h" #include "intel_gt.h" +#include "intel_gtt.h" #include "intel_gt_clock_utils.h" #include "intel_gt_pm.h" #include "intel_gt_requests.h" @@ -100,6 +101,9 @@ static int __gt_park(struct intel_wakeref *wf) runtime_end(gt); intel_gt_park_requests(gt); + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + check_scratch_page(gt->vm); + i915_vma_parked(gt); i915_pmu_gt_parked(i915); intel_rps_park(&gt->rps); @@ -212,6 +216,8 @@ int intel_gt_resume(struct intel_gt *gt) return err; GT_TRACE(gt, "\n"); + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + check_scratch_page(gt->vm); /* * After resume, we may need to poke into the pinned kernel @@ -298,6 +304,8 @@ void intel_gt_suspend_prepare(struct intel_gt *gt) /* Flush all the contexts and internal state before turning off GGTT */ gt_sanitize(gt, 
false); + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + check_scratch_page(gt->vm); } static suspend_state_t pm_suspend_target(void) @@ -341,6 +349,8 @@ void intel_gt_suspend_late(struct intel_gt *gt) } gt_sanitize(gt, false); /* Be paranoid, remove all residual GPU state */ + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + check_scratch_page(gt->vm); GT_TRACE(gt, "\n"); } diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index d34770ae4c9a..1357fd440f77 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -158,10 +158,52 @@ static void poison_scratch_page(struct drm_i915_gem_object *scratch) vaddr = kmap(page); memset(vaddr, val, PAGE_SIZE); + set_page_dirty(page); /* keep the poisoned contents */ kunmap(page); } } +bool check_scratch_page(const struct i915_address_space *vm) +{ + struct drm_i915_gem_object *scratch; + struct sgt_iter sgt; + struct page *page; + void *vaddr; + u8 val; + + scratch = vm->scratch[0]; + if (!scratch || !i915_gem_object_has_struct_page(scratch)) + return false; + + val = 0; + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + val = POISON_FREE; + + for_each_sgt_page(page, sgt, scratch->mm.pages) { + vaddr = kmap(page); + drm_clflush_virt_range(vaddr, PAGE_SIZE); + if (memchr_inv(vaddr, val, PAGE_SIZE)) { + struct drm_printer p = drm_err_printer(__func__); + + drm_err(&vm->i915->drm, + "%s scratch page overwritten, detected by %pS!\n", + i915_is_ggtt(vm) ? 
"Global" : "Per-process", + (void *)_RET_IP_); + i915_hexdump(&p, vaddr, PAGE_SIZE); + vaddr = NULL; + } + kunmap(page); + + /* Restore the poison, so fresh errors will be detected */ + if (!vaddr) { + poison_scratch_page(scratch); + return true; + } + } + + return false; +} + int setup_scratch_page(struct i915_address_space *vm) { unsigned long size; @@ -229,6 +271,9 @@ void free_scratch(struct i915_address_space *vm) { int i; + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + check_scratch_page(vm); + for (i = 0; i <= vm->top; i++) i915_gem_object_put(vm->scratch[i]); } diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index 24b5808df16d..12f7261e4c89 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -519,6 +519,7 @@ fill_page_dma(struct drm_i915_gem_object *p, const u64 val, unsigned int count); } while (0) int setup_scratch_page(struct i915_address_space *vm); +bool check_scratch_page(const struct i915_address_space *vm); void free_scratch(struct i915_address_space *vm); struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz); diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index a8fb787278e6..7241f85c9967 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -1095,35 +1095,6 @@ void i915_request_show_with_schedule(struct drm_printer *m, rcu_read_unlock(); } -static void hexdump(struct drm_printer *m, const void *buf, size_t len) -{ - const size_t rowsize = 8 * sizeof(u32); - const void *prev = NULL; - bool skip = false; - size_t pos; - - for (pos = 0; pos < len; pos += rowsize) { - char line[128]; - - if (prev && !memcmp(prev, buf + pos, rowsize)) { - if (!skip) { - drm_printf(m, "*\n"); - skip = true; - } - continue; - } - - WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos, - rowsize, sizeof(u32), - line, sizeof(line), - false) >= sizeof(line)); - drm_printf(m, "[%04zx] 
%s\n", pos, line); - - prev = buf + pos; - skip = false; - } -} - static void print_request_ring(struct drm_printer *m, const struct i915_request *rq) { @@ -1153,7 +1124,7 @@ print_request_ring(struct drm_printer *m, const struct i915_request *rq) } memcpy(ring + len, vaddr + head, size - len); - hexdump(m, ring, size); + i915_hexdump(m, ring, size); kfree(ring); } } @@ -1195,7 +1166,7 @@ void i915_sched_show(struct drm_printer *m, if (rq->context->lrc_reg_state) { drm_printf(m, "Logical Ring Context:\n"); - hexdump(m, rq->context->lrc_reg_state, PAGE_SIZE); + i915_hexdump(m, rq->context->lrc_reg_state, PAGE_SIZE); } } diff --git a/drivers/gpu/drm/i915/i915_utils.c b/drivers/gpu/drm/i915/i915_utils.c index 894de60833ec..432ad0926586 100644 --- a/drivers/gpu/drm/i915/i915_utils.c +++ b/drivers/gpu/drm/i915/i915_utils.c @@ -49,6 +49,35 @@ __i915_printk(struct drm_i915_private *dev_priv, const char *level, } } +void i915_hexdump(struct drm_printer *m, const void *buf, size_t len) +{ + const size_t rowsize = 8 * sizeof(u32); + const void *prev = NULL; + bool skip = false; + size_t pos; + + for (pos = 0; pos < len; pos += rowsize) { + char line[128]; + + if (prev && !memcmp(prev, buf + pos, rowsize)) { + if (!skip) { + drm_printf(m, "*\n"); + skip = true; + } + continue; + } + + WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos, + rowsize, sizeof(u32), + line, sizeof(line), + false) >= sizeof(line)); + drm_printf(m, "[%04zx] %s\n", pos, line); + + prev = buf + pos; + skip = false; + } +} + void add_taint_for_CI(struct drm_i915_private *i915, unsigned int taint) { __i915_printk(i915, KERN_NOTICE, "CI tainted:%#x by %pS\n", diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 4618fe8aacb5..c82461d6ae71 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -32,6 +32,7 @@ #include <linux/workqueue.h> struct drm_i915_private; +struct drm_printer; struct timer_list; #define FDO_BUG_URL 
"https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs" @@ -82,6 +83,8 @@ bool i915_error_injected(void); __i915_printk(i915, i915_error_injected() ? KERN_DEBUG : KERN_ERR, \ fmt, ##__VA_ARGS__) +void i915_hexdump(struct drm_printer *m, const void *buf, size_t len); + #if defined(GCC_VERSION) && GCC_VERSION >= 70000 #define add_overflows_t(T, A, B) \ __builtin_add_overflow_p((A), (B), (T)0) -- 2.20.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx