Simple regression test to check that we don't trample the
rq->reserved_space when returning from emit_pte(), if the ring is
nearly full.

v2: Make spinner_kill() static
v3: Reduce the ring size further, which should mean we need to execute
    fewer noops; hopefully this appeases bsw. Also add some debug
    logging.
v4: Fix the min request construction to account for reserved_space +
    I915_EMIT_PTE_NUM_DWORDS
v5: Use a simple on-stack timer to kill the spinner instead of kthread
    (Chris)

References: https://gitlab.freedesktop.org/drm/intel/-/issues/7535
References: https://gitlab.freedesktop.org/drm/intel/-/issues/6889
Signed-off-by: Matthew Auld <matthew.auld@xxxxxxxxx>
Cc: Chris Wilson <chris.p.wilson@xxxxxxxxx>
Cc: Andi Shyti <andi.shyti@xxxxxxxxxxxxxxx>
Cc: Andrzej Hajda <andrzej.hajda@xxxxxxxxx>
Cc: Nirmoy Das <nirmoy.das@xxxxxxxxx>
---
 drivers/gpu/drm/i915/gt/intel_migrate.c    |   6 +-
 drivers/gpu/drm/i915/gt/selftest_migrate.c | 145 +++++++++++++++++++++
 2 files changed, 149 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c
index 48c3b5168558..6df728b82a73 100644
--- a/drivers/gpu/drm/i915/gt/intel_migrate.c
+++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
@@ -352,6 +352,8 @@ static int max_pte_pkt_size(struct i915_request *rq, int pkt)
 	return pkt;
 }
 
+#define I915_EMIT_PTE_NUM_DWORDS 6
+
 static int emit_pte(struct i915_request *rq,
 		    struct sgt_dma *it,
 		    enum i915_cache_level cache_level,
@@ -393,7 +395,7 @@ static int emit_pte(struct i915_request *rq,
 
 	offset += (u64)rq->engine->instance << 32;
 
-	cs = intel_ring_begin(rq, 6);
+	cs = intel_ring_begin(rq, I915_EMIT_PTE_NUM_DWORDS);
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);
 
@@ -416,7 +418,7 @@ static int emit_pte(struct i915_request *rq,
 			intel_ring_advance(rq, cs);
 			intel_ring_update_space(ring);
 
-			cs = intel_ring_begin(rq, 6);
+			cs = intel_ring_begin(rq, I915_EMIT_PTE_NUM_DWORDS);
 			if (IS_ERR(cs))
 				return PTR_ERR(cs);
 
diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c
index 1eab025ac002..e677f2da093d 100644
--- a/drivers/gpu/drm/i915/gt/selftest_migrate.c
+++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c
@@ -8,6 +8,7 @@
 #include "gem/i915_gem_internal.h"
 #include "gem/i915_gem_lmem.h"
 
+#include "selftests/igt_spinner.h"
 #include "selftests/i915_random.h"
 
 static const unsigned int sizes[] = {
@@ -529,6 +530,149 @@ static int live_migrate_clear(void *arg)
 	return 0;
 }
 
+struct spinner_timer {
+	struct timer_list timer;
+	struct igt_spinner spin;
+};
+
+static void spinner_kill(struct timer_list *timer)
+{
+	struct spinner_timer *st = from_timer(st, timer, timer);
+
+	igt_spinner_end(&st->spin);
+	pr_info("%s\n", __func__);
+}
+
+static int live_emit_pte_full_ring(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_migrate *migrate = &gt->migrate;
+	struct drm_i915_private *i915 = migrate->context->engine->i915;
+	struct drm_i915_gem_object *obj;
+	struct intel_context *ce;
+	struct i915_request *rq, *prev;
+	struct spinner_timer st;
+	struct sgt_dma it;
+	int len, sz, err;
+	u32 *cs;
+
+	/*
+	 * Simple regression test to check that we don't trample the
+	 * rq->reserved_space when returning from emit_pte(), if the ring is
+	 * nearly full.
+	 */
+
+	if (igt_spinner_init(&st.spin, to_gt(i915)))
+		return -ENOMEM;
+
+	obj = i915_gem_object_create_internal(i915, 2 * PAGE_SIZE);
+	if (IS_ERR(obj)) {
+		err = PTR_ERR(obj);
+		goto out_spinner;
+	}
+
+	err = i915_gem_object_pin_pages_unlocked(obj);
+	if (err)
+		goto out_obj;
+
+	ce = intel_migrate_create_context(migrate);
+	if (IS_ERR(ce)) {
+		err = PTR_ERR(ce);
+		goto out_obj;
+	}
+
+	ce->ring_size = SZ_4K; /* Not too big */
+
+	err = intel_context_pin(ce);
+	if (err)
+		goto out_put;
+
+	rq = igt_spinner_create_request(&st.spin, ce, MI_ARB_CHECK);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto out_unpin;
+	}
+
+	i915_request_add(rq);
+	if (!igt_wait_for_spinner(&st.spin, rq)) {
+		err = -EIO;
+		goto out_unpin;
+	}
+
+	/*
+	 * Fill the rest of the ring leaving I915_EMIT_PTE_NUM_DWORDS +
+	 * ring->reserved_space at the end. To actually emit the PTEs we require
+	 * slightly more than I915_EMIT_PTE_NUM_DWORDS, since our object size is
+	 * greater than PAGE_SIZE. The correct behaviour is to wait for more
+	 * ring space in emit_pte(), otherwise we trample on the reserved_space
+	 * resulting in crashes when later submitting the rq.
+	 */
+
+	prev = NULL;
+	do {
+		if (prev)
+			i915_request_add(rq);
+
+		rq = i915_request_create(ce);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			goto out_unpin;
+		}
+
+		sz = (rq->ring->space - rq->reserved_space) / sizeof(u32) -
+			I915_EMIT_PTE_NUM_DWORDS;
+		sz = min_t(u32, sz, (SZ_1K - rq->reserved_space) / sizeof(u32) -
+			   I915_EMIT_PTE_NUM_DWORDS);
+		cs = intel_ring_begin(rq, sz);
+		if (IS_ERR(cs)) {
+			err = PTR_ERR(cs);
+			goto out_rq;
+		}
+
+		memset32(cs, MI_NOOP, sz);
+		cs += sz;
+		intel_ring_advance(rq, cs);
+
+		pr_info("%s emit=%u sz=%d\n", __func__, rq->ring->emit, sz);
+
+		prev = rq;
+	} while (rq->ring->space > (rq->reserved_space +
+				    I915_EMIT_PTE_NUM_DWORDS * sizeof(u32)));
+
+	timer_setup_on_stack(&st.timer, spinner_kill, 0);
+	mod_timer(&st.timer, jiffies + 2 * HZ);
+
+	/*
+	 * This should wait for the spinner to be killed, otherwise we should go
+	 * down in flames when doing i915_request_add().
+	 */
+	pr_info("%s emit_pte ring space=%u\n", __func__, rq->ring->space);
+	it = sg_sgt(obj->mm.pages->sgl);
+	len = emit_pte(rq, &it, obj->cache_level, false, 0, CHUNK_SZ);
+	if (!len) {
+		err = -EINVAL;
+		goto out_rq;
+	}
+	if (len < 0) {
+		err = len;
+		goto out_rq;
+	}
+
+out_rq:
+	i915_request_add(rq); /* GEM_BUG_ON(rq->reserved_space > ring->space)? */
+	del_timer_sync(&st.timer);
+	destroy_timer_on_stack(&st.timer);
+out_unpin:
+	intel_context_unpin(ce);
+out_put:
+	intel_context_put(ce);
+out_obj:
+	i915_gem_object_put(obj);
+out_spinner:
+	igt_spinner_fini(&st.spin);
+	return err;
+}
+
 struct threaded_migrate {
 	struct intel_migrate *migrate;
 	struct task_struct *tsk;
@@ -651,6 +795,7 @@ int intel_migrate_live_selftests(struct drm_i915_private *i915)
	static const struct i915_subtest tests[] = {
 		SUBTEST(live_migrate_copy),
 		SUBTEST(live_migrate_clear),
+		SUBTEST(live_emit_pte_full_ring),
 		SUBTEST(thread_migrate_copy),
 		SUBTEST(thread_migrate_clear),
 		SUBTEST(thread_global_copy),
-- 
2.38.1
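For context, here is a minimal, self-contained sketch of the ring-space
invariant the selftest exercises. Everything in it (toy_ring,
toy_ring_begin, the sizes) is made up for illustration and is not the
i915 API; the point is only that a writer which ignores the reserved
tail when the ring is nearly full tramples it, which is exactly what
emit_pte() must avoid by waiting for more space instead of emitting
anyway.

/* toy_ring.c -- illustrative only, hypothetical names, not i915 code.
 * Build: cc -Wall -o toy_ring toy_ring.c
 */
#include <assert.h>
#include <stdio.h>
#include <string.h>

#define RING_SIZE      64	/* total dword slots in the toy ring */
#define RESERVED_SPACE  8	/* slots held back for the request epilogue */

struct toy_ring {
	unsigned int buf[RING_SIZE];
	unsigned int emit;	/* next free slot */
	unsigned int space;	/* usable slots, excluding the reservation */
};

/* Hand out @n slots, or fail if that would eat into the reservation. */
static int toy_ring_begin(struct toy_ring *ring, unsigned int n,
			  unsigned int **cs)
{
	if (n > ring->space)
		return -1;	/* caller must wait for space instead */

	*cs = &ring->buf[ring->emit];
	ring->emit += n;
	ring->space -= n;
	return 0;
}

int main(void)
{
	struct toy_ring ring = { .space = RING_SIZE - RESERVED_SPACE };
	unsigned int *cs;

	/* Fill everything up to (but not into) the reserved tail. */
	assert(!toy_ring_begin(&ring, RING_SIZE - RESERVED_SPACE, &cs));
	memset(cs, 0, (RING_SIZE - RESERVED_SPACE) * sizeof(*cs));

	/*
	 * Ask for a few more dwords with the ring nearly full. A correct
	 * writer refuses (or waits); emitting anyway would scribble over
	 * the reserved tail, which is the failure mode the selftest
	 * provokes in emit_pte().
	 */
	assert(toy_ring_begin(&ring, 6, &cs) < 0);
	printf("reservation intact: space=%u emit=%u\n", ring.space, ring.emit);
	return 0;
}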