Now that we replay the non-guilty contexts and always replay the default ctx, even when guilty, the assumptions about how many active and pending batches there were at the time of reset have changed. The driver doesn't increment pending counts for contexts that it considers unaffected by the reset, because it can now replay the queued requests. For contexts guilty of the reset, we replay the request envelopes but nop the batchbuffer starts. This changes how many 'hangs' we think were queued. As future queued hangs are now NOPed out, we need to change the assumptions about active counts as well. Adapt to how the replaying of batches affects the assertions in the pending/active counting and banning tests. While doing this, throw out the retrying, to be more strict about the determinism we want to achieve. Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Signed-off-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxx> --- tests/gem_reset_stats.c | 154 ++++++++++++++++++------------------------------ 1 file changed, 56 insertions(+), 98 deletions(-) diff --git a/tests/gem_reset_stats.c b/tests/gem_reset_stats.c index 3de74af..2718a33 100644 --- a/tests/gem_reset_stats.c +++ b/tests/gem_reset_stats.c @@ -152,7 +152,7 @@ static struct timespec ts_injected; #define BAN HANG_ALLOW_BAN #define ASYNC 2 -static void inject_hang(int fd, uint32_t ctx, +static bool inject_hang(int fd, uint32_t ctx, const struct intel_execution_engine *e, unsigned flags) { @@ -163,6 +163,8 @@ static void inject_hang(int fd, uint32_t ctx, hang = igt_hang_ctx(fd, ctx, e->exec_id | e->flags, flags & BAN, NULL); if ((flags & ASYNC) == 0) igt_post_hang_ring(fd, hang); + + return hang.handle >= 0; } static const char *status_to_string(int x) @@ -239,7 +241,7 @@ static void test_rs(const struct intel_execution_engine *e, if (i == hang_index) assert_reset_status(i, fd[i], 0, RS_BATCH_ACTIVE); if (i > hang_index) - assert_reset_status(i, fd[i], 0, RS_BATCH_PENDING); + assert_reset_status(i, fd[i], 0, RS_NO_ERROR); }
igt_assert(igt_seconds_elapsed(&ts_injected) <= 30); @@ -312,10 +314,10 @@ static void test_rs_ctx(const struct intel_execution_engine *e, RS_BATCH_ACTIVE); if (i == hang_index && j > hang_context) assert_reset_status(i, fd[i], ctx[i][j], - RS_BATCH_PENDING); + RS_NO_ERROR); if (i > hang_index) assert_reset_status(i, fd[i], ctx[i][j], - RS_BATCH_PENDING); + RS_NO_ERROR); } } @@ -325,128 +327,84 @@ static void test_rs_ctx(const struct intel_execution_engine *e, } } -static void test_ban(const struct intel_execution_engine *e) +static void test_ban_ctx(const struct intel_execution_engine *e, + const bool ban_default) { struct local_drm_i915_reset_stats rs_bad, rs_good; - int fd_bad, fd_good; - int ban, retry = 10; + int fd_bad, fd_good, i; + uint32_t ctx_good, ctx_bad; int active_count = 0, pending_count = 0; fd_bad = drm_open_driver(DRIVER_INTEL); fd_good = drm_open_driver(DRIVER_INTEL); - assert_reset_status(fd_bad, fd_bad, 0, RS_NO_ERROR); + ctx_good = gem_context_create(fd_bad); + if (!ban_default) + ctx_bad = gem_context_create(fd_bad); + else + ctx_bad = 0; + + assert_reset_status(fd_bad, fd_bad, ctx_bad, RS_NO_ERROR); + assert_reset_status(fd_bad, fd_bad, ctx_good, RS_NO_ERROR); assert_reset_status(fd_good, fd_good, 0, RS_NO_ERROR); - noop(fd_bad, 0, e); - noop(fd_good, 0, e); + igt_assert_lt(0, noop(fd_bad, ctx_bad, e)); + igt_assert_lt(0, noop(fd_bad, ctx_good, e)); + igt_assert_lt(0, noop(fd_good, 0, e)); - assert_reset_status(fd_bad, fd_bad, 0, RS_NO_ERROR); + assert_reset_status(fd_bad, fd_bad, ctx_bad, RS_NO_ERROR); + assert_reset_status(fd_bad, fd_bad, ctx_good, RS_NO_ERROR); assert_reset_status(fd_good, fd_good, 0, RS_NO_ERROR); - inject_hang(fd_bad, 0, e, BAN | ASYNC); - active_count++; + inject_hang(fd_bad, ctx_bad, e, BAN | ASYNC); + /* The next synced ban below ban will get nopped if default ctx */ + if (!ctx_bad) + active_count++; - noop(fd_good, 0, e); - noop(fd_good, 0, e); + igt_assert_lt(0, noop(fd_good, 0, e)); + /* We don't skip requests for 
default ctx, so no + * pending change as they will be replayed */ + igt_assert_lt(0, noop(fd_bad, 0, e)); + igt_assert_lt(0, noop(fd_bad, ctx_good, e)); - /* The second hang will count as pending and be discarded */ - active_count--; - pending_count += 1; /* inject hang does 1 real exec + 1 dummy */ - while (retry--) { - inject_hang(fd_bad, 0, e, BAN); - active_count++; + i = 0; + do { + /* Check if we are already kicked out */ + if (noop(fd_bad, ctx_bad, e) < 0) + break; - ban = noop(fd_bad, 0, e); - if (ban == -EIO) + if (inject_hang(fd_bad, ctx_bad, e, BAN)) + active_count++; + else break; - /* Should not happen often but sometimes hang is declared too - * slow due to our way of faking hang using loop */ - gem_close(fd_bad, ban); + i++; + } while (i < 15); + + /* ctx case needs more as default one will get one nopped */ + if (ctx_bad) + igt_assert_eq(i, 5); + else + igt_assert_eq(i, 4); - igt_info("retrying for ban (%d)\n", retry); - } - igt_assert_eq(ban, -EIO); igt_assert_lt(0, noop(fd_good, 0, e)); + igt_assert_eq(-EIO, noop(fd_bad, ctx_bad, e)); + igt_assert_lt(0, noop(fd_bad, ctx_good, e)); - assert_reset_status(fd_bad, fd_bad, 0, RS_BATCH_ACTIVE); - igt_assert_eq(gem_reset_stats(fd_bad, 0, &rs_bad), 0); + assert_reset_status(fd_bad, fd_bad, ctx_bad, RS_BATCH_ACTIVE); + igt_assert_eq(gem_reset_stats(fd_bad, ctx_bad, &rs_bad), 0); igt_assert_eq(rs_bad.batch_active, active_count); igt_assert_eq(rs_bad.batch_pending, pending_count); - assert_reset_status(fd_good, fd_good, 0, RS_BATCH_PENDING); + assert_reset_status(fd_good, fd_good, 0, RS_NO_ERROR); igt_assert_eq(gem_reset_stats(fd_good, 0, &rs_good), 0); igt_assert_eq(rs_good.batch_active, 0); - igt_assert_eq(rs_good.batch_pending, 2); + igt_assert_eq(rs_good.batch_pending, 0); close(fd_bad); close(fd_good); } -static void test_ban_ctx(const struct intel_execution_engine *e) -{ - struct local_drm_i915_reset_stats rs_bad, rs_good; - int fd, ban, retry = 10; - uint32_t ctx_good, ctx_bad; - int active_count = 0, 
pending_count = 0; - - fd = drm_open_driver(DRIVER_INTEL); - - assert_reset_status(fd, fd, 0, RS_NO_ERROR); - - ctx_good = gem_context_create(fd); - ctx_bad = gem_context_create(fd); - - assert_reset_status(fd, fd, 0, RS_NO_ERROR); - assert_reset_status(fd, fd, ctx_good, RS_NO_ERROR); - assert_reset_status(fd, fd, ctx_bad, RS_NO_ERROR); - - noop(fd, ctx_bad, e); - noop(fd, ctx_good, e); - - assert_reset_status(fd, fd, ctx_good, RS_NO_ERROR); - assert_reset_status(fd, fd, ctx_bad, RS_NO_ERROR); - - inject_hang(fd, ctx_bad, e, BAN | ASYNC); - active_count++; - - noop(fd, ctx_good, e); - noop(fd, ctx_good, e); - - /* This second hang will count as pending and be discarded */ - active_count--; - pending_count++; - while (retry--) { - inject_hang(fd, ctx_bad, e, BAN); - active_count++; - - ban = noop(fd, ctx_bad, e); - if (ban == -EIO) - break; - - /* Should not happen often but sometimes hang is declared too - * slow due to our way of faking hang using loop */ - gem_close(fd, ban); - - igt_info("retrying for ban (%d)\n", retry); - } - igt_assert_eq(ban, -EIO); - igt_assert_lt(0, noop(fd, ctx_good, e)); - - assert_reset_status(fd, fd, ctx_bad, RS_BATCH_ACTIVE); - igt_assert_eq(gem_reset_stats(fd, ctx_bad, &rs_bad), 0); - igt_assert_eq(rs_bad.batch_active, active_count); - igt_assert_eq(rs_bad.batch_pending, pending_count); - - assert_reset_status(fd, fd, ctx_good, RS_BATCH_PENDING); - igt_assert_eq(gem_reset_stats(fd, ctx_good, &rs_good), 0); - igt_assert_eq(rs_good.batch_active, 0); - igt_assert_eq(rs_good.batch_pending, 2); - - close(fd); -} - static void test_unrelated_ctx(const struct intel_execution_engine *e) { int fd1,fd2; @@ -812,10 +770,10 @@ igt_main RUN_CTX_TEST(test_rs_ctx(e, 4, 4, 1, 2)); igt_subtest_f("ban-%s", e->name) - RUN_TEST(test_ban(e)); + RUN_TEST(test_ban_ctx(e, true)); igt_subtest_f("ban-ctx-%s", e->name) - RUN_CTX_TEST(test_ban_ctx(e)); + RUN_CTX_TEST(test_ban_ctx(e, false)); igt_subtest_f("reset-count-%s", e->name) RUN_TEST(test_reset_count(e, 
false)); -- 2.7.4 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx