When we allow a wait on a future fence, it must autoexpire if the fence
is never signaled by userspace. Also put future fences to work, as the
intention is to use them, along with WAIT_SUBMIT and semaphores, for
userspace to perform its own fine-grained scheduling. Or simply run
concurrent clients without having to flush batches between context
switches.

v2: Verify deadlock detection
v3: Release the bo and syncobj when the future wait is rejected, destroy
    the syncobj used by the has_future_syncobj() probe, replace stray
    comma operators with semicolons, and describe the new helpers.

Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
---
 tests/i915/gem_exec_fence.c | 587 +++++++++++++++++++++++++++++++++++-
 1 file changed, 584 insertions(+), 3 deletions(-)

diff --git a/tests/i915/gem_exec_fence.c b/tests/i915/gem_exec_fence.c
index 4b0d87e4d..e51b7452e 100644
--- a/tests/i915/gem_exec_fence.c
+++ b/tests/i915/gem_exec_fence.c
@@ -46,6 +46,15 @@ struct sync_merge_data {
 #define SYNC_IOC_MERGE		_IOWR(SYNC_IOC_MAGIC, 3, struct sync_merge_data)
 #endif
 
+#define MI_SEMAPHORE_WAIT		(0x1c << 23)
+#define   MI_SEMAPHORE_POLL		(1 << 15)
+#define   MI_SEMAPHORE_SAD_GT_SDD	(0 << 12)
+#define   MI_SEMAPHORE_SAD_GTE_SDD	(1 << 12)
+#define   MI_SEMAPHORE_SAD_LT_SDD	(2 << 12)
+#define   MI_SEMAPHORE_SAD_LTE_SDD	(3 << 12)
+#define   MI_SEMAPHORE_SAD_EQ_SDD	(4 << 12)
+#define   MI_SEMAPHORE_SAD_NEQ_SDD	(5 << 12)
+
 static void store(int fd, const struct intel_execution_engine2 *e,
 		  int fence, uint32_t target, unsigned offset_value)
 {
@@ -907,11 +916,12 @@ static void test_syncobj_invalid_wait(int fd)
 	struct drm_i915_gem_exec_fence fence = {
 		.handle = syncobj_create(fd, 0),
 	};
+	int out;
 
 	memset(&execbuf, 0, sizeof(execbuf));
 	execbuf.buffers_ptr = to_user_pointer(&obj);
 	execbuf.buffer_count = 1;
-	execbuf.flags = I915_EXEC_FENCE_ARRAY;
+	execbuf.flags = I915_EXEC_FENCE_ARRAY | I915_EXEC_FENCE_OUT;
 	execbuf.cliprects_ptr = to_user_pointer(&fence);
 	execbuf.num_cliprects = 1;
 
@@ -919,14 +929,57 @@ static void test_syncobj_invalid_wait(int fd)
 	obj.handle = gem_create(fd, 4096);
 	gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
 
-	/* waiting before the fence is set is invalid */
+	/* waiting before the fence is set is^W may be invalid */
 	fence.flags = I915_EXEC_FENCE_WAIT;
-	igt_assert_eq(__gem_execbuf(fd, &execbuf), -EINVAL);
+	if (__gem_execbuf_wr(fd, &execbuf)) {
+		igt_assert_eq(__gem_execbuf(fd, &execbuf), -EINVAL);
+	} else {
+		/* If we do allow the wait on a future fence, it should autoexpire */
+		gem_sync(fd, obj.handle);
+		out = execbuf.rsvd2 >> 32;
+		igt_assert_eq(sync_fence_status(out), -ETIMEDOUT);
+		close(out);
+	}
 
 	gem_close(fd, obj.handle);
 	syncobj_destroy(fd, fence.handle);
 }
 
+static void test_syncobj_incomplete_wait_submit(int i915)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = batch_create(i915),
+	};
+	struct drm_i915_gem_exec_fence fence = {
+		.handle = syncobj_create(i915, 0),
+		.flags = I915_EXEC_FENCE_WAIT | I915_EXEC_FENCE_WAIT_SUBMIT,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+
+		.cliprects_ptr = to_user_pointer(&fence),
+		.num_cliprects = 1,
+
+		.flags = I915_EXEC_FENCE_ARRAY | I915_EXEC_FENCE_OUT,
+	};
+	int out;
+
+	/* waiting before the fence is set is^W may be invalid */
+	if (__gem_execbuf_wr(i915, &execbuf)) {
+		igt_assert_eq(__gem_execbuf(i915, &execbuf), -EINVAL);
+	} else {
+		/* If we do allow the wait on a future fence, it should autoexpire */
+		gem_sync(i915, obj.handle);
+		out = execbuf.rsvd2 >> 32;
+		igt_assert_eq(sync_fence_status(out), -ETIMEDOUT);
+		close(out);
+	}
+
+	gem_close(i915, obj.handle);
+	syncobj_destroy(i915, fence.handle);
+}
+
 static void test_syncobj_invalid_flags(int fd)
 {
 	const uint32_t bbe = MI_BATCH_BUFFER_END;
@@ -1073,6 +1126,424 @@ static void test_syncobj_wait(int fd)
 	}
 }
 
+/*
+ * Create a 4KiB object holding two batches that each store one dword to
+ * offset + 4000: the batch at byte 0 writes 1, the batch at byte 64 writes 2.
+ */
+static uint32_t future_batch(int i915, uint32_t offset)
+{
+	uint32_t handle = gem_create(i915, 4096);
+	const int gen = intel_gen(intel_get_drm_devid(i915));
+	uint32_t cs[16];
+	int i = 0;
+
+	cs[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+	if (gen >= 8) {
+		cs[++i] = offset + 4000;
+		cs[++i] = 0;
+	} else if (gen >= 4) {
+		cs[++i] = 0;
+		cs[++i] = offset + 4000;
+	} else {
+		cs[i]--;
+		cs[++i] = offset + 4000;
+	}
+	cs[++i] = 1;
+	cs[i + 1] = MI_BATCH_BUFFER_END;
+	gem_write(i915, handle, 0, cs, sizeof(cs));
+
+	cs[i] = 2;
+	gem_write(i915, handle, 64, cs, sizeof(cs));
+
+	return handle;
+}
+
+static void test_syncobj_future(int i915, unsigned int engine)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.offset = 24 << 20,
+		.handle = future_batch(i915, 24 << 20),
+		.flags = EXEC_OBJECT_PINNED,
+	};
+	struct drm_i915_gem_exec_fence fence = {};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.cliprects_ptr = to_user_pointer(&fence),
+		.num_cliprects = 1,
+	};
+	const struct intel_execution_engine2 *e;
+
+	/*
+	 * Client A is waiting on a future fence from B. So even though its
+	 * execbuf is called first, we need to hold it in a queue waiting on
+	 * B.
+	 */
+	igt_require(gem_scheduler_enabled(i915));
+
+	__for_each_physical_engine(i915, e) {
+		uint32_t result;
+
+		igt_debug("waiting on future %s\n", e->name);
+		fence.handle = syncobj_create(i915, 0);
+
+		fence.flags = I915_EXEC_FENCE_WAIT;
+		execbuf.batch_start_offset = 0;
+		execbuf.flags = engine | I915_EXEC_FENCE_ARRAY;
+		execbuf.rsvd1 = 0;
+		gem_execbuf(i915, &execbuf); /* writes 1 */
+
+		fence.flags = I915_EXEC_FENCE_SIGNAL;
+		execbuf.batch_start_offset = 64;
+		execbuf.flags = e->flags | I915_EXEC_FENCE_ARRAY;
+		execbuf.rsvd1 = gem_context_clone_with_engines(i915, 0);
+		gem_execbuf(i915, &execbuf); /* writes 2 */
+		gem_context_destroy(i915, execbuf.rsvd1);
+
+		syncobj_destroy(i915, fence.handle);
+		gem_sync(i915, obj.handle); /* write hazard lies */
+		gem_read(i915, obj.handle, 4000, &result, sizeof(result));
+		igt_assert_eq(result, 1);
+	}
+
+	gem_close(i915, obj.handle);
+}
+
+/*
+ * As future_batch(), except that the batch at byte 64 is prefixed with a
+ * polling MI_SEMAPHORE_WAIT (SAD_EQ_SDD, data 1) on the dword at
+ * offset + 4000 ahead of the store of 2.
+ */
+static uint32_t future_submit_batch(int i915, uint32_t offset)
+{
+	uint32_t handle = gem_create(i915, 4096);
+	const int gen = intel_gen(intel_get_drm_devid(i915));
+	uint32_t cs[16];
+	int i = 0;
+
+	cs[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+	if (gen >= 8) {
+		cs[++i] = offset + 4000;
+		cs[++i] = 0;
+	} else if (gen >= 4) {
+		cs[++i] = 0;
+		cs[++i] = offset + 4000;
+	} else {
+		cs[i]--;
+		cs[++i] = offset + 4000;
+	}
+	cs[++i] = 1;
+	cs[i + 1] = MI_BATCH_BUFFER_END;
+	igt_assert(i + 1 < ARRAY_SIZE(cs));
+	gem_write(i915, handle, 0, cs, sizeof(cs));
+
+	i = 0;
+	cs[i++] =
+		MI_SEMAPHORE_WAIT |
+		MI_SEMAPHORE_POLL |
+		MI_SEMAPHORE_SAD_EQ_SDD |
+		(4 - 2);
+	cs[i++] = 1;
+	cs[i++] = offset + 4000;
+	cs[i++] = 0;
+
+	cs[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+	if (gen >= 8) {
+		cs[++i] = offset + 4000;
+		cs[++i] = 0;
+	} else if (gen >= 4) {
+		cs[++i] = 0;
+		cs[++i] = offset + 4000;
+	} else {
+		cs[i]--;
+		cs[++i] = offset + 4000;
+	}
+	cs[++i] = 2;
+	cs[++i] = MI_BATCH_BUFFER_END;
+	igt_assert(i < ARRAY_SIZE(cs));
+
+	gem_write(i915, handle, 64, cs, sizeof(cs));
+
+	return handle;
+}
+
+static void test_syncobj_future_submit(int i915, unsigned int engine)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.offset = 24 << 20,
+		.handle = future_submit_batch(i915, 24 << 20),
+		.flags = EXEC_OBJECT_PINNED,
+	};
+	const struct intel_execution_engine2 *e;
+
+	/*
+	 * Here we submit client A waiting on client B, but internally client
+	 * B has a semaphore that waits on client A. This relies on timeslicing
+	 * to reorder B before A, even though userspace has asked to submit
+	 * A & B simultaneously (and due to the sequence we will submit B
+	 * then A).
+	 */
+	igt_require(gem_scheduler_has_timeslicing(i915));
+
+	__for_each_physical_engine(i915, e) {
+		struct drm_i915_gem_exec_fence fence = {
+			.handle = syncobj_create(i915, 0),
+		};
+		struct drm_i915_gem_execbuffer2 execbuf = {
+			.buffers_ptr = to_user_pointer(&obj),
+			.buffer_count = 1,
+			.cliprects_ptr = to_user_pointer(&fence),
+			.num_cliprects = 1,
+			.flags = engine | I915_EXEC_FENCE_ARRAY,
+		};
+		uint32_t result;
+		int out;
+
+		igt_debug("waiting on future %s\n", e->name);
+
+		execbuf.rsvd1 = gem_context_clone_with_engines(i915, 0);
+		fence.flags = I915_EXEC_FENCE_WAIT | I915_EXEC_FENCE_WAIT_SUBMIT;
+		execbuf.batch_start_offset = 0;
+		execbuf.flags |= I915_EXEC_FENCE_OUT;
+		igt_require(__gem_execbuf_wr(i915, &execbuf) == 0); /* writes 1 */
+		execbuf.flags &= ~I915_EXEC_FENCE_OUT;
+		gem_context_destroy(i915, execbuf.rsvd1);
+
+		execbuf.rsvd1 = gem_context_clone_with_engines(i915, 0);
+		fence.flags = I915_EXEC_FENCE_SIGNAL;
+		execbuf.batch_start_offset = 64;
+		execbuf.flags &= ~I915_EXEC_RING_MASK;
+		execbuf.flags |= e->flags;
+		gem_execbuf(i915, &execbuf); /* writes 2 */
+		gem_context_destroy(i915, execbuf.rsvd1);
+
+		syncobj_destroy(i915, fence.handle);
+		gem_sync(i915, obj.handle); /* write hazard lies */
+		gem_read(i915, obj.handle, 4000, &result, sizeof(result));
+		igt_assert_eq(result, 2);
+
+		/* check we didn't autotimeout */
+		out = execbuf.rsvd2 >> 32;
+		igt_assert_eq(sync_fence_status(out), 1);
+		close(out);
+	}
+
+	gem_close(i915, obj.handle);
+}
+
+/*
+ * One execbuf both waits upon and signals the same syncobj: the batch must
+ * still write 1 and its out-fence must report status 1.
+ */
+static void test_syncobj_future_past(int i915, unsigned int engine, int flags)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.offset = 24 << 20,
+		.handle = future_batch(i915, 24 << 20),
+		.flags = EXEC_OBJECT_PINNED,
+	};
+	struct drm_i915_gem_exec_fence fence = {
+		.handle = syncobj_create(i915, 0),
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.cliprects_ptr = to_user_pointer(&fence),
+		.num_cliprects = 1,
+		.flags = engine | I915_EXEC_FENCE_ARRAY,
+	};
+	uint32_t result;
+	int out;
+
+	fence.flags = I915_EXEC_FENCE_WAIT | I915_EXEC_FENCE_SIGNAL | flags;
+	execbuf.batch_start_offset = 0;
+	execbuf.flags |= I915_EXEC_FENCE_OUT;
+	igt_require(__gem_execbuf_wr(i915, &execbuf) == 0); /* writes 1 */
+	execbuf.flags &= ~I915_EXEC_FENCE_OUT;
+
+	gem_sync(i915, obj.handle); /* write hazard lies */
+	gem_read(i915, obj.handle, 4000, &result, sizeof(result));
+	igt_assert_eq(result, 1);
+
+	/* check we didn't autotimeout */
+	out = execbuf.rsvd2 >> 32;
+	igt_assert_eq(sync_fence_status(out), 1);
+	close(out);
+
+	gem_close(i915, obj.handle);
+	syncobj_destroy(i915, fence.handle);
+}
+
+/*
+ * The wait and the later signal of the syncobj are submitted with the same
+ * execbuf context and engine; the waiter's out-fence must report -EDEADLK.
+ */
+static void test_syncobj_future_self(int i915, unsigned int engine, int flags)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.offset = 24 << 20,
+		.handle = future_batch(i915, 24 << 20),
+		.flags = EXEC_OBJECT_PINNED,
+	};
+	struct drm_i915_gem_exec_fence fence = {
+		.handle = syncobj_create(i915, 0),
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.cliprects_ptr = to_user_pointer(&fence),
+		.num_cliprects = 1,
+		.flags = engine | I915_EXEC_FENCE_ARRAY,
+	};
+	int out;
+
+	fence.flags = I915_EXEC_FENCE_WAIT | flags;
+	execbuf.batch_start_offset = 0;
+	execbuf.flags |= I915_EXEC_FENCE_OUT;
+	igt_require(__gem_execbuf_wr(i915, &execbuf) == 0); /* writes 1 */
+	execbuf.flags &= ~I915_EXEC_FENCE_OUT;
+
+	fence.flags = I915_EXEC_FENCE_SIGNAL;
+	execbuf.batch_start_offset = 64;
+	gem_execbuf(i915, &execbuf); /* writes 2 */
+
+	gem_sync(i915, obj.handle); /* write hazard lies */
+
+	gem_close(i915, obj.handle);
+	syncobj_destroy(i915, fence.handle);
+
+	out = execbuf.rsvd2 >> 32;
+	igt_assert_eq(sync_fence_status(out), -EDEADLK);
+	close(out);
+}
+
+/*
+ * The waiter's out-fence is fed back as the in-fence of the request that
+ * signals the syncobj, so each depends on the other; both the in-fence and
+ * the new out-fence must report -EDEADLK.
+ */
+static void
+test_syncobj_future_deadlock(int i915, unsigned int engine, int flags)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.offset = 24 << 20,
+		.handle = future_batch(i915, 24 << 20),
+		.flags = EXEC_OBJECT_PINNED,
+	};
+	struct drm_i915_gem_exec_fence fence = {};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.cliprects_ptr = to_user_pointer(&fence),
+		.num_cliprects = 1,
+	};
+	const struct intel_execution_engine2 *e;
+
+	__for_each_physical_engine(i915, e) {
+		int out;
+
+		fence.handle = syncobj_create(i915, 0);
+
+		fence.flags = I915_EXEC_FENCE_WAIT | flags;
+		execbuf.batch_start_offset = 0;
+		execbuf.flags = engine | I915_EXEC_FENCE_ARRAY;
+		execbuf.flags |= I915_EXEC_FENCE_OUT;
+		execbuf.rsvd1 = 0;
+		gem_execbuf_wr(i915, &execbuf); /* writes 1 */
+
+		fence.flags = I915_EXEC_FENCE_SIGNAL;
+		execbuf.batch_start_offset = 64;
+		execbuf.flags = e->flags | I915_EXEC_FENCE_ARRAY;
+		execbuf.flags |= I915_EXEC_FENCE_OUT | I915_EXEC_FENCE_IN;
+		execbuf.rsvd1 = gem_context_clone_with_engines(i915, 0);
+		execbuf.rsvd2 >>= 32;
+		gem_execbuf_wr(i915, &execbuf); /* writes 2 */
+		gem_context_destroy(i915, execbuf.rsvd1);
+
+		syncobj_destroy(i915, fence.handle);
+		gem_sync(i915, obj.handle);
+
+		/* How should this deadlock be resolved? */
+		out = execbuf.rsvd2;
+		igt_assert_eq(sync_fence_status(out), -EDEADLK);
+		close(out);
+
+		out = execbuf.rsvd2 >> 32;
+		igt_assert_eq(sync_fence_status(out), -EDEADLK);
+		close(out);
+	}
+
+	gem_close(i915, obj.handle);
+}
+
+/*
+ * As the deadlock test, but with a request carrying no syncobj flags on e1
+ * chained between the waiter and the signaler on e2 via in/out fences.
+ */
+static void
+test_syncobj_future_cycle(int i915, unsigned int engine, int flags)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.offset = 24 << 20,
+		.handle = future_batch(i915, 24 << 20),
+		.flags = EXEC_OBJECT_PINNED,
+	};
+	const struct intel_execution_engine2 *e1, *e2;
+
+	__for_each_physical_engine(i915, e1) {
+	__for_each_physical_engine(i915, e2) {
+		struct drm_i915_gem_exec_fence fence = {
+			.handle = syncobj_create(i915, 0),
+		};
+		struct drm_i915_gem_execbuffer2 execbuf = {
+			.buffers_ptr = to_user_pointer(&obj),
+			.buffer_count = 1,
+			.cliprects_ptr = to_user_pointer(&fence),
+			.num_cliprects = 1,
+			.flags = engine | I915_EXEC_FENCE_ARRAY,
+		};
+		int out;
+
+		fence.flags = I915_EXEC_FENCE_WAIT | flags;
+		execbuf.batch_start_offset = 0;
+		execbuf.flags |= I915_EXEC_FENCE_OUT;
+		igt_require(__gem_execbuf_wr(i915, &execbuf) == 0);
+
+		fence.flags = 0;
+		execbuf.rsvd1 = gem_context_clone_with_engines(i915, 0);
+		execbuf.rsvd2 >>= 32;
+		execbuf.flags &= ~I915_EXEC_RING_MASK;
+		execbuf.flags |= e1->flags | I915_EXEC_FENCE_IN;
+		gem_execbuf_wr(i915, &execbuf);
+		gem_context_destroy(i915, execbuf.rsvd1);
+		close(execbuf.rsvd2);
+
+		fence.flags = I915_EXEC_FENCE_SIGNAL;
+		execbuf.rsvd1 = gem_context_clone_with_engines(i915, 0);
+		execbuf.rsvd2 >>= 32;
+		execbuf.flags &= ~I915_EXEC_RING_MASK;
+		execbuf.flags |= e2->flags;
+		execbuf.batch_start_offset = 64;
+		gem_execbuf_wr(i915, &execbuf); /* writes 2 */
+		gem_context_destroy(i915, execbuf.rsvd1);
+
+		syncobj_destroy(i915, fence.handle);
+		gem_sync(i915, obj.handle);
+
+
+		/* How should this deadlock be resolved? */
+		out = execbuf.rsvd2 >> 32;
+		igt_assert_eq(sync_fence_status(out), -EDEADLK);
+		close(out);
+
+		out = execbuf.rsvd2;
+		igt_assert_eq(sync_fence_status(out), -EDEADLK);
+		close(out);
+	}}
+
+	gem_close(i915, obj.handle);
+}
+
 static void test_syncobj_export(int fd)
 {
 	const uint32_t bbe = MI_BATCH_BUFFER_END;
@@ -1358,6 +1829,110 @@ static void test_syncobj_channel(int fd)
 		syncobj_destroy(fd, syncobj[i]);
 }
 
+/*
+ * Probe for future-fence support: report whether execbuf accepts a batch
+ * that both waits upon and signals a freshly created syncobj.
+ */
+static bool has_future_syncobj(int i915)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = batch_create(i915),
+	};
+	struct drm_i915_gem_exec_fence fence = {
+		.handle = syncobj_create(i915, 0),
+		.flags = I915_EXEC_FENCE_WAIT | I915_EXEC_FENCE_SIGNAL,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.cliprects_ptr = to_user_pointer(&fence),
+		.num_cliprects = 1,
+		.flags = I915_EXEC_FENCE_ARRAY,
+	};
+	bool result;
+
+	result = __gem_execbuf(i915, &execbuf) == 0;
+	gem_close(i915, obj.handle);
+	syncobj_destroy(i915, fence.handle);
+
+	return result;
+}
+
+static void syncobj_futures(int i915)
+{
+	const struct intel_execution_engine2 *e;
+
+	igt_fixture {
+		igt_require(gem_scheduler_enabled(i915));
+		igt_require(has_future_syncobj(i915));
+	}
+
+	igt_subtest_with_dynamic("syncobj-future")
+		__for_each_physical_engine(i915, e) {
+			igt_dynamic_f("%s", e->name)
+				test_syncobj_future(i915, e->flags);
+		}
+
+	igt_subtest_with_dynamic("syncobj-future-past")
+		__for_each_physical_engine(i915, e) {
+			igt_dynamic_f("%s", e->name)
+				test_syncobj_future_past(i915, e->flags, 0);
+		}
+
+
+	igt_subtest_with_dynamic("syncobj-future-submit")
+		__for_each_physical_engine(i915, e) {
+			igt_dynamic_f("%s", e->name)
+				test_syncobj_future_submit(i915, e->flags);
+		}
+
+	igt_subtest_with_dynamic("syncobj-future-submit-past")
+		__for_each_physical_engine(i915, e) {
+			igt_dynamic_f("%s", e->name)
+				test_syncobj_future_past(i915, e->flags,
+							 I915_EXEC_FENCE_WAIT_SUBMIT);
+		}
+
+	igt_subtest_with_dynamic("syncobj-future-self")
+		__for_each_physical_engine(i915, e) {
+			igt_dynamic_f("%s", e->name)
+				test_syncobj_future_self(i915, e->flags, 0);
+		}
+
+	igt_subtest_with_dynamic("syncobj-future-self-submit")
+		__for_each_physical_engine(i915, e) {
+			igt_dynamic_f("%s", e->name)
+				test_syncobj_future_self(i915, e->flags,
+							 I915_EXEC_FENCE_WAIT_SUBMIT);
+		}
+
+	igt_subtest_with_dynamic("syncobj-future-deadlock")
+		__for_each_physical_engine(i915, e) {
+			igt_dynamic_f("%s", e->name)
+				test_syncobj_future_deadlock(i915, e->flags, 0);
+		}
+
+	igt_subtest_with_dynamic("syncobj-future-submit-deadlock")
+		__for_each_physical_engine(i915, e) {
+			igt_dynamic_f("%s", e->name)
+				test_syncobj_future_deadlock(i915, e->flags,
+							     I915_EXEC_FENCE_WAIT_SUBMIT);
+		}
+
+	igt_subtest_with_dynamic("syncobj-future-cycle")
+		__for_each_physical_engine(i915, e) {
+			igt_dynamic_f("%s", e->name)
+				test_syncobj_future_cycle(i915, e->flags, 0);
+		}
+
+	igt_subtest_with_dynamic("syncobj-future-submit-cycle")
+		__for_each_physical_engine(i915, e) {
+			igt_dynamic_f("%s", e->name)
+				test_syncobj_future_cycle(i915, e->flags,
+							  I915_EXEC_FENCE_WAIT_SUBMIT);
+		}
+}
+
 igt_main
 {
 	const struct intel_execution_engine2 *e;
@@ -1537,6 +2112,9 @@ igt_main
 		igt_subtest("syncobj-invalid-wait")
 			test_syncobj_invalid_wait(i915);
 
+		igt_subtest("syncobj-incomplete-wait-submit")
+			test_syncobj_incomplete_wait_submit(i915);
+
 		igt_subtest("syncobj-invalid-flags")
 			test_syncobj_invalid_flags(i915);
 
@@ -1546,6 +2124,9 @@ igt_main
 		igt_subtest("syncobj-wait")
 			test_syncobj_wait(i915);
 
+		igt_subtest_group
+			syncobj_futures(i915);
+
 		igt_subtest("syncobj-export")
 			test_syncobj_export(i915);
 
-- 
2.26.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx