Test description suggested that all platforms were testing qword writes, while in fact only gen4-gen5 did. v2: Test dword/qword writes for all available platforms. v3: Rewrite, drop libdrm/intel_batchbuffer dependencies, drop brw_emit_post_sync_nonzero_flush WA for gen6/gen7, drop WC_FLUSH/TC_FLUSH on gen4/gen5, drop preuse tests, use gem_wait instead of set_domain. v4: Back to preuse, do not use gem_write. Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Signed-off-by: Michał Winiarski <michal.winiarski@xxxxxxxxx> --- tests/gem_pipe_control_store_loop.c | 290 +++++++++++++++++++++--------------- 1 file changed, 173 insertions(+), 117 deletions(-) diff --git a/tests/gem_pipe_control_store_loop.c b/tests/gem_pipe_control_store_loop.c index a155ad1..3cf4b31 100644 --- a/tests/gem_pipe_control_store_loop.c +++ b/tests/gem_pipe_control_store_loop.c @@ -26,10 +26,10 @@ */ /* - * Testcase: (TLB-)Coherency of pipe_control QW writes + * Testcase: (TLB-)Coherency of pipe_control writes * - * Writes a counter-value into an always newly allocated target bo (by disabling - * buffer reuse). Decently trashes on tlb inconsistencies, too. + * Writes a counter-value into target bo. + * Decently trashes on tlb inconsistencies, too. */ #include "igt.h" #include <stdlib.h> @@ -43,11 +43,11 @@ #include "drm.h" #include "intel_bufmgr.h" -IGT_TEST_DESCRIPTION("Test (TLB-)Coherency of pipe_control QW writes."); +IGT_TEST_DESCRIPTION("Test (TLB-)Coherency of pipe_control writes."); -static drm_intel_bufmgr *bufmgr; -struct intel_batchbuffer *batch; uint32_t devid; +int gen; +int fd; #define GFX_OP_PIPE_CONTROL ((0x3<<29)|(0x3<<27)|(0x2<<24)|2) #define PIPE_CONTROL_WRITE_IMMEDIATE (1<<14) @@ -60,134 +60,190 @@ uint32_t devid; #define PIPE_CONTROL_CS_STALL (1<<20) #define PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */ -/* Like the store dword test, but we create new command buffers each time */ +#define TEST_STORE_LOOP_BUFFER_REUSED (1 << 0) +#define TEST_STORE_LOOP_QWORD_WRITE (1 << 1) +#define TEST_STORE_LOOP_ALL_FLAGS (TEST_STORE_LOOP_BUFFER_REUSED | \ + TEST_STORE_LOOP_QWORD_WRITE) + +static uint64_t +preuse(uint32_t buf_handle) +{ + int i = 0; + uint32_t batch_handle; + uint32_t *batch; + struct drm_i915_gem_relocation_entry reloc; + struct drm_i915_gem_execbuffer2 execbuf; + struct drm_i915_gem_exec_object2 object[2]; + + batch_handle = gem_create(fd, 4096); + batch = gem_mmap__cpu(fd, batch_handle, 0, 4096, PROT_READ | PROT_WRITE); + + batch[i++] = XY_COLOR_BLT_CMD_NOLEN | + COLOR_BLT_WRITE_ALPHA | + XY_SRC_COPY_BLT_WRITE_RGB; + if (gen >= 8) + batch[i - 1] |= 5; + else + batch[i - 1] |= 4; + + batch[i++] = ((3 << 24) | (0xf0 << 16) | 64); + batch[i++] = 0; /* dst x1,y1 */ + batch[i++] = (1 << 16 | 1); /* dst x2,y2 */ + batch[i++] = 0; /* reloc */ + if (gen >= 8) + batch[i++] = 0; /* reloc_high */ + batch[i++] = 0xdeadbeef; + batch[i++] = MI_BATCH_BUFFER_END; + + memset(&object, 0, sizeof(object)); + memset(&reloc, 0, sizeof(reloc)); + memset(&execbuf, 0, sizeof(execbuf)); + + reloc.target_handle = buf_handle; + reloc.delta = 0; + reloc.offset = 4 * sizeof(batch[0]); + reloc.presumed_offset = 0; + reloc.read_domains = I915_GEM_DOMAIN_RENDER; + reloc.write_domain = I915_GEM_DOMAIN_RENDER; + + object[0].handle = buf_handle; + object[1].handle = batch_handle; + object[1].relocation_count = 1; + object[1].relocs_ptr = (uintptr_t)&reloc; + + if (gen >= 6) + execbuf.flags = I915_EXEC_BLT; + execbuf.buffers_ptr = (uintptr_t)object; + execbuf.buffer_count = 2; + gem_execbuf(fd, &execbuf); + + munmap(batch, 4096); + gem_close(fd, batch_handle); + + return reloc.presumed_offset; +} + static void -store_pipe_control_loop(bool preuse_buffer) +store_pipe_control_loop(uint32_t flags) { - int i, val = 0; + const bool preuse_buffer = flags & TEST_STORE_LOOP_BUFFER_REUSED; + const bool qword_write = flags & TEST_STORE_LOOP_QWORD_WRITE; + + int val, i; + uint32_t reloc_offset; + uint64_t presumed_offset; + uint32_t batch_handle; + uint32_t *batch; + uint32_t buf_handle; uint32_t *buf; - drm_intel_bo *target_bo; - - for (i = 0; i < SLOW_QUICK(0x10000, 4); i++) { - /* we want to check tlb consistency of the pipe_control target, - * so get a new buffer every time around */ - target_bo = drm_intel_bo_alloc(bufmgr, "target bo", 4096, 4096); - igt_assert(target_bo); - - if (preuse_buffer) { - COLOR_BLIT_COPY_BATCH_START(0); - OUT_BATCH((3 << 24) | (0xf0 << 16) | 64); - OUT_BATCH(0); - OUT_BATCH(1 << 16 | 1); - - /* - * IMPORTANT: We need to preuse the buffer in a - * different domain than what the pipe control write - * (and kernel wa) uses! - */ - OUT_RELOC_FENCED(target_bo, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - 0); - OUT_BATCH(0xdeadbeef); - ADVANCE_BATCH(); - - intel_batchbuffer_flush(batch); - } - - /* gem_storedw_batches_loop.c is a bit overenthusiastic with - * creating new batchbuffers - with buffer reuse disabled, the - * support code will do that for us. */ - if (batch->gen >= 8) { - BEGIN_BATCH(4, 1); - OUT_BATCH(GFX_OP_PIPE_CONTROL + 1); - OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE); - OUT_RELOC_FENCED(target_bo, - I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - PIPE_CONTROL_GLOBAL_GTT); - OUT_BATCH(val); /* write data */ - ADVANCE_BATCH(); - - } else if (batch->gen >= 6) { - /* work-around hw issue, see intel_emit_post_sync_nonzero_flush - * in mesa sources. */ - BEGIN_BATCH(4, 1); - OUT_BATCH(GFX_OP_PIPE_CONTROL); - OUT_BATCH(PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_STALL_AT_SCOREBOARD); - OUT_BATCH(0); /* address */ - OUT_BATCH(0); /* write data */ - ADVANCE_BATCH(); - - BEGIN_BATCH(4, 1); - OUT_BATCH(GFX_OP_PIPE_CONTROL); - OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE); - OUT_RELOC(target_bo, - I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - PIPE_CONTROL_GLOBAL_GTT); - OUT_BATCH(val); /* write data */ - ADVANCE_BATCH(); - } else if (batch->gen >= 4) { - BEGIN_BATCH(4, 1); - OUT_BATCH(GFX_OP_PIPE_CONTROL | PIPE_CONTROL_WC_FLUSH | - PIPE_CONTROL_TC_FLUSH | - PIPE_CONTROL_WRITE_IMMEDIATE | 2); - OUT_RELOC(target_bo, - I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - PIPE_CONTROL_GLOBAL_GTT); - OUT_BATCH(val); - OUT_BATCH(0xdeadbeef); - ADVANCE_BATCH(); + struct drm_i915_gem_relocation_entry reloc; + struct drm_i915_gem_execbuffer2 execbuf; + struct drm_i915_gem_exec_object2 object[2]; + + /* no dword writes on gen4/gen5 and gen9+ */ + if (!qword_write) + igt_skip_on(gen < 6 || gen > 8); + + batch_handle = gem_create(fd, 4096); + batch = gem_mmap__cpu(fd, batch_handle, 0, 4096, PROT_READ | PROT_WRITE); + + for (val = 0; val < SLOW_QUICK(0x10000, 4); val++) { + i = 0; + buf_handle = gem_create(fd, 4096); + + buf = gem_mmap__cpu(fd, buf_handle, 0, 4096, PROT_READ | PROT_WRITE); + gem_set_domain(fd, buf_handle, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU); + buf[0] = 0xdeadbeef; + buf[1] = 0xdeadbeef; + if (preuse_buffer) + presumed_offset = preuse(buf_handle); + else + presumed_offset = 0; + + if (gen == 6) + presumed_offset |= PIPE_CONTROL_GLOBAL_GTT; + + if (gen >= 6) { + batch[i++] = GFX_OP_PIPE_CONTROL + (gen >= 8) + qword_write; + batch[i++] = PIPE_CONTROL_WRITE_IMMEDIATE; + batch[i++] = (uint32_t)presumed_offset; /* reloc */ + reloc_offset = i - 1; + if (gen >= 8) + batch[i++] = (uint32_t)(presumed_offset >> 32); /* reloc_high */ + } else { + /* qword write */ + batch[i++] = (GFX_OP_PIPE_CONTROL | + PIPE_CONTROL_WRITE_IMMEDIATE); + batch[i++] = (uint32_t)presumed_offset; /* reloc */ + reloc_offset = i - 1; } - intel_batchbuffer_flush_on_ring(batch, 0); - - drm_intel_bo_map(target_bo, 1); - - buf = target_bo->virtual; - igt_assert(buf[0] == val); - - drm_intel_bo_unmap(target_bo); - /* Make doublesure that this buffer won't get reused. */ - drm_intel_bo_disable_reuse(target_bo); - drm_intel_bo_unreference(target_bo); - - val++; + batch[i++] = val; /* write data */ + if (qword_write) + batch[i++] = ~val; /* dword_high */ + else + batch[i++] = MI_NOOP | 0xabcd; + batch[i++] = MI_BATCH_BUFFER_END; + + memset(object, 0, sizeof(object)); + memset(&reloc, 0, sizeof(reloc)); + memset(&execbuf, 0, sizeof(execbuf)); + + reloc.target_handle = buf_handle; + reloc.delta = 0; + reloc.offset = reloc_offset * sizeof(batch[0]); + reloc.presumed_offset = 0; + reloc.read_domains = I915_GEM_DOMAIN_RENDER; + reloc.write_domain = I915_GEM_DOMAIN_RENDER; + + if (gen == 6) + object[0].flags |= EXEC_OBJECT_NEEDS_GTT; + object[0].handle = buf_handle; + object[1].handle = batch_handle; + object[1].relocation_count = 1; + object[1].relocs_ptr = (uintptr_t)&reloc; + + execbuf.buffers_ptr = (uintptr_t)object; + execbuf.buffer_count = 2; + gem_execbuf(fd, &execbuf); + + gem_wait(fd, buf_handle, NULL); + + igt_assert_eq_u32(buf[0], val); + if (qword_write) + igt_assert_eq_u32(buf[1], ~val); + else + igt_assert_eq_u32(buf[1], 0xdeadbeef); + + munmap(buf, 4096); + gem_close(fd, buf_handle); } -} -int fd; + munmap(batch, 4096); + gem_close(fd, batch_handle); +} igt_main { igt_fixture { fd = drm_open_driver(DRIVER_INTEL); devid = intel_get_drm_devid(fd); + gen = intel_gen(devid); - bufmgr = drm_intel_bufmgr_gem_init(fd, 4096); - igt_assert(bufmgr); - - igt_skip_on(IS_GEN2(devid) || IS_GEN3(devid)); + igt_skip_on(gen < 4); igt_skip_on(devid == PCI_CHIP_I965_G); /* has totally broken pipe control */ - - /* IMPORTANT: No call to - * drm_intel_bufmgr_gem_enable_reuse(bufmgr); - * here because we wan't to have fresh buffers (to trash the tlb) - * every time! */ - - batch = intel_batchbuffer_alloc(bufmgr, devid); - igt_assert(batch); } - igt_subtest("fresh-buffer") - store_pipe_control_loop(false); - - igt_subtest("reused-buffer") - store_pipe_control_loop(true); - - igt_fixture { - intel_batchbuffer_free(batch); - drm_intel_bufmgr_destroy(bufmgr); + for (uint32_t flags = 0; flags < TEST_STORE_LOOP_ALL_FLAGS + 1; flags++) { + igt_subtest_f("%sbuffer%s", + flags & TEST_STORE_LOOP_BUFFER_REUSED ? + "reused-" : "fresh-", + flags & TEST_STORE_LOOP_QWORD_WRITE ? + "-qword-write" : "") { + store_pipe_control_loop(flags); + } + } + igt_fixture close(fd); - } } -- 2.8.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx