On Haswell, at least, MI_REPORT_PERF_COUNT is not flushed by the PIPECONTROL surrounding the batch. (In theory, before the breadcrumb is updated the CPU's view of memory is coherent with the GPU, i.e. all writes have landed and are visible to userspace. This does not appear to be the case for MI_REPORT_PERF_COUNT.) This makes it an unreliable method for querying the timestamp, so use MI_STORE_REGISTER_MEM instead. Testcase: igt/perf/oa-exponents Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Cc: Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx> Cc: Matthew Auld <matthew.auld@xxxxxxxxx> --- tests/perf.c | 80 +++++++++++++++++++++++++++++------------------------------- 1 file changed, 39 insertions(+), 41 deletions(-) diff --git a/tests/perf.c b/tests/perf.c index 05ec7a472..92e32d93c 100644 --- a/tests/perf.c +++ b/tests/perf.c @@ -657,47 +657,46 @@ emit_report_perf_count(struct intel_batchbuffer *batch, } static uint32_t -i915_get_one_gpu_timestamp(uint32_t *context_id) +i915_get_one_gpu_timestamp(void) { - drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096); - drm_intel_context *mi_rpc_ctx = drm_intel_gem_context_create(bufmgr); - drm_intel_bo *mi_rpc_bo = drm_intel_bo_alloc(bufmgr, "mi_rpc dest bo", 4096, 64); - struct intel_batchbuffer *mi_rpc_batch = intel_batchbuffer_alloc(bufmgr, devid); - int ret; + const bool r64b = intel_gen(devid) >= 8; + struct drm_i915_gem_execbuffer2 execbuf; + struct drm_i915_gem_exec_object2 obj[2]; + struct drm_i915_gem_relocation_entry reloc; + uint32_t batch[16]; uint32_t timestamp; + int i; - drm_intel_bufmgr_gem_enable_reuse(bufmgr); - - if (context_id) { - ret = drm_intel_gem_context_get_id(mi_rpc_ctx, context_id); - igt_assert_eq(ret, 0); - } - - igt_assert(mi_rpc_ctx); - igt_assert(mi_rpc_bo); - igt_assert(mi_rpc_batch); - - ret = drm_intel_bo_map(mi_rpc_bo, true); - igt_assert_eq(ret, 0); - memset(mi_rpc_bo->virtual, 0x80, 4096); - drm_intel_bo_unmap(mi_rpc_bo); - - emit_report_perf_count(mi_rpc_batch, 
- mi_rpc_bo, /* dst */ - 0, /* dst offset in bytes */ - 0xdeadbeef); /* report ID */ - - intel_batchbuffer_flush_with_context(mi_rpc_batch, mi_rpc_ctx); - - ret = drm_intel_bo_map(mi_rpc_bo, false /* write enable */); - igt_assert_eq(ret, 0); - timestamp = ((uint32_t *)mi_rpc_bo->virtual)[1]; - drm_intel_bo_unmap(mi_rpc_bo); + memset(obj, 0, sizeof(obj)); + obj[0].handle = gem_create(drm_fd, 4096); + obj[1].handle = gem_create(drm_fd, 4096); + obj[1].relocs_ptr = to_user_pointer(&reloc); + obj[1].relocation_count = 1; - drm_intel_bo_unreference(mi_rpc_bo); - intel_batchbuffer_free(mi_rpc_batch); - drm_intel_gem_context_destroy(mi_rpc_ctx); - drm_intel_bufmgr_destroy(bufmgr); + i = 0; + batch[i++] = 0x24 << 23 | (1 + r64b); /* SRM */ + batch[i++] = 0x2358; /* RCS0 timestamp */ + reloc.target_handle = obj[0].handle; + reloc.presumed_offset = obj[0].offset; + reloc.offset = i * sizeof(batch[0]); + reloc.delta = 0; + reloc.read_domains = I915_GEM_DOMAIN_RENDER; + reloc.write_domain = I915_GEM_DOMAIN_RENDER; + batch[i++] = reloc.delta; + if (r64b) + batch[i++] = 0; + batch[i] = MI_BATCH_BUFFER_END; + gem_write(drm_fd, obj[1].handle, 0, batch, sizeof(batch)); + + memset(&execbuf, 0, sizeof(execbuf)); + execbuf.buffers_ptr = to_user_pointer(obj); + execbuf.buffer_count = 2; + execbuf.batch_len = 4096; + gem_execbuf(drm_fd, &execbuf); + gem_close(drm_fd, obj[1].handle); + + gem_read(drm_fd, obj[0].handle, 0, &timestamp, sizeof(timestamp)); + gem_close(drm_fd, obj[0].handle); return timestamp; } @@ -1866,7 +1865,6 @@ test_oa_exponents(void) uint32_t n_reports = 0; uint32_t n_idle_reports = 0; uint32_t n_reads = 0; - uint32_t context_id; uint64_t first_timestamp = 0; bool check_first_timestamp = true; struct drm_i915_perf_record_header *header; @@ -1895,7 +1893,7 @@ test_oa_exponents(void) * first timestamp as way to filter previously * scheduled work that would have configured * the OA unit at a different period. 
*/ - first_timestamp = i915_get_one_gpu_timestamp(&context_id); + first_timestamp = i915_get_one_gpu_timestamp(); while (n_reads < ARRAY_SIZE(reads) && n_reports < ARRAY_SIZE(reports)) { @@ -2021,8 +2019,8 @@ test_oa_exponents(void) uint32_t *rpt = NULL, *last = NULL, *last_periodic = NULL; igt_debug(" > More than 5%% error: avg_ts_delta = %"PRIu64", delta_delta = %"PRIu64", " - "expected_delta = %"PRIu64", first_timestamp = %"PRIu64" ctx_id=%"PRIu32"\n", - average_timestamp_delta, delta_delta, expected_timestamp_delta, first_timestamp, context_id); + "expected_delta = %"PRIu64", first_timestamp = %"PRIu64"\n", + average_timestamp_delta, delta_delta, expected_timestamp_delta, first_timestamp); for (int i = 0; i < (n_reports - 1); i++) { /* XXX: calculating with u32 arithmetic to account for overflow */ uint32_t u32_delta = -- 2.15.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx