On 08/23, Lionel Landwerlin wrote: > New issues that were discovered while making the tests work on Gen8+ : > > - we need to measure timings between periodic reports and discard all > other kind of reports > > - it seems periodicity of the reports can be affected outside of RC6 > (frequency change), we can detect this by looking at the amount of > clock cycles per timestamp deltas > > Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx> > --- > tests/perf.c | 734 ++++++++++++++++++++++++++++++++++++++++++++++++----------- > 1 file changed, 600 insertions(+), 134 deletions(-) > > diff --git a/tests/perf.c b/tests/perf.c > index ca69440d..e54a14ed 100644 > --- a/tests/perf.c > +++ b/tests/perf.c > @@ -28,6 +28,7 @@ > #include <fcntl.h> > #include <inttypes.h> > #include <errno.h> > +#include <signal.h> > #include <sys/stat.h> > #include <sys/time.h> > #include <sys/times.h> > @@ -306,6 +307,25 @@ static uint32_t (*read_report_ticks)(uint32_t *report, > static void (*sanity_check_reports)(uint32_t *oa_report0, uint32_t *oa_report1, > enum drm_i915_oa_format format); > > +static bool > +timestamp_delta_within(uint32_t delta, > + uint32_t expected_delta, > + uint32_t margin) > +{ > + return delta >= (expected_delta - margin) && > + delta <= (expected_delta + margin); > +} > + > +static bool > +double_value_within(double value, > + double expected, > + double percent_margin) > +{ > + return value >= (expected - expected * percent_margin / 100.0) && > + value <= (expected + expected * percent_margin / 100.0); > + > +} > + > static void > __perf_close(int fd) > { > @@ -472,6 +492,20 @@ gen8_read_report_ticks(uint32_t *report, enum drm_i915_oa_format format) > return report[3]; > } > > +static void > +gen8_read_report_clock_ratios(uint32_t *report, > + uint32_t *slice_freq_mhz, > + uint32_t *unslice_freq_mhz) > +{ > + uint32_t unslice_freq = report[0] & 0x1ff; > + uint32_t slice_freq_low = (report[0] >> 25) & 0x7f; > + uint32_t slice_freq_high = (report[0] >> 9) & 0x3; > + uint32_t slice_freq = slice_freq_low | (slice_freq_high << 7); > + > + *slice_freq_mhz = (slice_freq * 16666) / 1000; > + *unslice_freq_mhz = (unslice_freq * 16666) / 1000; > +} > + > static const char * > gen8_read_report_reason(const uint32_t *report) > { > @@ -494,29 +528,6 @@ gen8_read_report_reason(const uint32_t *report) > return "unknown"; > } > > -static bool > -oa_report_is_periodic(uint32_t oa_exponent, const uint32_t *report) > -{ > - if (IS_HASWELL(devid)) { > - /* For Haswell we don't have a documented report reason field > - * (though empirically report[0] bit 10 does seem to correlate > - * with a timer trigger reason) so we instead infer which > - * reports are timer triggered by checking if the least > - * significant bits are zero and the exponent bit is set. > - */ > - uint32_t oa_exponent_mask = (1 << (oa_exponent + 1)) - 1; > - > - if ((report[1] & oa_exponent_mask) != (1 << oa_exponent)) > - return true; > - } else { > - if ((report[0] >> OAREPORT_REASON_SHIFT) & > - OAREPORT_REASON_TIMER) > - return true; > - } > - > - return false; > -} > - > static uint64_t > timebase_scale(uint32_t u32_delta) > { > @@ -563,6 +574,29 @@ oa_exponent_to_ns(int exponent) > return 1000000000ULL * (2ULL << exponent) / timestamp_frequency; > } > > +static bool > +oa_report_is_periodic(uint32_t oa_exponent, const uint32_t *report) > +{ > + if (IS_HASWELL(devid)) { > + /* For Haswell we don't have a documented report reason field > + * (though empirically report[0] bit 10 does seem to correlate > + * with a timer trigger reason) so we instead infer which > + * reports are timer triggered by checking if the least > + * significant bits are zero and the exponent bit is set. > + */ > + uint32_t oa_exponent_mask = (1 << (oa_exponent + 1)) - 1; > + > + if ((report[1] & oa_exponent_mask) == (1 << oa_exponent)) > + return true; > + } else { > + if ((report[0] >> OAREPORT_REASON_SHIFT) & > + OAREPORT_REASON_TIMER) > + return true; > + } > + > + return false; > +} > + > static bool > oa_report_ctx_is_valid(uint32_t *report) > { > @@ -578,6 +612,128 @@ oa_report_ctx_is_valid(uint32_t *report) > igt_assert(!"reached"); > } > > +static uint32_t > +oa_report_get_ctx_id(uint32_t *report) > +{ > + if (!oa_report_ctx_is_valid(report)) > + return 0xffffffff; > + return report[2]; > +} > + > +static double > +oa_reports_tick_per_period(uint32_t *report0, uint32_t *report1) > +{ > + if (intel_gen(devid) < 8) > + return 0.0; > + > + /* Measure the number GPU tick delta to timestamp delta. */ > + return (double) (report1[3] - report0[3]) / > + (double) (report1[1] - report0[1]); > +} > + > +static void > +scratch_buf_memset(drm_intel_bo *bo, int width, int height, uint32_t color) > +{ > + int ret; > + > + ret = drm_intel_bo_map(bo, true /* writable */); > + igt_assert_eq(ret, 0); > + > + for (int i = 0; i < width * height; i++) > + ((uint32_t *)bo->virtual)[i] = color; > + > + drm_intel_bo_unmap(bo); > +} > + > +static void > +scratch_buf_init(drm_intel_bufmgr *bufmgr, > + struct igt_buf *buf, > + int width, int height, > + uint32_t color) > +{ > + size_t stride = width * 4; > + size_t size = stride * height; > + drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, "", size, 4096); > + > + scratch_buf_memset(bo, width, height, color); > + > + buf->bo = bo; > + buf->stride = stride; > + buf->tiling = I915_TILING_NONE; > + buf->size = size; > +} > + > +static void > +emit_report_perf_count(struct intel_batchbuffer *batch, > + drm_intel_bo *dst_bo, > + int dst_offset, > + uint32_t report_id) > +{ > + if (IS_HASWELL(devid)) { > + BEGIN_BATCH(3, 1); > + OUT_BATCH(GEN6_MI_REPORT_PERF_COUNT); > + OUT_RELOC(dst_bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, > + dst_offset); > + OUT_BATCH(report_id); > + ADVANCE_BATCH(); > + } else { > + /* XXX: NB: n dwords arg is actually magic since it internally > + * automatically accounts for larger addresses on gen >= 8... > + */ > + BEGIN_BATCH(3, 1); > + OUT_BATCH(GEN8_MI_REPORT_PERF_COUNT); > + OUT_RELOC(dst_bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, > + dst_offset); > + OUT_BATCH(report_id); > + ADVANCE_BATCH(); > + } > +} > + > +static uint32_t > +i915_get_one_gpu_timestamp(uint32_t *context_id) > +{ > + drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096); > + drm_intel_context *mi_rpc_ctx = drm_intel_gem_context_create(bufmgr); > + drm_intel_bo *mi_rpc_bo = drm_intel_bo_alloc(bufmgr, "mi_rpc dest bo", 4096, 64); > + struct intel_batchbuffer *mi_rpc_batch = intel_batchbuffer_alloc(bufmgr, devid); > + int ret; > + uint32_t timestamp; > + > + drm_intel_bufmgr_gem_enable_reuse(bufmgr); > + > + if (context_id) { > + ret = drm_intel_gem_context_get_id(mi_rpc_ctx, context_id); > + igt_assert_eq(ret, 0); > + } > + > + igt_assert(mi_rpc_ctx); > + igt_assert(mi_rpc_bo); > + igt_assert(mi_rpc_batch); > + > + ret = drm_intel_bo_map(mi_rpc_bo, true); > + igt_assert_eq(ret, 0); > + memset(mi_rpc_bo->virtual, 0x80, 4096); > + drm_intel_bo_unmap(mi_rpc_bo); > + > + emit_report_perf_count(mi_rpc_batch, > + mi_rpc_bo, /* dst */ > + 0, /* dst offset in bytes */ > + 0xdeadbeef); /* report ID */ > + > + intel_batchbuffer_flush_with_context(mi_rpc_batch, mi_rpc_ctx); > + > + ret = drm_intel_bo_map(mi_rpc_bo, false /* write enable */); > + igt_assert_eq(ret, 0); > + timestamp = ((uint32_t *)mi_rpc_bo->virtual)[1]; > + drm_intel_bo_unmap(mi_rpc_bo); > + > + drm_intel_bo_unreference(mi_rpc_bo); > + intel_batchbuffer_free(mi_rpc_batch); > + drm_intel_gem_context_destroy(mi_rpc_ctx); > + drm_intel_bufmgr_destroy(bufmgr); > + > + return timestamp; > +} > > static void > hsw_sanity_check_render_basic_reports(uint32_t *oa_report0, uint32_t *oa_report1, > @@ -1032,7 +1188,6 @@ i915_read_reports_until_timestamp(enum drm_i915_oa_format oa_format, > return total_len; > } > > - > /* CAP_SYS_ADMIN is required to open system wide metrics, unless the system > * control parameter dev.i915.perf_stream_paranoid == 0 */ > static void > @@ -1347,20 +1502,6 @@ open_and_read_2_oa_reports(int format_id, > __perf_close(stream_fd); > } > > -static void > -gen8_read_report_clock_ratios(uint32_t *report, > - uint32_t *slice_freq_mhz, > - uint32_t *unslice_freq_mhz) > -{ > - uint32_t unslice_freq = report[0] & 0x1ff; > - uint32_t slice_freq_low = (report[0] >> 25) & 0x7f; > - uint32_t slice_freq_high = (report[0] >> 9) & 0x3; > - uint32_t slice_freq = slice_freq_low | (slice_freq_high << 7); > - > - *slice_freq_mhz = (slice_freq * 16666) / 1000; > - *unslice_freq_mhz = (unslice_freq * 16666) / 1000; > -} > - > static void > print_reports(uint32_t *oa_report0, uint32_t *oa_report1, int fmt) > { > @@ -1552,74 +1693,457 @@ test_oa_formats(void) > } > } > > + > +enum load { > + LOW, > + HIGH > +}; > + > +#define LOAD_HELPER_PAUSE_USEC 500 > + > +static struct load_helper { > + int devid; > + int has_ppgtt; > + drm_intel_bufmgr *bufmgr; > + drm_intel_context *context; > + uint32_t context_id; > + struct intel_batchbuffer *batch; > + drm_intel_bo *target_buffer; Unused. > + enum load load; > + bool exit; > + struct igt_helper_process igt_proc; > + struct igt_buf src, dst; > +} lh = { 0, }; Isn't this just zero initializing .devid with that stray comma, but then again this is static so we needn't even bother... > + > +static void load_helper_signal_handler(int sig) > +{ > + if (sig == SIGUSR2) > + lh.load = lh.load == LOW ? HIGH : LOW; > + else > + lh.exit = true; > +} > + > +static void load_helper_set_load(enum load load) > +{ > + igt_assert(lh.igt_proc.running); > + > + if (lh.load == load) > + return; > + > + lh.load = load; > + kill(lh.igt_proc.pid, SIGUSR2); > +} > + > +static void load_helper_run(enum load load) > +{ > + /* > + * FIXME fork helpers won't get cleaned up when started from within a > + * subtest, so handle the case where it sticks around a bit too long. > + */ > + if (lh.igt_proc.running) { > + load_helper_set_load(load); > + return; > + } > + > + lh.load = load; > + > + igt_fork_helper(&lh.igt_proc) { > + signal(SIGUSR1, load_helper_signal_handler); > + signal(SIGUSR2, load_helper_signal_handler); > + > + while (!lh.exit) { > + int ret; > + > + render_copy(lh.batch, > + lh.context, > + &lh.src, 0, 0, 1920, 1080, > + &lh.dst, 0, 0); > + > + intel_batchbuffer_flush_with_context(lh.batch, > + lh.context); > + > + ret = drm_intel_gem_context_get_id(lh.context, > + &lh.context_id); > + igt_assert_eq(ret, 0); > + > + drm_intel_bo_wait_rendering(lh.dst.bo); > + > + /* Lower the load by pausing after every submitted > + * write. */ > + if (lh.load == LOW) > + usleep(LOAD_HELPER_PAUSE_USEC); > + } > + } > +} > + > +static void load_helper_stop(void) > +{ > + kill(lh.igt_proc.pid, SIGUSR1); > + igt_assert(igt_wait_helper(&lh.igt_proc) == 0); > +} > + > +static void load_helper_init(void) > +{ > + int ret; > + > + lh.devid = intel_get_drm_devid(drm_fd); > + lh.has_ppgtt = gem_uses_ppgtt(drm_fd); > + > + /* MI_STORE_DATA can only use GTT address on gen4+/g33 and needs > + * snoopable mem on pre-gen6. Hence load-helper only works on gen6+, but > + * that's also all we care about for the rps testcase*/ > + igt_assert(intel_gen(lh.devid) >= 6); > + lh.bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096); > + igt_assert(lh.bufmgr); > + > + drm_intel_bufmgr_gem_enable_reuse(lh.bufmgr); > + > + lh.context = drm_intel_gem_context_create(lh.bufmgr); > + igt_assert(lh.context); > + > + lh.context_id = 0xffffffff; > + ret = drm_intel_gem_context_get_id(lh.context, &lh.context_id); > + igt_assert_eq(ret, 0); > + igt_assert_neq(lh.context_id, 0xffffffff); > + > + lh.batch = intel_batchbuffer_alloc(lh.bufmgr, lh.devid); > + igt_assert(lh.batch); > + > + scratch_buf_init(lh.bufmgr, &lh.dst, 1920, 1080, 0); > + scratch_buf_init(lh.bufmgr, &lh.src, 1920, 1080, 0); > +} > + > +static void load_helper_fini(void) > +{ > + if (lh.igt_proc.running) > + load_helper_stop(); > + > + if (lh.src.bo) > + drm_intel_bo_unreference(lh.src.bo); > + if (lh.dst.bo) > + drm_intel_bo_unreference(lh.dst.bo); > + > + if (lh.batch) > + intel_batchbuffer_free(lh.batch); > + > + if (lh.context) > + drm_intel_gem_context_destroy(lh.context); > + > + if (lh.bufmgr) > + drm_intel_bufmgr_destroy(lh.bufmgr); > +} > + > static void > test_oa_exponents(void) > { > - igt_debug("Testing OA timer exponents\n"); > + load_helper_init(); > + load_helper_run(HIGH); > > /* It's asking a lot to sample with a 160 nanosecond period and the > * test can fail due to buffer overflows if it wasn't possible to > * keep up, so we don't start from an exponent of zero... > */ > - for (int i = 5; i < 20; i++) { > - uint32_t expected_timestamp_delta; > - uint32_t timestamp_delta; > - uint32_t oa_report0[64]; > - uint32_t oa_report1[64]; > + for (int exponent = 5; exponent < 18; exponent++) { > + uint64_t expected_timestamp_delta; > uint32_t time_delta; > - uint32_t clock_delta; > - uint32_t freq; > int n_tested = 0; > + int n_time_delta_matches = 0; > > /* The exponent is effectively selecting a bit in the timestamp > * to trigger reports on and so in practice we expect the raw > * timestamp deltas for periodic reports to exactly match the > * value of next bit. > */ > - expected_timestamp_delta = 2 << i; > + expected_timestamp_delta = 2UL << exponent; > > for (int j = 0; n_tested < 10 && j < 100; j++) { > - uint32_t ticks0, ticks1; > - > - igt_debug("ITER %d: testing OA exponent %d (period = %"PRIu64"ns)\n", > - j, i, oa_exponent_to_ns(i)); > - > - open_and_read_2_oa_reports(test_oa_format, > - i, /* exponent */ > - oa_report0, > - oa_report1, > - true); /* timer triggered > - reports only */ > - > - timestamp_delta = oa_report1[1] - oa_report0[1]; > - igt_assert_neq(timestamp_delta, 0); > - > - if (timestamp_delta != expected_timestamp_delta) { > - igt_debug("timestamp0 = %u/0x%x\n", > - oa_report0[1], oa_report0[1]); > - igt_debug("timestamp1 = %u/0x%x\n", > - oa_report1[1], oa_report1[1]); > + uint64_t properties[] = { > + /* Include OA reports in samples */ > + DRM_I915_PERF_PROP_SAMPLE_OA, true, > + > + /* OA unit configuration */ > + DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id, > + DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format, > + DRM_I915_PERF_PROP_OA_EXPONENT, exponent, > + }; > + struct drm_i915_perf_open_param param = { > + .flags = I915_PERF_FLAG_FD_CLOEXEC, > + .num_properties = ARRAY_SIZE(properties) / 2, > + .properties_ptr = to_user_pointer(properties), > + }; > + int ret; > + uint64_t average_timestamp_delta; > + uint32_t n_reports = 0; > + uint32_t n_report_lost = 0; > + uint32_t n_idle_reports = 0; > + uint32_t n_reads = 0; > + uint32_t context_id; > + uint64_t first_timestamp = 0; > + bool check_first_timestamp = true; > + struct drm_i915_perf_record_header *header; > + uint64_t delta_delta; > + struct { > + uint32_t report[64]; > + } reports[30]; > + struct { > + uint8_t *buf; > + size_t len; > + } reads[1000]; > + double error; > + double tick_per_period; > + > + igt_debug("ITER %d: testing OA exponent %d," > + " expected ts delta = %"PRIu64" (%"PRIu64"ns/%.2fus/%.2fms)\n", > + j, exponent, > + expected_timestamp_delta, > + oa_exponent_to_ns(exponent), > + oa_exponent_to_ns(exponent) / 1000.0, > + oa_exponent_to_ns(exponent) / (1000.0 * 1000.0)); > + > + stream_fd = __perf_open(drm_fd, ¶m); > + > + /* Right after opening the OA stream, read a > + * first timestamp as way to filter previously > + * scheduled work that would have configured > + * the OA unit at a different period. */ > + first_timestamp = i915_get_one_gpu_timestamp(&context_id); > + > + while (n_reads < ARRAY_SIZE(reads) && > + n_reports < ARRAY_SIZE(reports)) { > + const size_t buf_size = 1024 * 1024; > + uint8_t *buf = reads[n_reads++].buf = calloc(1, buf_size); > + > + while ((ret = read(stream_fd, buf, buf_size)) < 0 && > + errno == EINTR) > + ; > + > + /* We should never have no data. */ > + igt_assert(ret > 0); > + reads[n_reads - 1].len = ret; > + > + igt_debug(" > read %i bytes\n", ret); > + > + for (int offset = 0; > + offset < ret && n_reports < ARRAY_SIZE(reports); > + offset += header->size) { > + uint32_t *report; > + double previous_tick_per_period; > + > + header = (void *)(buf + offset); > + > + if (header->type == DRM_I915_PERF_RECORD_OA_BUFFER_LOST) { > + igt_assert(!"reached"); > + break; > + } > + > + if (header->type == DRM_I915_PERF_RECORD_OA_REPORT_LOST) { > + n_report_lost++; > + n_reports = 0; > + n_report_lost = 0; Eh? > + n_idle_reports = 0; > + for (int r = 0; r < n_reads; r++) > + free(reads[r].buf); > + n_reads = 0; > + break; > + } > + > + if (header->type != DRM_I915_PERF_RECORD_SAMPLE) > + continue; > + > + report = (void *)(header + 1); > + > + /* Skip anything before the first > + * timestamp, it might not be at the > + * right periodic exponent. */ > + if (check_first_timestamp && > + report[1] < first_timestamp) { > + igt_debug(" > Dropping ts=%u (prior %"PRIu64")\n", > + report[1], first_timestamp); > + continue; > + } > + > + /* Once we've passed the first > + * timestamp, no need to check. */ > + check_first_timestamp = false; > + > + if (!oa_report_ctx_is_valid(report)) > + n_idle_reports++; > + > + /* We only measure timestamps between > + * periodic reports. */ > + if (!oa_report_is_periodic(exponent, report)) > + continue; > + > + igt_debug(" > write %i timestamp=%u\n", n_reports, report[1]); > + memcpy(reports[n_reports].report, report, > + sizeof(reports[n_reports].report)); > + n_reports++; > + > + previous_tick_per_period = tick_per_period; > + > + if (n_reports > 1) { > + tick_per_period = > + oa_reports_tick_per_period(reports[n_reports - 2].report, > + reports[n_reports - 1].report); > + > + /* Dismiss the series of report > + * if we notice clock frequency > + * changes. */ > + if (!double_value_within(tick_per_period, > + previous_tick_per_period, 5)) { > + igt_debug("Noticed clock frequency change at ts=%u (%f / %f), " > + "dropping reports and trying again\n", > + report[1], previous_tick_per_period, tick_per_period); > + n_reports = 0; > + n_report_lost = 0; > + n_idle_reports = 0; > + for (int r = 0; r < n_reads; r++) > + free(reads[r].buf); > + n_reads = 0; > + break; > + } > + } > + } > } > > - igt_assert_eq(timestamp_delta, expected_timestamp_delta); > + __perf_close(stream_fd); > + igt_debug("closed stream\n"); > > - ticks0 = read_report_ticks(oa_report0, test_oa_format); > - ticks1 = read_report_ticks(oa_report1, test_oa_format); > - clock_delta = ticks1 - ticks0; > + igt_assert_eq(n_reports, ARRAY_SIZE(reports)); > > - time_delta = timebase_scale(timestamp_delta); > + average_timestamp_delta = 0; > + for (int i = 0; i < (n_reports - 1); i++) { > + /* XXX: calculating with u32 arithmetic to account for overflow */ > + uint32_t u32_delta = reports[i + 1].report[1] - reports[i].report[1]; > > - freq = ((uint64_t)clock_delta * 1000) / time_delta; > - igt_debug("ITER %d: time delta = %"PRIu32"(ns) clock delta = %"PRIu32" freq = %"PRIu32"(mhz)\n", > - j, time_delta, clock_delta, freq); > + average_timestamp_delta += u32_delta; > + } > + average_timestamp_delta /= (n_reports - 1); > + > + if (average_timestamp_delta > expected_timestamp_delta) > + delta_delta = average_timestamp_delta - expected_timestamp_delta; > + else > + delta_delta = expected_timestamp_delta - average_timestamp_delta; > + error = (delta_delta / (double)expected_timestamp_delta) * 100.0; > + > + time_delta = timebase_scale(average_timestamp_delta); > + > + igt_debug(" > Avg. time delta = %"PRIu32"(ns)," > + " lost reports = %u, n idle reports = %u," > + " n reads = %u, error=%f\n", > + time_delta, n_report_lost, n_idle_reports, n_reads, error); > + if (error > 5) { > + uint32_t *rpt = NULL, *last = NULL, *last_periodic = NULL; > + > + igt_debug(" > More than 5%% error: avg_ts_delta = %"PRIu64", delta_delta = %"PRIu64", " > + "expected_delta = %"PRIu64", first_timestamp = %"PRIu64" ctx_id=%"PRIu32"\n", > + average_timestamp_delta, delta_delta, expected_timestamp_delta, first_timestamp, context_id); > + for (int i = 0; i < (n_reports - 1); i++) { > + /* XXX: calculating with u32 arithmetic to account for overflow */ > + uint32_t u32_delta = > + reports[i + 1].report[1] - reports[i].report[1]; > + > + if (u32_delta > expected_timestamp_delta) > + delta_delta = u32_delta - expected_timestamp_delta; > + else > + delta_delta = expected_timestamp_delta - u32_delta; > + error = (delta_delta / (double)expected_timestamp_delta) * 100.0; > + > + igt_debug(" > ts=%u-%u timestamp delta from %2d to %2d = %-8u (error = %u%%, ctx_id = %x)\n", > + reports[i + 1].report[1], reports[i].report[1], > + i, i + 1, u32_delta, (unsigned)error, > + oa_report_get_ctx_id(reports[i + 1].report)); > + } > + for (int r = 0; r < n_reads; r++) { > + igt_debug(" > read\n"); > + for (int offset = 0; > + offset < reads[r].len; > + offset += header->size) { > + int counter_print = 13; Why counter A13, what's the significance? Perhaps we need a comment... > + uint64_t a0 = 0, aN = 0; > + double local_period = 0; > + > + header = (void *) &reads[r].buf[offset]; > + > + if (header->type != DRM_I915_PERF_RECORD_SAMPLE) { > + igt_debug(" > loss\n"); > + continue; > + } > + > + rpt = (void *)(header + 1); > + > + if (last) { > + a0 = gen8_read_40bit_a_counter(rpt, test_oa_format, 0) - > + gen8_read_40bit_a_counter(last, test_oa_format, 0); > + aN = gen8_read_40bit_a_counter(rpt, test_oa_format, counter_print) - > + gen8_read_40bit_a_counter(last, test_oa_format, counter_print); > + } > + > + if (last_periodic && > + oa_report_is_periodic(exponent, rpt)) { > + local_period = > + ((uint64_t) rpt[3] - last_periodic[3]) / > + ((uint64_t) rpt[1] - last_periodic[1]); Hmm I thought the 'GPU Clock Ticks' was only present on gen8+ reports, since this is also run on haswell are we not reading a counter here? Do we need to use the oa_reports_tick_per_period() helper? Some other tests also look suspect, or am I missing something? Anyway seems to pass now on my bdw :) > + } > + > + igt_debug(" > report ts=%u" > + " ts_delta_last=%8u ts_delta_last_periodic=%8u is_timer=%i ctx_id=%8x gpu_ticks=%u period=%.2f A0=%lu A%i=%lu\n", > + rpt[1], > + (last != NULL) ? (rpt[1] - last[1]) : 0, > + (last_periodic != NULL) ? (rpt[1] - last_periodic[1]) : 0, > + oa_report_is_periodic(exponent, rpt), > + oa_report_get_ctx_id(rpt), > + (last != NULL) ? (rpt[3] - last[3]) : 0, > + local_period, > + a0, counter_print, aN); > + > + last = rpt; > + if (oa_report_is_periodic(exponent, rpt)) > + last_periodic = rpt; > + } > + } > + > + igt_assert(!"reached"); > + } > + > + if (timestamp_delta_within(average_timestamp_delta, > + expected_timestamp_delta, > + expected_timestamp_delta * 0.05)) { > + igt_debug(" > timestamp delta matching %"PRIu64"ns ~= expected %"PRIu64"! :)\n", > + timebase_scale(average_timestamp_delta), > + timebase_scale(expected_timestamp_delta)); > + n_time_delta_matches++; > + } else { > + igt_debug(" > timestamp delta mismatch: %"PRIu64"ns != expected %"PRIu64"ns\n", > + timebase_scale(average_timestamp_delta), > + timebase_scale(expected_timestamp_delta)); > + igt_assert(average_timestamp_delta < > + (expected_timestamp_delta * 2)); > + } > > n_tested++; > + > + for (int r = 0; r < n_reads; r++) > + free(reads[r].buf); > } > > if (n_tested < 10) > - igt_debug("sysfs frequency pinning too unstable for cross-referencing with OA derived frequency"); > + igt_debug("Too many test iterations had to be skipped\n"); > igt_assert_eq(n_tested, 10); > + > + igt_debug("Number of iterations with expected timestamp delta = %d\n", > + n_time_delta_matches); > + > + /* The HW doesn't give us any strict guarantee that the > + * timestamps are exactly aligned with the exponent mask but > + * in practice it seems very rare for that not to be the case > + * so it a useful sanity check to assert quite strictly... > + */ > + igt_assert(n_time_delta_matches >= 9); > } > + > + load_helper_stop(); > + load_helper_fini(); > } > > /* The OA exponent selects a timestamp counter bit to trigger reports on. > @@ -2485,32 +3009,6 @@ test_disabled_read_error(void) > __perf_close(stream_fd); > } > > -static void > -emit_report_perf_count(struct intel_batchbuffer *batch, > - drm_intel_bo *dst_bo, > - int dst_offset, > - uint32_t report_id) > -{ > - if (IS_HASWELL(devid)) { > - BEGIN_BATCH(3, 1); > - OUT_BATCH(GEN6_MI_REPORT_PERF_COUNT); > - OUT_RELOC(dst_bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, > - dst_offset); > - OUT_BATCH(report_id); > - ADVANCE_BATCH(); > - } else { > - /* XXX: NB: n dwords arg is actually magic since it internally > - * automatically accounts for larger addresses on gen >= 8... > - */ > - BEGIN_BATCH(3, 1); > - OUT_BATCH(GEN8_MI_REPORT_PERF_COUNT); > - OUT_RELOC(dst_bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, > - dst_offset); > - OUT_BATCH(report_id); > - ADVANCE_BATCH(); > - } > -} > - > static void > test_mi_rpc(void) > { > @@ -2580,38 +3078,6 @@ test_mi_rpc(void) > __perf_close(stream_fd); > } > > -static void > -scratch_buf_memset(drm_intel_bo *bo, int width, int height, uint32_t color) > -{ > - int ret; > - > - ret = drm_intel_bo_map(bo, true /* writable */); > - igt_assert_eq(ret, 0); > - > - for (int i = 0; i < width * height; i++) > - ((uint32_t *)bo->virtual)[i] = color; > - > - drm_intel_bo_unmap(bo); > -} > - > -static void > -scratch_buf_init(drm_intel_bufmgr *bufmgr, > - struct igt_buf *buf, > - int width, int height, > - uint32_t color) > -{ > - size_t stride = width * 4; > - size_t size = stride * height; > - drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, "", size, 4096); > - > - scratch_buf_memset(bo, width, height, color); > - > - buf->bo = bo; > - buf->stride = stride; > - buf->tiling = I915_TILING_NONE; > - buf->size = size; > -} > - > static void > emit_stall_timestamp_and_rpc(struct intel_batchbuffer *batch, > drm_intel_bo *dst, > -- > 2.14.1 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > https://lists.freedesktop.org/mailman/listinfo/intel-gfx _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx