On 08/31, Lionel Landwerlin wrote: > New issues that were discovered while making the tests work on Gen8+ : > > - we need to measure timings between periodic reports and discard all > other kind of reports > > - it seems periodicity of the reports can be affected outside of RC6 > (frequency change), we can detect this by looking at the amount of > clock cycles per timestamp deltas > > v2: Drop some unused variables (Matthew) > > Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx> > --- <SNIP> > + > +static uint32_t > +i915_get_one_gpu_timestamp(uint32_t *context_id) > +{ > + drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096); > + drm_intel_context *mi_rpc_ctx = drm_intel_gem_context_create(bufmgr); > + drm_intel_bo *mi_rpc_bo = drm_intel_bo_alloc(bufmgr, "mi_rpc dest bo", 4096, 64); alignment=64 ? > + struct intel_batchbuffer *mi_rpc_batch = intel_batchbuffer_alloc(bufmgr, devid); > + int ret; > + uint32_t timestamp; > + > + drm_intel_bufmgr_gem_enable_reuse(bufmgr); > + > + if (context_id) { > + ret = drm_intel_gem_context_get_id(mi_rpc_ctx, context_id); > + igt_assert_eq(ret, 0); > + } > + > + igt_assert(mi_rpc_ctx); > + igt_assert(mi_rpc_bo); > + igt_assert(mi_rpc_batch); > + > + ret = drm_intel_bo_map(mi_rpc_bo, true); > + igt_assert_eq(ret, 0); > + memset(mi_rpc_bo->virtual, 0x80, 4096); > + drm_intel_bo_unmap(mi_rpc_bo); > + > + emit_report_perf_count(mi_rpc_batch, > + mi_rpc_bo, /* dst */ > + 0, /* dst offset in bytes */ > + 0xdeadbeef); /* report ID */ > + > + intel_batchbuffer_flush_with_context(mi_rpc_batch, mi_rpc_ctx); > + > + ret = drm_intel_bo_map(mi_rpc_bo, false /* write enable */); > + igt_assert_eq(ret, 0); > + timestamp = ((uint32_t *)mi_rpc_bo->virtual)[1]; > + drm_intel_bo_unmap(mi_rpc_bo); > + > + drm_intel_bo_unreference(mi_rpc_bo); > + intel_batchbuffer_free(mi_rpc_batch); > + drm_intel_gem_context_destroy(mi_rpc_ctx); > + drm_intel_bufmgr_destroy(bufmgr); > + > + return timestamp; > +} > > static void > hsw_sanity_check_render_basic_reports(uint32_t *oa_report0, uint32_t *oa_report1, > @@ -1050,7 +1206,6 @@ i915_read_reports_until_timestamp(enum drm_i915_oa_format oa_format, > return total_len; > } > > - > /* CAP_SYS_ADMIN is required to open system wide metrics, unless the system > * control parameter dev.i915.perf_stream_paranoid == 0 */ > static void > @@ -1365,20 +1520,6 @@ open_and_read_2_oa_reports(int format_id, > __perf_close(stream_fd); > } > > -static void > -gen8_read_report_clock_ratios(uint32_t *report, > - uint32_t *slice_freq_mhz, > - uint32_t *unslice_freq_mhz) > -{ > - uint32_t unslice_freq = report[0] & 0x1ff; > - uint32_t slice_freq_low = (report[0] >> 25) & 0x7f; > - uint32_t slice_freq_high = (report[0] >> 9) & 0x3; > - uint32_t slice_freq = slice_freq_low | (slice_freq_high << 7); > - > - *slice_freq_mhz = (slice_freq * 16666) / 1000; > - *unslice_freq_mhz = (unslice_freq * 16666) / 1000; > -} > - > static void > print_reports(uint32_t *oa_report0, uint32_t *oa_report1, int fmt) > { > @@ -1570,74 +1711,456 @@ test_oa_formats(void) > } > } > > + > +enum load { > + LOW, > + HIGH > +}; > + > +#define LOAD_HELPER_PAUSE_USEC 500 > + > +static struct load_helper { > + int devid; > + int has_ppgtt; > + drm_intel_bufmgr *bufmgr; > + drm_intel_context *context; > + uint32_t context_id; > + struct intel_batchbuffer *batch; > + enum load load; > + bool exit; > + struct igt_helper_process igt_proc; > + struct igt_buf src, dst; > +} lh = { 0, }; We can drop the initialisation since this is static. > + > +static void load_helper_signal_handler(int sig) > +{ > + if (sig == SIGUSR2) > + lh.load = lh.load == LOW ? HIGH : LOW; > + else > + lh.exit = true; > +} > + > +static void load_helper_set_load(enum load load) > +{ > + igt_assert(lh.igt_proc.running); > + > + if (lh.load == load) > + return; > + > + lh.load = load; > + kill(lh.igt_proc.pid, SIGUSR2); > +} > + > +static void load_helper_run(enum load load) > +{ > + /* > + * FIXME fork helpers won't get cleaned up when started from within a > + * subtest, so handle the case where it sticks around a bit too long. > + */ > + if (lh.igt_proc.running) { > + load_helper_set_load(load); > + return; > + } > + > + lh.load = load; > + > + igt_fork_helper(&lh.igt_proc) { > + signal(SIGUSR1, load_helper_signal_handler); > + signal(SIGUSR2, load_helper_signal_handler); > + > + while (!lh.exit) { > + int ret; > + > + render_copy(lh.batch, > + lh.context, > + &lh.src, 0, 0, 1920, 1080, > + &lh.dst, 0, 0); > + > + intel_batchbuffer_flush_with_context(lh.batch, > + lh.context); > + > + ret = drm_intel_gem_context_get_id(lh.context, > + &lh.context_id); > + igt_assert_eq(ret, 0); > + > + drm_intel_bo_wait_rendering(lh.dst.bo); > + > + /* Lower the load by pausing after every submitted > + * write. */ > + if (lh.load == LOW) > + usleep(LOAD_HELPER_PAUSE_USEC); > + } > + } > +} > + > +static void load_helper_stop(void) > +{ > + kill(lh.igt_proc.pid, SIGUSR1); > + igt_assert(igt_wait_helper(&lh.igt_proc) == 0); > +} > + > +static void load_helper_init(void) > +{ > + int ret; > + > + lh.devid = intel_get_drm_devid(drm_fd); > + lh.has_ppgtt = gem_uses_ppgtt(drm_fd); > + > + /* MI_STORE_DATA can only use GTT address on gen4+/g33 and needs > + * snoopable mem on pre-gen6. Hence load-helper only works on gen6+, but > + * that's also all we care about for the rps testcase*/ > + igt_assert(intel_gen(lh.devid) >= 6); > + lh.bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096); > + igt_assert(lh.bufmgr); > + > + drm_intel_bufmgr_gem_enable_reuse(lh.bufmgr); > + > + lh.context = drm_intel_gem_context_create(lh.bufmgr); > + igt_assert(lh.context); > + > + lh.context_id = 0xffffffff; > + ret = drm_intel_gem_context_get_id(lh.context, &lh.context_id); > + igt_assert_eq(ret, 0); > + igt_assert_neq(lh.context_id, 0xffffffff); > + > + lh.batch = intel_batchbuffer_alloc(lh.bufmgr, lh.devid); > + igt_assert(lh.batch); > + > + scratch_buf_init(lh.bufmgr, &lh.dst, 1920, 1080, 0); > + scratch_buf_init(lh.bufmgr, &lh.src, 1920, 1080, 0); > +} > + > +static void load_helper_fini(void) > +{ > + if (lh.igt_proc.running) > + load_helper_stop(); > + > + if (lh.src.bo) > + drm_intel_bo_unreference(lh.src.bo); > + if (lh.dst.bo) > + drm_intel_bo_unreference(lh.dst.bo); > + > + if (lh.batch) > + intel_batchbuffer_free(lh.batch); > + > + if (lh.context) > + drm_intel_gem_context_destroy(lh.context); > + > + if (lh.bufmgr) > + drm_intel_bufmgr_destroy(lh.bufmgr); > +} > + > static void > test_oa_exponents(void) > { > - igt_debug("Testing OA timer exponents\n"); > + load_helper_init(); > + load_helper_run(HIGH); > > /* It's asking a lot to sample with a 160 nanosecond period and the > * test can fail due to buffer overflows if it wasn't possible to > * keep up, so we don't start from an exponent of zero... > */ > - for (int i = 5; i < 20; i++) { > - uint32_t expected_timestamp_delta; > - uint32_t timestamp_delta; > - uint32_t oa_report0[64]; > - uint32_t oa_report1[64]; > + for (int exponent = 5; exponent < 18; exponent++) { Why the change from 20 -> 18? Otherwise looks okay: Reviewed-by: Matthew Auld <matthew.auld@xxxxxxxxx> _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx