From: Robert Bragg <robert@xxxxxxxxxxxxx> Signed-off-by: Robert Bragg <robert@xxxxxxxxxxxxx> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx> --- tests/perf.c | 135 +++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 84 insertions(+), 51 deletions(-) diff --git a/tests/perf.c b/tests/perf.c index 15f41246..d47e45c8 100644 --- a/tests/perf.c +++ b/tests/perf.c @@ -249,6 +249,7 @@ static uint64_t gt_max_freq_mhz = 0; static uint64_t timestamp_frequency = 12500000; static enum drm_i915_oa_format test_oa_format; static bool *undefined_a_counters; +static uint64_t oa_exp_1_millisec; static igt_render_copyfunc_t render_copy = NULL; static uint32_t (*read_report_ticks)(uint32_t *report, @@ -424,11 +425,11 @@ timebase_scale(uint32_t u32_delta) return ((uint64_t)u32_delta * NSEC_PER_SEC) / timestamp_frequency; } -/* Return the largest OA exponent that will still result in a sampling - * frequency higher than the given frequency. +/* Returns: the largest OA exponent that will still result in a sampling period + * less than or equal to the given @period. */ static int -max_oa_exponent_for_higher_freq(uint64_t freq) +max_oa_exponent_for_period_lte(uint64_t period) { /* NB: timebase_scale() takes a uint32_t and an exponent of 30 * would already represent a period of ~3 minutes so there's @@ -436,9 +437,8 @@ max_oa_exponent_for_higher_freq(uint64_t freq) */ for (int i = 0; i < 30; i++) { uint64_t oa_period = timebase_scale(2 << i); - uint32_t oa_freq = NSEC_PER_SEC / oa_period; - if (oa_freq <= freq) + if (oa_period > period) return max(0, i - 1); } @@ -446,6 +446,25 @@ max_oa_exponent_for_higher_freq(uint64_t freq) return -1; } +/* Return: the largest OA exponent that will still result in a sampling + * frequency greater than the given @frequency. + */ +static int +max_oa_exponent_for_freq_gt(uint64_t frequency) +{ + uint64_t period = NSEC_PER_SEC / frequency; + + igt_assert_neq(period, 0); + + return max_oa_exponent_for_period_lte(period - 1); +} + +static uint64_t +oa_exponent_to_ns(int exponent) +{ + return 1000000000ULL * (2ULL << exponent) / timestamp_frequency; +} + static uint64_t gen8_read_40bit_a_counter(uint32_t *report, enum drm_i915_oa_format fmt, int a_id) { @@ -524,6 +543,8 @@ init_sys_info(void) test_set_name, test_set_uuid); + oa_exp_1_millisec = max_oa_exponent_for_period_lte(1000000); + snprintf(buf, sizeof(buf), "/sys/class/drm/card%d/metrics/%s/id", card, @@ -593,7 +614,7 @@ test_system_wide_paranoid(void) /* OA unit configuration */ DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id, DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format, - DRM_I915_PERF_PROP_OA_EXPONENT, 13, /* 1 millisecond */ + DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec, }; struct drm_i915_perf_open_param param = { .flags = I915_PERF_FLAG_FD_CLOEXEC | @@ -619,7 +640,7 @@ test_system_wide_paranoid(void) /* OA unit configuration */ DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id, DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format, - DRM_I915_PERF_PROP_OA_EXPONENT, 13, /* 1 millisecond */ + DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec, }; struct drm_i915_perf_open_param param = { .flags = I915_PERF_FLAG_FD_CLOEXEC | @@ -653,7 +674,7 @@ test_invalid_open_flags(void) /* OA unit configuration */ DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id, DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format, - DRM_I915_PERF_PROP_OA_EXPONENT, 13, /* 1 millisecond */ + DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec, }; struct drm_i915_perf_open_param param = { .flags = ~0, /* Undefined flag bits set! */ @@ -673,7 +694,7 @@ test_invalid_oa_metric_set_id(void) /* OA unit configuration */ DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format, - DRM_I915_PERF_PROP_OA_EXPONENT, 13, /* 1 millisecond */ + DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec, DRM_I915_PERF_PROP_OA_METRICS_SET, UINT64_MAX, }; struct drm_i915_perf_open_param param = { @@ -708,7 +729,7 @@ test_invalid_oa_format_id(void) /* OA unit configuration */ DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id, - DRM_I915_PERF_PROP_OA_EXPONENT, 13, /* 1 millisecond */ + DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec, DRM_I915_PERF_PROP_OA_FORMAT, UINT64_MAX, }; struct drm_i915_perf_open_param param = { @@ -742,7 +763,7 @@ test_missing_sample_flags(void) /* OA unit configuration */ DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id, - DRM_I915_PERF_PROP_OA_EXPONENT, 13, /* 1 millisecond */ + DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec, DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format, }; struct drm_i915_perf_open_param param = { @@ -982,8 +1003,6 @@ print_reports(uint32_t *oa_report0, uint32_t *oa_report1, int fmt) static void test_oa_formats(void) { - int oa_exponent = 13; - for (int i = 0; i < ARRAY_SIZE(oa_formats); i++) { uint32_t oa_report0[64]; uint32_t oa_report1[64]; @@ -1013,7 +1032,7 @@ test_oa_formats(void) igt_debug("Checking OA format %s\n", oa_formats[i].name); open_and_read_2_oa_reports(i, - oa_exponent, + oa_exp_1_millisec, oa_report0, oa_report1, false); /* timer reports only */ @@ -1252,7 +1271,7 @@ static void test_low_oa_exponent_permissions(void) { int max_freq = read_u64_file("/proc/sys/dev/i915/oa_max_sample_rate"); - int bad_exponent = max_oa_exponent_for_higher_freq(max_freq); + int bad_exponent = max_oa_exponent_for_freq_gt(max_freq); int ok_exponent = bad_exponent + 1; uint64_t properties[] = { /* Include OA reports in samples */ @@ -1326,7 +1345,7 @@ test_per_context_mode_unprivileged(void) /* OA unit configuration */ DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id, DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format, - DRM_I915_PERF_PROP_OA_EXPONENT, 13, /* 1 millisecond */ + DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec, }; struct drm_i915_perf_open_param param = { .flags = I915_PERF_FLAG_FD_CLOEXEC, @@ -1399,13 +1418,14 @@ get_time(void) static void test_blocking(void) { - /* 40 milliseconds + /* ~40 milliseconds * * Having a period somewhat > sysconf(_SC_CLK_TCK) helps to stop * scheduling (liable to kick in when we make blocking poll()s/reads) * from interfering with the test. */ - int oa_exponent = 18; + int oa_exponent = max_oa_exponent_for_period_lte(40000000); + uint64_t oa_period = oa_exponent_to_ns(oa_exponent); uint64_t properties[] = { /* Include OA reports in samples */ DRM_I915_PERF_PROP_SAMPLE_OA, true, @@ -1428,8 +1448,7 @@ test_blocking(void) int64_t tick_ns = 1000000000 / sysconf(_SC_CLK_TCK); int64_t test_duration_ns = tick_ns * 1000; - /* Based on the 40ms OA sampling period set above: max OA samples: */ - int max_iterations = (test_duration_ns / 40000000ull) + 1; + int max_iterations = (test_duration_ns / oa_period) + 1; /* It's a bit tricky to put a lower limit here, but we expect a * relatively low latency for seeing reports, while we don't currently @@ -1440,7 +1459,7 @@ test_blocking(void) * the knowledge that that the driver uses a 200Hz hrtimer (5ms period) * to check for data and giving some time to read(). */ - int min_iterations = (test_duration_ns / 46000000ull); + int min_iterations = (test_duration_ns / (oa_period + 6000000ull)); int64_t start; int n = 0; @@ -1489,7 +1508,7 @@ test_blocking(void) user_ns = (end_times.tms_utime - start_times.tms_utime) * tick_ns; kernel_ns = (end_times.tms_stime - start_times.tms_stime) * tick_ns; - igt_debug("%d blocking reads during test with 25Hz OA sampling\n", n); + igt_debug("%d blocking reads during test with ~25Hz OA sampling\n", n); igt_debug("time in userspace = %"PRIu64"ns (+-%dns) (start utime = %d, end = %d)\n", user_ns, (int)tick_ns, (int)start_times.tms_utime, (int)end_times.tms_utime); @@ -1515,13 +1534,14 @@ test_blocking(void) static void test_polling(void) { - /* 40 milliseconds + /* ~40 milliseconds * * Having a period somewhat > sysconf(_SC_CLK_TCK) helps to stop * scheduling (liable to kick in when we make blocking poll()s/reads) * from interfering with the test. */ - int oa_exponent = 18; + int oa_exponent = max_oa_exponent_for_period_lte(40000000); + uint64_t oa_period = oa_exponent_to_ns(oa_exponent); uint64_t properties[] = { /* Include OA reports in samples */ DRM_I915_PERF_PROP_SAMPLE_OA, true, @@ -1545,8 +1565,7 @@ test_polling(void) int64_t tick_ns = 1000000000 / sysconf(_SC_CLK_TCK); int64_t test_duration_ns = tick_ns * 1000; - /* Based on the 40ms OA sampling period set above: max OA samples: */ - int max_iterations = (test_duration_ns / 40000000ull) + 1; + int max_iterations = (test_duration_ns / oa_period) + 1; /* It's a bit tricky to put a lower limit here, but we expect a * relatively low latency for seeing reports, while we don't currently @@ -1557,7 +1576,7 @@ test_polling(void) * the knowledge that that the driver uses a 200Hz hrtimer (5ms period) * to check for data and giving some time to read(). */ - int min_iterations = (test_duration_ns / 46000000ull); + int min_iterations = (test_duration_ns / (oa_period + 6000000ull)); int64_t start; int n = 0; @@ -1636,7 +1655,7 @@ test_polling(void) user_ns = (end_times.tms_utime - start_times.tms_utime) * tick_ns; kernel_ns = (end_times.tms_stime - start_times.tms_stime) * tick_ns; - igt_debug("%d blocking poll()s during test with 25Hz OA sampling\n", n); + igt_debug("%d blocking poll()s during test with ~25Hz OA sampling\n", n); igt_debug("time in userspace = %"PRIu64"ns (+-%dns) (start utime = %d, end = %d)\n", user_ns, (int)tick_ns, (int)start_times.tms_utime, (int)end_times.tms_utime); @@ -1662,7 +1681,9 @@ test_polling(void) static void test_buffer_fill(void) { - int oa_exponent = 5; /* 5 micro seconds */ + /* ~5 micro second period */ + int oa_exponent = max_oa_exponent_for_period_lte(5000); + uint64_t oa_period = oa_exponent_to_ns(oa_exponent); uint64_t properties[] = { /* Include OA reports in samples */ DRM_I915_PERF_PROP_SAMPLE_OA, true, @@ -1680,7 +1701,12 @@ test_buffer_fill(void) int stream_fd = __perf_open(drm_fd, ¶m); int buf_size = 65536 * (256 + sizeof(struct drm_i915_perf_record_header)); uint8_t *buf = malloc(buf_size); + size_t oa_buf_size = 16 * 1024 * 1024; + size_t report_size = oa_formats[test_oa_format].size; + int n_full_oa_reports = oa_buf_size / report_size; + uint64_t fill_duration = n_full_oa_reports * oa_period; + igt_assert(fill_duration < 1000000000); for (int i = 0; i < 5; i++) { struct drm_i915_perf_record_header *header; @@ -1688,9 +1714,9 @@ test_buffer_fill(void) int offset = 0; int len; - /* It should take ~330 milliseconds to fill a 16MB OA buffer with a - * 5 microsecond sampling period and 256 byte reports. */ - nanosleep(&(struct timespec){ .tv_sec = 0, .tv_nsec = 500000000 }, NULL); + nanosleep(&(struct timespec){ .tv_sec = 0, + .tv_nsec = fill_duration * 1.25 }, + NULL); while ((len = read(stream_fd, buf, buf_size)) == -1 && errno == EINTR) ; @@ -1707,15 +1733,17 @@ test_buffer_fill(void) igt_assert_eq(overflow_seen, true); - nanosleep(&(struct timespec){ .tv_sec = 0, .tv_nsec = 1000000 }, NULL); + nanosleep(&(struct timespec){ .tv_sec = 0, + .tv_nsec = fill_duration / 2 }, + NULL); while ((len = read(stream_fd, buf, buf_size)) == -1 && errno == EINTR) ; igt_assert_neq(len, -1); - /* expect ~ 200 records in 1 millisecond */ - igt_assert(len > 256 * 150); + igt_assert(len > report_size * n_full_oa_reports * 0.45); + igt_assert(len < report_size * n_full_oa_reports * 0.55); overflow_seen = false; for (offset = 0; offset < len; offset += header->size) { @@ -1736,7 +1764,9 @@ test_buffer_fill(void) static void test_enable_disable(void) { - int oa_exponent = 5; /* 5 micro seconds */ + /* ~5 micro second period */ + int oa_exponent = max_oa_exponent_for_period_lte(5000); + uint64_t oa_period = oa_exponent_to_ns(oa_exponent); uint64_t properties[] = { /* Include OA reports in samples */ DRM_I915_PERF_PROP_SAMPLE_OA, true, @@ -1755,20 +1785,22 @@ test_enable_disable(void) int stream_fd = __perf_open(drm_fd, ¶m); int buf_size = 65536 * (256 + sizeof(struct drm_i915_perf_record_header)); uint8_t *buf = malloc(buf_size); + size_t oa_buf_size = 16 * 1024 * 1024; + size_t report_size = oa_formats[test_oa_format].size; + int n_full_oa_reports = oa_buf_size / report_size; + uint64_t fill_duration = n_full_oa_reports * oa_period; for (int i = 0; i < 5; i++) { int len; - /* If the stream were enabled then it would take ~330 - * milliseconds to fill a 16MB OA buffer with a 5 microsecond - * sampling period and 256 byte reports. - * - * Giving enough time for an overflow might help catch whether + /* Giving enough time for an overflow might help catch whether * the OA unit has been enabled even if the driver might at * least avoid copying reports while disabled. */ - nanosleep(&(struct timespec){ .tv_sec = 0, .tv_nsec = 500000000 }, NULL); + nanosleep(&(struct timespec){ .tv_sec = 0, + .tv_nsec = fill_duration * 1.25 }, + NULL); while ((len = read(stream_fd, buf, buf_size)) == -1 && errno == EINTR) ; @@ -1778,15 +1810,17 @@ test_enable_disable(void) do_ioctl(stream_fd, I915_PERF_IOCTL_ENABLE, 0); - nanosleep(&(struct timespec){ .tv_sec = 0, .tv_nsec = 1000000 }, NULL); + nanosleep(&(struct timespec){ .tv_sec = 0, + .tv_nsec = fill_duration / 2 }, + NULL); while ((len = read(stream_fd, buf, buf_size)) == -1 && errno == EINTR) ; igt_assert_neq(len, -1); - /* expect ~ 200 records in 1 millisecond */ - igt_assert(len > 256 * 150 && len < 256 * 2000); + igt_assert(len > report_size * n_full_oa_reports * 0.45); + igt_assert(len < report_size * n_full_oa_reports * 0.55); do_ioctl(stream_fd, I915_PERF_IOCTL_DISABLE, 0); @@ -1807,7 +1841,7 @@ test_enable_disable(void) static void test_short_reads(void) { - int oa_exponent = 5; /* 5 micro seconds */ + int oa_exponent = max_oa_exponent_for_period_lte(5000); uint64_t properties[] = { /* Include OA reports in samples */ DRM_I915_PERF_PROP_SAMPLE_OA, true, @@ -2350,7 +2384,6 @@ test_per_ctx_mi_rpc(void) static void test_rc6_disable(void) { - int oa_exponent = 13; /* 1 millisecond */ uint64_t properties[] = { /* Include OA reports in samples */ DRM_I915_PERF_PROP_SAMPLE_OA, true, @@ -2358,7 +2391,7 @@ test_rc6_disable(void) /* OA unit configuration */ DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id, DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format, - DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent, + DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec, }; struct drm_i915_perf_open_param param = { .flags = I915_PERF_FLAG_FD_CLOEXEC, @@ -2424,7 +2457,6 @@ done: static void test_i915_ref_count(void) { - int oa_exponent = 13; /* 1 millisecond */ uint64_t properties[] = { /* Include OA reports in samples */ DRM_I915_PERF_PROP_SAMPLE_OA, true, @@ -2432,7 +2464,7 @@ test_i915_ref_count(void) /* OA unit configuration */ DRM_I915_PERF_PROP_OA_METRICS_SET, 0 /* updated below */, DRM_I915_PERF_PROP_OA_FORMAT, 0, /* update below */ - DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent, + DRM_I915_PERF_PROP_OA_EXPONENT, 0, /* update below */ }; struct drm_i915_perf_open_param param = { .flags = I915_PERF_FLAG_FD_CLOEXEC, @@ -2462,6 +2494,7 @@ test_i915_ref_count(void) igt_require(init_sys_info()); properties[3] = test_metric_set_id; properties[5] = test_oa_format; + properties[7] = oa_exp_1_millisec; ref_count0 = read_i915_module_ref(); igt_debug("initial ref count with drm_fd open = %u\n", ref_count0); @@ -2481,7 +2514,7 @@ test_i915_ref_count(void) read_2_oa_reports(stream_fd, test_oa_format, - oa_exponent, + oa_exp_1_millisec, oa_report0, oa_report1, false); /* not just timer reports */ -- 2.11.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx