On Wed, Sep 13, 2017 at 04:22:01PM +0530, Sagar Arun Kamble wrote: > This tests different performance metrics being streamed by i915 driver. > This feature in i915 also referred as Driver Assisted Performance > Capture (DAPC) provides userspace an ability to sample the OA reports > at execbuf boundaries and associate other metadata like CTX ID, PID, TAG > with each sample. Further, ability to capture engine timestamps and MMIO > reads is also provided. > > v2: Defining the enums for OA_SOURCE and PERF_PROP locally till the > libdrm changes are merged. > > Cc: Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx> > Signed-off-by: Sagar Arun Kamble <sagar.a.kamble@xxxxxxxxx> Reviewed-by: Ewelina Musial <ewelina.musial@xxxxxxxxx> > --- > tests/Makefile.sources | 1 + > tests/intel_perf_dapc.c | 811 ++++++++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 812 insertions(+) > create mode 100644 tests/intel_perf_dapc.c > > diff --git a/tests/Makefile.sources b/tests/Makefile.sources > index 6c19509..24bd099 100644 > --- a/tests/Makefile.sources > +++ b/tests/Makefile.sources > @@ -170,6 +170,7 @@ TESTS_progs = \ > gen7_forcewake_mt \ > gvt_basic \ > intel_perf \ > + intel_perf_dapc \ > kms_3d \ > kms_addfb_basic \ > kms_atomic \ > diff --git a/tests/intel_perf_dapc.c b/tests/intel_perf_dapc.c > new file mode 100644 > index 0000000..92b4dee > --- /dev/null > +++ b/tests/intel_perf_dapc.c > @@ -0,0 +1,811 @@ > +/* > + * Copyright © 2017 Intel Corporation > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this 
permission notice (including the next > + * paragraph) shall be included in all copies or substantial portions of the > + * Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS > + * IN THE SOFTWARE. > + * > + */ > +#include <fcntl.h> > + > +#include "igt.h" > +#include "drm.h" > + > +IGT_TEST_DESCRIPTION("Test the i915 command stream based perf metrics streaming interface"); > + > +/* Temporarily copy i915-perf uapi here to avoid a dependency on libdrm's > + * i915_drm.h copy being updated with the i915-perf interface before this > + * test can land in i-g-t. 
> + * > + * TODO: remove this once the interface lands in libdrm > + */ > +#ifndef DRM_I915_PERF_OPEN > +#define DRM_I915_PERF_OPEN 0x36 > +#define DRM_IOCTL_I915_PERF_OPEN DRM_IOW(DRM_COMMAND_BASE + \ > + DRM_I915_PERF_OPEN, \ > + struct drm_i915_perf_open_param) > + > +enum drm_i915_oa_format { > + I915_OA_FORMAT_A13 = 1, /* HSW only */ > + I915_OA_FORMAT_A29, /* HSW only */ > + I915_OA_FORMAT_A13_B8_C8, /* HSW only */ > + I915_OA_FORMAT_B4_C8, /* HSW only */ > + I915_OA_FORMAT_A45_B8_C8, /* HSW only */ > + I915_OA_FORMAT_B4_C8_A16, /* HSW only */ > + I915_OA_FORMAT_C4_B8, /* HSW+ */ > + > + /* Gen8+ */ > + I915_OA_FORMAT_A12, > + I915_OA_FORMAT_A12_B8_C8, > + I915_OA_FORMAT_A32u40_A4u32_B8_C8, > + > + I915_OA_FORMAT_MAX /* non-ABI */ > +}; > + > +enum drm_i915_perf_sample_oa_source { > + I915_PERF_SAMPLE_OA_SOURCE_OABUFFER, > + I915_PERF_SAMPLE_OA_SOURCE_CS, > + I915_PERF_SAMPLE_OA_SOURCE_MAX /* non-ABI */ > +}; > + > +#define I915_PERF_MMIO_NUM_MAX 8 > +struct drm_i915_perf_mmio_list { > + __u32 num_mmio; > + __u32 mmio_list[I915_PERF_MMIO_NUM_MAX]; > +}; > + > +enum drm_i915_perf_property_id { > + DRM_I915_PERF_PROP_CTX_HANDLE = 1, > + DRM_I915_PERF_PROP_SAMPLE_OA, > + DRM_I915_PERF_PROP_OA_METRICS_SET, > + DRM_I915_PERF_PROP_OA_FORMAT, > + DRM_I915_PERF_PROP_OA_EXPONENT, > + DRM_I915_PERF_PROP_SAMPLE_OA_SOURCE, > + DRM_I915_PERF_PROP_ENGINE, > + DRM_I915_PERF_PROP_SAMPLE_CTX_ID, > + DRM_I915_PERF_PROP_SAMPLE_PID, > + DRM_I915_PERF_PROP_SAMPLE_TAG, > + DRM_I915_PERF_PROP_SAMPLE_TS, > + DRM_I915_PERF_PROP_SAMPLE_MMIO, > + DRM_I915_PERF_PROP_MAX /* non-ABI */ > +}; > + > +struct drm_i915_perf_open_param { > + __u32 flags; > +#define I915_PERF_FLAG_FD_CLOEXEC (1<<0) > +#define I915_PERF_FLAG_FD_NONBLOCK (1<<1) > +#define I915_PERF_FLAG_DISABLED (1<<2) > + > + __u32 num_properties; > + __u64 properties_ptr; > +}; > + > +#define I915_PERF_IOCTL_ENABLE _IO('i', 0x0) > +#define I915_PERF_IOCTL_DISABLE _IO('i', 0x1) > + > +struct drm_i915_perf_record_header { > + 
__u32 type; > + __u16 pad; > + __u16 size; > +}; > + > +enum drm_i915_perf_record_type { > + DRM_I915_PERF_RECORD_SAMPLE = 1, > + DRM_I915_PERF_RECORD_OA_REPORT_LOST = 2, > + DRM_I915_PERF_RECORD_OA_BUFFER_LOST = 3, > + DRM_I915_PERF_RECORD_MAX /* non-ABI */ > +}; > +#endif /* !DRM_I915_PERF_OPEN */ > + > +/* There is no ifdef we can use for those formats :( */ > +enum { > + local_I915_OA_FORMAT_A12 = I915_OA_FORMAT_C4_B8 + 1, > + local_I915_OA_FORMAT_A12_B8_C8 = I915_OA_FORMAT_C4_B8 + 2, > + local_I915_OA_FORMAT_A32u40_A4u32_B8_C8 = I915_OA_FORMAT_C4_B8 + 3, > +}; > + > +#define local_I915_OA_FORMAT_MAX (local_I915_OA_FORMAT_A32u40_A4u32_B8_C8 + 1) > + > +enum { > + local_I915_PERF_SAMPLE_OA_SOURCE_OABUFFER, > + local_I915_PERF_SAMPLE_OA_SOURCE_CS, > + local_I915_PERF_SAMPLE_OA_SOURCE_MAX /* non-ABI */ > +}; > + > +enum { > + local_DRM_I915_PERF_PROP_SAMPLE_OA_SOURCE = DRM_I915_PERF_PROP_OA_EXPONENT + 1, > + local_DRM_I915_PERF_PROP_ENGINE = DRM_I915_PERF_PROP_OA_EXPONENT + 2, > + local_DRM_I915_PERF_PROP_SAMPLE_CTX_ID = DRM_I915_PERF_PROP_OA_EXPONENT + 3, > + local_DRM_I915_PERF_PROP_SAMPLE_PID = DRM_I915_PERF_PROP_OA_EXPONENT + 4, > + local_DRM_I915_PERF_PROP_SAMPLE_TAG = DRM_I915_PERF_PROP_OA_EXPONENT + 5, > + local_DRM_I915_PERF_PROP_SAMPLE_TS = DRM_I915_PERF_PROP_OA_EXPONENT + 6, > + local_DRM_I915_PERF_PROP_SAMPLE_MMIO = DRM_I915_PERF_PROP_OA_EXPONENT + 7, > + local_DRM_I915_PERF_PROP_MAX /* non-ABI */ > +}; > + > +static struct { > + const char *name; > + size_t size; > + int a40_high_off; /* bytes */ > + int a40_low_off; > + int n_a40; > + int a_off; > + int n_a; > + int first_a; > + int b_off; > + int n_b; > + int c_off; > + int n_c; > + int min_gen; > + int max_gen; > +} oa_formats[local_I915_OA_FORMAT_MAX] = { > + [I915_OA_FORMAT_A13] = { /* HSW only */ > + "A13", .size = 64, > + .a_off = 12, .n_a = 13, > + .max_gen = 7 }, > + [I915_OA_FORMAT_A29] = { /* HSW only */ > + "A29", .size = 128, > + .a_off = 12, .n_a = 29, > + .max_gen = 7 }, > + 
[I915_OA_FORMAT_A13_B8_C8] = { /* HSW only */ > + "A13_B8_C8", .size = 128, > + .a_off = 12, .n_a = 13, > + .b_off = 64, .n_b = 8, > + .c_off = 96, .n_c = 8, > + .max_gen = 7 }, > + [I915_OA_FORMAT_A45_B8_C8] = { /* HSW only */ > + "A45_B8_C8", .size = 256, > + .a_off = 12, .n_a = 45, > + .b_off = 192, .n_b = 8, > + .c_off = 224, .n_c = 8, > + .max_gen = 7 }, > + [I915_OA_FORMAT_B4_C8] = { /* HSW only */ > + "B4_C8", .size = 64, > + .b_off = 16, .n_b = 4, > + .c_off = 32, .n_c = 8, > + .max_gen = 7 }, > + [I915_OA_FORMAT_B4_C8_A16] = { /* HSW only */ > + "B4_C8_A16", .size = 128, > + .b_off = 16, .n_b = 4, > + .c_off = 32, .n_c = 8, > + .a_off = 60, .n_a = 16, .first_a = 29, > + .max_gen = 7 }, > + [I915_OA_FORMAT_C4_B8] = { /* HSW+ (header differs from HSW-Gen8+) */ > + "C4_B8", .size = 64, > + .c_off = 16, .n_c = 4, > + .b_off = 28, .n_b = 8 }, > + > + /* Gen8+ */ > + > + [local_I915_OA_FORMAT_A12] = { > + "A12", .size = 64, > + .a_off = 12, .n_a = 12, .first_a = 7, > + .min_gen = 8 }, > + [local_I915_OA_FORMAT_A12_B8_C8] = { > + "A12_B8_C8", .size = 128, > + .a_off = 12, .n_a = 12, > + .b_off = 64, .n_b = 8, > + .c_off = 96, .n_c = 8, .first_a = 7, > + .min_gen = 8 }, > + [local_I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { > + "A32u40_A4u32_B8_C8", .size = 256, > + .a40_high_off = 160, .a40_low_off = 16, .n_a40 = 32, > + .a_off = 144, .n_a = 4, .first_a = 32, > + .b_off = 192, .n_b = 8, > + .c_off = 224, .n_c = 8, > + .min_gen = 8 }, > + [I915_OA_FORMAT_C4_B8] = { > + "C4_B8", .size = 64, > + .c_off = 16, .n_c = 4, > + .b_off = 32, .n_b = 8, > + .min_gen = 8 }, > +}; > + > +static int drm_fd = -1; > +static uint32_t devid; > +static int card = -1; > + > +static uint64_t test_metric_set_id = UINT64_MAX; > + > +static uint64_t timestamp_frequency = 12500000; Please avoid magic numbers, we probably should define those somewhere. 
> +static enum drm_i915_oa_format test_oa_format; > +static uint64_t oa_exp_1_millisec; > + > +static igt_render_copyfunc_t render_copy = NULL; > + > +static uint64_t > +timebase_scale(uint32_t u32_delta) > +{ > + return ((uint64_t)u32_delta * NSEC_PER_SEC) / timestamp_frequency; > +} > + > +/* Returns: the largest OA exponent that will still result in a sampling period > + * less than or equal to the given @period. > + */ > +static int > +max_oa_exponent_for_period_lte(uint64_t period) > +{ > + /* NB: timebase_scale() takes a uint32_t and an exponent of 30 > + * would already represent a period of ~3 minutes so there's > + * really no need to consider higher exponents. > + */ > + for (int i = 0; i < 30; i++) { > + uint64_t oa_period = timebase_scale(2 << i); > + > + if (oa_period > period) > + return max(0, i - 1); > + } > + > + igt_assert(!"reached"); > + return -1; > +} > + > +static bool > +try_read_u64_file(const char *file, uint64_t *val) > +{ > + char buf[32]; > + int fd, n; > + > + fd = open(file, O_RDONLY); > + if (fd < 0) > + return false; > + > + while ((n = read(fd, buf, sizeof(buf) - 1)) < 0 && errno == EINTR) > + ; > + igt_assert(n >= 0); > + > + close(fd); > + > + buf[n] = '\0'; > + *val = strtoull(buf, NULL, 0); > + > + return true; > +} > + > +static void > +write_u64_file(const char *file, uint64_t val) > +{ > + char buf[32]; > + int fd, len, ret; > + > + fd = open(file, O_WRONLY); > + igt_assert(fd >= 0); > + > + len = snprintf(buf, sizeof(buf), "%"PRIu64, val); > + igt_assert(len > 0); > + > + while ((ret = write(fd, buf, len)) < 0 && errno == EINTR) > + ; > + igt_assert_eq(ret, len); > + > + close(fd); > +} > + Write/read helpers are used only once so maybe those functions are redundant? Or if we want those helpers we could move them to libs. 
> +static bool > +init_sys_info(void) > +{ > + const char *test_set_name = NULL; > + const char *test_set_uuid = NULL; > + char buf[256]; > + > + igt_assert_neq(card, -1); > + igt_assert_neq(devid, 0); > + > + timestamp_frequency = 12500000; The same here - magic number > + > + if (IS_HASWELL(devid)) { > + /* We don't have a TestOa metric set for Haswell so use > + * RenderBasic > + */ > + test_set_name = "RenderBasic"; > + test_set_uuid = "403d8832-1a27-4aa6-a64e-f5389ce7b212"; > + test_oa_format = I915_OA_FORMAT_A45_B8_C8; > + } else { > + test_set_name = "TestOa"; > + test_oa_format = local_I915_OA_FORMAT_A32u40_A4u32_B8_C8; > + > + if (IS_BROADWELL(devid)) { > + test_set_uuid = "d6de6f55-e526-4f79-a6a6-d7315c09044e"; > + } else if (IS_CHERRYVIEW(devid)) { > + test_set_uuid = "4a534b07-cba3-414d-8d60-874830e883aa"; > + } else if (IS_SKYLAKE(devid)) { > + switch (intel_gt(devid)) { > + case 1: > + test_set_uuid = > + "1651949f-0ac0-4cb1-a06f-dafd74a407d1"; > + break; > + case 2: > + test_set_uuid = > + "2b985803-d3c9-4629-8a4f-634bfecba0e8"; > + break; > + case 3: > + test_set_uuid = > + "882fa433-1f4a-4a67-a962-c741888fe5f5"; > + break; > + default: > + igt_debug("unsupported Skylake GT size\n"); > + return false; > + } > + timestamp_frequency = 12000000; And here :) > + } else if (IS_BROXTON(devid)) { > + test_set_uuid = "5ee72f5c-092f-421e-8b70-225f7c3e9612"; > + timestamp_frequency = 19200000; > + } else if (IS_KABYLAKE(devid)) { > + switch (intel_gt(devid)) { > + case 1: > + test_set_uuid = > + "baa3c7e4-52b6-4b85-801e-465a94b746dd"; > + break; > + case 2: > + test_set_uuid = > + "f1792f32-6db2-4b50-b4b2-557128f1688d"; > + break; > + default: > + igt_debug("unsupported Kabylake GT size\n"); > + return false; > + } > + timestamp_frequency = 12000000; > + } else if (IS_GEMINILAKE(devid)) { > + test_set_uuid = "dd3fd789-e783-4204-8cd0-b671bbccb0cf"; > + timestamp_frequency = 19200000; > + } else { > + igt_debug("unsupported GT\n"); > + return false; > + } > + } 
> + > + igt_debug("%s metric set UUID = %s\n", > + test_set_name, > + test_set_uuid); > + > + oa_exp_1_millisec = max_oa_exponent_for_period_lte(1000000); > + > + snprintf(buf, sizeof(buf), > + "/sys/class/drm/card%d/metrics/%s/id", > + card, > + test_set_uuid); > + > + return try_read_u64_file(buf, &test_metric_set_id); > +} > + > +static int > +__perf_open(int fd, struct drm_i915_perf_open_param *param) > +{ > + int ret = igt_ioctl(fd, DRM_IOCTL_I915_PERF_OPEN, param); > + > + igt_assert(ret >= 0); > + errno = 0; > + > + return ret; > +} > + > +static void > +test_cs_oa_stream_create(void) > +{ > + igt_fork(child, 1) { > + uint64_t properties[] = { > + /* Include OA reports in samples */ > + DRM_I915_PERF_PROP_SAMPLE_OA, true, > + > + /* OA unit configuration */ > + DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id, > + DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format, > + DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec, > + > + /* CS parameters */ > + local_DRM_I915_PERF_PROP_SAMPLE_CTX_ID, true, > + local_DRM_I915_PERF_PROP_ENGINE, I915_EXEC_BSD, > + }; > + struct drm_i915_perf_open_param param = { > + .flags = I915_PERF_FLAG_FD_CLOEXEC | > + I915_PERF_FLAG_FD_NONBLOCK, > + .num_properties = sizeof(properties) / 16, > + .properties_ptr = to_user_pointer(properties), > + }; > + int stream_fd; > + > + do_ioctl_err(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param, EINVAL); > + > + /* Send Render Engine as PROP_ENGINE */ > + properties[ARRAY_SIZE(properties)-1] = I915_EXEC_RENDER; > + > + stream_fd = __perf_open(drm_fd, &param); > + close(stream_fd); > + } > + > + igt_waitchildren(); > +} > + > +static void > +scratch_buf_init(drm_intel_bufmgr *bufmgr, > + struct igt_buf *buf, > + int width, int height, > + uint32_t color) > +{ > + size_t stride = width * 4; > + size_t size = stride * height; > + drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, "", size, 4096); > + int ret; > + > + ret = drm_intel_bo_map(bo, true /* writable */); > + igt_assert_eq(ret, 0); > + > + for (int i = 0; i 
< width * height; i++) > + ((uint32_t *)bo->virtual)[i] = color; > + > + drm_intel_bo_unmap(bo); > + > + buf->bo = bo; > + buf->stride = stride; > + buf->tiling = I915_TILING_NONE; > + buf->size = size; > +} > + > +/* > + * Given a set of CS properties including DRM_I915_PERF_PROP_SAMPLE_OA > + * this function returns the offset in the sample where OA report will > + * be located. > + */ > +static size_t > +get_oa_report_offset(uint64_t *properties, int prop_size) > +{ > + size_t offset = 0; > + int i = 0; > + > + do { > + switch (properties[i]) { > + case local_DRM_I915_PERF_PROP_SAMPLE_OA_SOURCE: > + if (properties[i+1]) { > + offset += 8; > + i += 2; > + } > + break; > + case local_DRM_I915_PERF_PROP_SAMPLE_CTX_ID: > + if (properties[i+1]) { > + offset += 8; > + i += 2; > + } > + break; > + case local_DRM_I915_PERF_PROP_SAMPLE_PID: > + if (properties[i+1]) { > + offset += 8; > + i += 2; > + } > + break; > + case local_DRM_I915_PERF_PROP_SAMPLE_TAG: > + if (properties[i+1]) { > + offset += 8; > + i += 2; > + } > + break; > + case local_DRM_I915_PERF_PROP_SAMPLE_TS: > + if (properties[i+1]) { > + offset += 8; > + i += 2; > + } > + break; Why are you doing the same thing for each case separately? 
> + case DRM_I915_PERF_PROP_CTX_HANDLE: > + case DRM_I915_PERF_PROP_SAMPLE_OA: > + case DRM_I915_PERF_PROP_OA_METRICS_SET: > + case DRM_I915_PERF_PROP_OA_FORMAT: > + case DRM_I915_PERF_PROP_OA_EXPONENT: > + case local_DRM_I915_PERF_PROP_ENGINE: > + i += 2; > + break; > + } > + > + if (properties[i] == local_DRM_I915_PERF_PROP_SAMPLE_MMIO) { > + uint32_t num_mmio = *((uint32_t *)properties[i+1]); > + > + offset += (num_mmio * 4); > + i += 2; > + } > + } while (i < prop_size); > + > + return offset; > +} > + > +static size_t > +get_perf_report_size(uint64_t *properties, int prop_size, int format_id) > +{ > + size_t format_size = oa_formats[format_id].size; > + size_t sample_size = 0; > + > + sample_size += get_oa_report_offset(properties, prop_size); > + sample_size += format_size; > + > + return sample_size; > +} > + > +static bool > +read_perf_reports(int stream_fd, > + uint8_t *perf_reports, > + int num_reports, > + size_t report_size, > + bool retry_on_loss) > +{ > + size_t sample_size = (sizeof(struct drm_i915_perf_record_header) + > + report_size); > + const struct drm_i915_perf_record_header *header; > + uint8_t *base_perf_reports = perf_reports; > + int i = 0; > + > + /* Note: we allocate a large buffer so that each read() iteration > + * should scrape *all* pending records. > + * > + * The largest buffer the OA unit supports is 16MB and the smallest > + * perf report format is 64bytes + 8bytes allowing up to 233016 > + * reports to be buffered. > + * > + * Being sure we are fetching all buffered reports allows us to > + * potentially throw away / skip all reports whenever we see > + * a _REPORT_LOST notification as a way of being sure our > + * measurements aren't skewed by a lost report. > + * > + * Note: this is useful for some tests but also not something > + * applications would be expected to resort to. 
Lost reports are > + * somewhat unpredictable but typically don't pose a problem - except > + * to indicate that the OA unit may be over taxed if lots of reports > + * are being lost. > + */ > + int buf_size = 233016 * > + (72 + sizeof(struct drm_i915_perf_record_header)); > + uint8_t *buf = malloc(buf_size); > + > + igt_assert(buf); > + > + do { > + ssize_t len; > + > + while ((len = read(stream_fd, buf, buf_size)) < 0 && > + errno == EINTR) > + ; > + > + igt_assert(len > 0); > + > + for (size_t offset = 0; offset < len; offset += header->size) { > + const uint8_t *report; > + size_t sample_offset = 0; > + > + header = (void *)(buf + offset); > + > + igt_assert_eq(header->pad, 0); /* Reserved */ > + > + /* Currently the only test that should ever expect to > + * see a _BUFFER_LOST error is the buffer_fill test, > + * otherwise something bad has probably happened... > + */ > + igt_assert_neq(header->type, > + DRM_I915_PERF_RECORD_OA_BUFFER_LOST); > + > + /* At high sampling frequencies the OA HW might not be > + * able to cope with all write requests and will notify > + * us that a report was lost. We restart our read of > + * two sequential reports due to the timeline blip this > + * implies > + */ > + if (header->type == > + DRM_I915_PERF_RECORD_OA_REPORT_LOST) { > + igt_debug("read restart: OA trigger collision " > + "/ report lost\n"); > + if (!retry_on_loss) { > + igt_debug("Freeing memory\n"); > + free(buf); > + return false; > + } > + i = 0; > + perf_reports = base_perf_reports; > + > + /* XXX: break, because we don't know where > + * within the series of already read reports > + * there could be a blip from the lost report. > + */ > + break; > + } > + > + /* Currently the only other record type expected is a > + * _SAMPLE. Notably this test will need updating if > + * i915-perf is extended in the future with additional > + * record types. 
> + */ > + igt_assert_eq(header->type, > + DRM_I915_PERF_RECORD_SAMPLE); > + > + igt_assert_eq(header->size, sample_size); > + > + sample_offset = offset + > + sizeof(struct drm_i915_perf_record_header); > + report = (const uint8_t *)(buf + sample_offset); > + > + memcpy(perf_reports, report, report_size); > + perf_reports += report_size; > + i++; > + if (i == num_reports) > + break; > + } > + } while (i < num_reports); > + > + free(buf); > + return true; > +} > + > +static void > +perf_stream_capture_workload_samples(struct drm_i915_perf_open_param *param, > + uint8_t *perf_reports, > + int num_reports, int report_size) > +{ > + drm_intel_bufmgr *bufmgr; > + drm_intel_context *context0; > + struct intel_batchbuffer *batch; > + struct igt_buf src, dst; > + int width = 800; > + int height = 600; > + uint32_t ctx_id = 0xffffffff; /* invalid id */ > + int stream_fd; > + int ret; > + bool valid_data = false; > + > +retry: > + bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096); > + drm_intel_bufmgr_gem_enable_reuse(bufmgr); > + > + scratch_buf_init(bufmgr, &src, width, height, 0xff0000ff); > + scratch_buf_init(bufmgr, &dst, width, height, 0x00ff00ff); > + > + batch = intel_batchbuffer_alloc(bufmgr, devid); > + > + context0 = drm_intel_gem_context_create(bufmgr); > + igt_assert(context0); > + > + ret = drm_intel_gem_context_get_id(context0, &ctx_id); > + igt_assert_eq(ret, 0); > + igt_assert_neq(ctx_id, 0xffffffff); > + > + igt_debug("opening i915-perf stream\n"); > + stream_fd = __perf_open(drm_fd, param); > + > + render_copy(batch, > + context0, > + &src, 0, 0, width, height, > + &dst, 0, 0); > + > + intel_batchbuffer_flush_with_context(batch, context0); > + > + drm_intel_bo_unreference(src.bo); > + drm_intel_bo_unreference(dst.bo); > + > + intel_batchbuffer_free(batch); > + drm_intel_gem_context_destroy(context0); > + drm_intel_bufmgr_destroy(bufmgr); > + > + valid_data = read_perf_reports(stream_fd, perf_reports, > + num_reports, report_size, > + false); > + if 
(!valid_data) { > + close(stream_fd); > + goto retry; > + } > +} > + > +struct oa_source_sample { > + uint64_t source; > + uint64_t ctx_id; > + uint8_t oa_report[]; > +}; > + > +#define SOURCE(i) (i == 0) ? "OABUFFER" : "CS" > + > +static void > +verify_source(uint8_t *perf_reports, int num_reports, size_t report_size) > +{ > + struct oa_source_sample *sample; > + uint32_t *oa_report; > + > + for (int i = 0; i < num_reports; i++) { > + size_t offset = i * report_size; > + > + sample = (struct oa_source_sample *) (perf_reports + offset); > + oa_report = (uint32_t *) sample->oa_report; > + > + igt_debug("read report: source= %s, reason = %x, " > + "timestamp = %x\n", > + SOURCE(sample->source), oa_report[0], oa_report[1]); > + > + igt_assert((sample->source == > + local_I915_PERF_SAMPLE_OA_SOURCE_OABUFFER) || > + (sample->source == > + local_I915_PERF_SAMPLE_OA_SOURCE_CS)); > + > + if (sample->source == local_I915_PERF_SAMPLE_OA_SOURCE_CS) > + igt_assert(!oa_report[0]); > + > + /* Don't expect zero for timestamps */ > + igt_assert_neq(oa_report[1], 0); > + } > +} > + > +static void > +test_oa_source(void) > +{ > + uint64_t properties[] = { > + /* Include OA reports in samples */ > + DRM_I915_PERF_PROP_SAMPLE_OA, true, > + > + /* OA unit configuration */ > + DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id, > + DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format, > + DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec, > + > + /* CS parameters */ > + local_DRM_I915_PERF_PROP_ENGINE, I915_EXEC_RENDER, > + local_DRM_I915_PERF_PROP_SAMPLE_OA_SOURCE, true, > + local_DRM_I915_PERF_PROP_SAMPLE_CTX_ID, true, > + }; > + struct drm_i915_perf_open_param param = { > + .flags = I915_PERF_FLAG_FD_CLOEXEC, > + .num_properties = sizeof(properties) / 16, > + .properties_ptr = to_user_pointer(properties), > + }; > + > + /* should be default, but just to be sure... 
*/ > + write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", 1); > + > + igt_fork(child, 1) { > + int prop_size = ARRAY_SIZE(properties); > + int num_reports = 10; > + int report_size = get_perf_report_size(properties, prop_size, > + test_oa_format); > + int total_size = num_reports * report_size; > + uint8_t *perf_reports = malloc(total_size); > + > + igt_assert(perf_reports); > + > + perf_stream_capture_workload_samples(&param, perf_reports, > + num_reports, report_size); > + verify_source(perf_reports, num_reports, report_size); > + free(perf_reports); > + } > + > + igt_waitchildren(); > +} > + > +igt_main > +{ > + igt_skip_on_simulation(); > + > + igt_fixture { > + drm_fd = drm_open_driver_render(DRIVER_INTEL); > + devid = intel_get_drm_devid(drm_fd); > + card = drm_get_card(); > + > + igt_require(init_sys_info()); > + > + render_copy = igt_get_render_copyfunc(devid); > + igt_require_f(render_copy, "no render-copy function\n"); > + } > + > + igt_subtest("cs-oa-stream-create") > + test_cs_oa_stream_create(); > + > + igt_subtest("oa-source") > + test_oa_source(); > + > + igt_fixture { > + close(drm_fd); > + } > +} > -- > 1.9.1 > -- Cheers, Ewelina > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > https://lists.freedesktop.org/mailman/listinfo/intel-gfx _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx