From: Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx> Verify that the per-context dynamic SSEU uAPI works as expected. To achieve that, in the absence of a better mechanism, we read the value of the PWR_CLK_STATE register, or use MI_SET_PREDICATE on platforms before Cannonlake. This register is written to by the GPU on context restore so this way we verify i915 is correctly updating the context image in all circumstances. v2: Add subslice tests (Lionel) Use MI_SET_PREDICATE for further verification when available (Lionel) v3: Rename to gem_ctx_rpcs (Lionel) v4: Update kernel API (Lionel) Add 0 value test (Lionel) Exercise invalid values (Lionel) v5: Add perf tests (Lionel) v6: Add new sysfs entry tests (Lionel) v7: Test rsvd fields Update for kernel series changes v8: Drop test_no_sseu_support() test (Kelvin) Drop drm_intel_*() apis (Chris) v9: by Chris: Drop all do_ioctl/do_ioctl_err() Use gem_context_[gs]et_param() Use gem_read() instead of mapping memory by Lionel: Test dynamic sseu on/off more Tvrtko Ursulin: v10: * Various style tweaks and refactorings. * New test coverage. v11: * Change platform support to just Gen11. * Simplify availability test. (Chris Wilson) * More invalid pointer tests. (Chris Wilson) v12: * Fix MAP_FIXED use (doh!). * Fix get/set copy&paste errors. * Drop supported platform test. (Chris Wilson) * Add mmap__gtt test. (Chris Wilson) v13: * Commit message tweaks. * Added reset/hang/suspend tests. (Chris Wilson) * Assert spinner is busy. (Chris Wilson) * Remove some more ABI assumptions. (Chris Wilson) v14: * Use default resume time. (Chris Wilson) * Trigger hang after rpcs read batch has been submitted. (Chris Wilson) v15: * Adjust for uAPI restrictions. 
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> Reviewed-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> # v14 --- tests/Makefile.am | 1 + tests/Makefile.sources | 1 + tests/gem_ctx_param.c | 4 +- tests/gem_ctx_sseu.c | 1190 ++++++++++++++++++++++++++++++++++++++++ tests/meson.build | 7 + 5 files changed, 1202 insertions(+), 1 deletion(-) create mode 100644 tests/gem_ctx_sseu.c diff --git a/tests/Makefile.am b/tests/Makefile.am index ee5a7c5e83b8..6b67bd2cc17a 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -107,6 +107,7 @@ gem_close_race_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) gem_close_race_LDADD = $(LDADD) -lpthread gem_ctx_thrash_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) gem_ctx_thrash_LDADD = $(LDADD) -lpthread +gem_ctx_sseu_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la gem_exec_parallel_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) gem_exec_parallel_LDADD = $(LDADD) -lpthread gem_fence_thrash_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) diff --git a/tests/Makefile.sources b/tests/Makefile.sources index 269336ad3150..6765143bf344 100644 --- a/tests/Makefile.sources +++ b/tests/Makefile.sources @@ -55,6 +55,7 @@ TESTS_progs = \ gem_ctx_exec \ gem_ctx_isolation \ gem_ctx_param \ + gem_ctx_sseu \ gem_ctx_switch \ gem_ctx_thrash \ gem_double_irq_loop \ diff --git a/tests/gem_ctx_param.c b/tests/gem_ctx_param.c index c46fd709b0d7..af1afeaa2f2f 100644 --- a/tests/gem_ctx_param.c +++ b/tests/gem_ctx_param.c @@ -294,11 +294,13 @@ igt_main set_priority(fd); } + /* I915_CONTEXT_PARAM_SSEU tests are located in gem_ctx_sseu.c */ + /* NOTE: This testcase intentionally tests for the next free parameter * to catch ABI extensions. Don't "fix" this testcase without adding all * the tests for the new param first. 
*/ - arg.param = I915_CONTEXT_PARAM_PRIORITY + 1; + arg.param = I915_CONTEXT_PARAM_SSEU + 1; igt_subtest("invalid-param-get") { arg.ctx_id = ctx; diff --git a/tests/gem_ctx_sseu.c b/tests/gem_ctx_sseu.c new file mode 100644 index 000000000000..889f70643392 --- /dev/null +++ b/tests/gem_ctx_sseu.c @@ -0,0 +1,1190 @@ +/* + * Copyright © 2017-2018 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx> + * + */ + +#include "igt.h" +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> +#include <signal.h> +#include <errno.h> +#include <time.h> +#include <sys/mman.h> +#include <sys/wait.h> + +#include "igt_dummyload.h" +#include "igt_perf.h" +#include "igt_sysfs.h" +#include "ioctl_wrappers.h" + +IGT_TEST_DESCRIPTION("Test context render powergating programming."); + +#define MI_STORE_REGISTER_MEM (0x24 << 23) + +#define MI_SET_PREDICATE (0x1 << 23) +#define MI_SET_PREDICATE_NOOP_NEVER (0) +#define MI_SET_PREDICATE_NOOP_RESULT2_CLEAR (1) +#define MI_SET_PREDICATE_NOOP_RESULT2_SET (2) +#define MI_SET_PREDICATE_NOOP_RESULT_CLEAR (3) +#define MI_SET_PREDICATE_NOOP_RESULT_SET (4) +#define MI_SET_PREDICATE_1_SLICES (5) +#define MI_SET_PREDICATE_2_SLICES (6) +#define MI_SET_PREDICATE_3_SLICES (7) + +#define GEN8_R_PWR_CLK_STATE 0x20C8 +#define GEN8_RPCS_ENABLE (1 << 31) +#define GEN8_RPCS_S_CNT_ENABLE (1 << 18) +#define GEN8_RPCS_S_CNT_SHIFT 15 +#define GEN8_RPCS_S_CNT_MASK (0x7 << GEN8_RPCS_S_CNT_SHIFT) +#define GEN11_RPCS_S_CNT_SHIFT 12 +#define GEN11_RPCS_S_CNT_MASK (0x3f << GEN11_RPCS_S_CNT_SHIFT) +#define GEN8_RPCS_SS_CNT_ENABLE (1 << 11) +#define GEN8_RPCS_SS_CNT_SHIFT 8 +#define GEN8_RPCS_SS_CNT_MASK (0x7 << GEN8_RPCS_SS_CNT_SHIFT) +#define GEN8_RPCS_EU_MAX_SHIFT 4 +#define GEN8_RPCS_EU_MAX_MASK (0xf << GEN8_RPCS_EU_MAX_SHIFT) +#define GEN8_RPCS_EU_MIN_SHIFT 0 +#define GEN8_RPCS_EU_MIN_MASK (0xf << GEN8_RPCS_EU_MIN_SHIFT) + +#define RCS_TIMESTAMP (0x2000 + 0x358) + +static unsigned int __intel_gen__, __intel_devid__; +static uint64_t __slice_mask__, __subslice_mask__; +static unsigned int __slice_count__, __subslice_count__; + +static uint64_t mask_minus_one(uint64_t mask) +{ + unsigned int i; + + for (i = 0; i < (sizeof(mask) * 8 - 1); i++) { + if ((1ULL << i) & mask) + return mask & ~(1ULL << i); + } + + igt_assert(0); + return 0; +} + +static 
uint64_t mask_plus_one(uint64_t mask) +{ + unsigned int i; + + for (i = 0; i < (sizeof(mask) * 8 - 1); i++) { + if (((1ULL << i) & mask) == 0) + return mask | (1ULL << i); + } + + igt_assert(0); + return 0; +} + +static uint64_t mask_minus(uint64_t mask, int n) +{ + unsigned int i; + + for (i = 0; i < n; i++) + mask = mask_minus_one(mask); + + return mask; +} + +static uint64_t mask_plus(uint64_t mask, int n) +{ + unsigned int i; + + for (i = 0; i < n; i++) + mask = mask_plus_one(mask); + + return mask; +} + +static uint32_t * +fill_relocation(uint32_t *batch, + struct drm_i915_gem_relocation_entry *reloc, + uint32_t gem_handle, uint32_t delta, /* in bytes */ + uint32_t offset, /* in dwords */ + uint32_t read_domains, uint32_t write_domains) +{ + reloc->target_handle = gem_handle; + reloc->delta = delta; + reloc->offset = offset * sizeof(uint32_t); + reloc->presumed_offset = 0; + reloc->read_domains = read_domains; + reloc->write_domain = write_domains; + + *batch++ = delta; + *batch++ = 0; + + return batch; +} + + +static uint32_t +read_rpcs_reg(int fd, uint32_t ctx, uint32_t expected_slices, igt_spin_t *spin) +{ + struct drm_i915_gem_execbuffer2 execbuf = { }; + struct drm_i915_gem_relocation_entry relocs[2]; + struct drm_i915_gem_exec_object2 obj[2]; + uint32_t *batch, *b, data[2]; + unsigned int n_relocs = 0; + uint32_t rpcs; + + memset(obj, 0, sizeof(obj)); + obj[0].handle = gem_create(fd, 4096); + obj[1].handle = gem_create(fd, 4096); + + batch = b = + gem_mmap__cpu(fd, obj[1].handle, 0, 4096, PROT_READ | PROT_WRITE); + + if (expected_slices != 0 && __intel_gen__ < 10) + *b++ = MI_SET_PREDICATE | (1 - 1) | + (MI_SET_PREDICATE_1_SLICES + expected_slices - 1); + + *b++ = MI_STORE_REGISTER_MEM | (4 - 2); + *b++ = RCS_TIMESTAMP; + b = fill_relocation(b, &relocs[n_relocs++], obj[0].handle, + 0, b - batch, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); + + *b++ = MI_STORE_REGISTER_MEM | (4 - 2); + *b++ = GEN8_R_PWR_CLK_STATE; + b = fill_relocation(b, 
&relocs[n_relocs++], obj[0].handle, + 4, b - batch, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); + + if (expected_slices != 0 && __intel_gen__ < 10) + *b++ = MI_SET_PREDICATE | (1 - 1) | MI_SET_PREDICATE_NOOP_NEVER; + + *b++ = MI_BATCH_BUFFER_END; + + gem_munmap(batch, 4096); + + obj[1].relocation_count = n_relocs; + obj[1].relocs_ptr = to_user_pointer(relocs); + + execbuf.buffers_ptr = to_user_pointer(obj); + execbuf.buffer_count = ARRAY_SIZE(obj); + i915_execbuffer2_set_context_id(execbuf, ctx); + + gem_execbuf(fd, &execbuf); + + if (spin) { + igt_assert(gem_bo_busy(fd, spin->handle)); + igt_spin_batch_end(spin); + } + + gem_read(fd, obj[0].handle, 0, data, sizeof(data)); + + rpcs = data[1]; + + igt_debug("rcs_timestamp=0x%x rpcs=0x%x\n", data[0], data[1]); + + gem_close(fd, obj[0].handle); + gem_close(fd, obj[1].handle); + + return rpcs; +} + +typedef uint32_t (*read_slice_count_f)(int fd, uint32_t ctx, uint32_t expected, + igt_spin_t *spin); + +static read_slice_count_f __read_slice_count; + +static uint32_t +read_slice_count(int fd, uint32_t ctx, uint32_t expected_slice_count) +{ + return __read_slice_count(fd, ctx, expected_slice_count, NULL); +} + +static uint32_t +gen8_read_slice_count(int fd, uint32_t ctx, uint32_t expected_slice_count, + igt_spin_t *spin) +{ + return (read_rpcs_reg(fd, ctx, expected_slice_count, spin) & + GEN8_RPCS_S_CNT_MASK) >> GEN8_RPCS_S_CNT_SHIFT; +} + +static uint32_t +gen11_read_slice_count(int fd, uint32_t ctx, uint32_t expected_slice_count, + igt_spin_t *spin) +{ + return (read_rpcs_reg(fd, ctx, expected_slice_count, spin) & + GEN11_RPCS_S_CNT_MASK) >> GEN11_RPCS_S_CNT_SHIFT; +} + +static uint32_t +read_subslice_count(int fd, uint32_t ctx) +{ + return (read_rpcs_reg(fd, ctx, 0, NULL) & GEN8_RPCS_SS_CNT_MASK) >> + GEN8_RPCS_SS_CNT_SHIFT; +} + +static bool +kernel_has_per_context_sseu_support(int fd) +{ + struct drm_i915_gem_context_param_sseu sseu = { }; + struct drm_i915_gem_context_param arg = + { + .param = 
I915_CONTEXT_PARAM_SSEU, + .size = sizeof(sseu), + .value = to_user_pointer(&sseu), + }; + int ret; + + if (__gem_context_get_param(fd, &arg)) + return false; + + arg.value = to_user_pointer(&sseu); + + ret = __gem_context_set_param(fd, &arg); + + igt_assert(ret == 0 || ret == -ENODEV || ret == -EINVAL); + + return ret == 0; +} + +static void +context_get_sseu_masks(int fd, uint32_t ctx, + uint64_t *slice_mask, + uint64_t *subslice_mask) +{ + struct drm_i915_gem_context_param_sseu sseu = { }; + struct drm_i915_gem_context_param arg = + { .param = I915_CONTEXT_PARAM_SSEU, + .ctx_id = ctx, + .size = sizeof(sseu), + .value = to_user_pointer(&sseu) }; + + gem_context_get_param(fd, &arg); + + if (slice_mask) + *slice_mask = sseu.slice_mask; + + if (subslice_mask) + *subslice_mask = sseu.subslice_mask; +} + +static void +context_set_slice_mask(int fd, uint32_t ctx, uint64_t slice_mask) +{ + struct drm_i915_gem_context_param_sseu sseu = { }; + struct drm_i915_gem_context_param arg = + { .param = I915_CONTEXT_PARAM_SSEU, + .ctx_id = ctx, + .size = sizeof(sseu), + .value = to_user_pointer(&sseu) }; + + gem_context_get_param(fd, &arg); + sseu.slice_mask = slice_mask; + gem_context_set_param(fd, &arg); +} + +static void +context_set_subslice_mask(int fd, uint32_t ctx, uint64_t subslice_mask) +{ + struct drm_i915_gem_context_param_sseu sseu = { }; + struct drm_i915_gem_context_param arg = + { .param = I915_CONTEXT_PARAM_SSEU, + .ctx_id = ctx, + .size = sizeof(sseu), + .value = to_user_pointer(&sseu) }; + + gem_context_get_param(fd, &arg); + sseu.subslice_mask = subslice_mask; + gem_context_set_param(fd, &arg); +} + +/* + * Verify that we can program the slice count. 
+ */ +static void +test_slice_pg(int fd, uint32_t pg_slice_count) +{ + uint64_t pg_slice_mask = mask_minus(__slice_mask__, pg_slice_count); + unsigned int slice_count = __slice_count__ - pg_slice_count; + uint64_t slice_mask; + uint32_t ctx; + + igt_assert_eq(slice_count, __builtin_popcount(pg_slice_mask)); + + igt_skip_on(__intel_gen__ == 11 && + (slice_count != 1 && slice_count != __slice_count__)); + + ctx = gem_context_create(fd); + context_set_slice_mask(fd, ctx, pg_slice_mask); + context_get_sseu_masks(fd, ctx, &slice_mask, NULL); + igt_assert_eq(pg_slice_mask, slice_mask); + + /* + * Test false positives with predicates (only available on + * before Gen10). + */ + if (__intel_gen__ < 10) + igt_assert_eq(read_slice_count(fd, ctx, __slice_count__), 0); + + igt_assert_eq(read_slice_count(fd, ctx, 0), slice_count); + + gem_context_destroy(fd, ctx); +} + +/* + * Verify that we can program the subslice count. + */ +static void +test_subslice_pg(int fd, int pg_subslice_count) +{ + uint64_t pg_subslice_mask = + mask_minus(__subslice_mask__, pg_subslice_count); + unsigned int subslice_count = __subslice_count__ - pg_subslice_count; + uint64_t subslice_mask; + uint32_t ctx; + + igt_assert_eq(subslice_count, __builtin_popcount(pg_subslice_mask)); + + igt_skip_on(__intel_gen__ == 11 && + (subslice_count != __subslice_count__ && + subslice_count != (__subslice_count__ / 2))); + + ctx = gem_context_create(fd); + context_set_subslice_mask(fd, ctx, pg_subslice_mask); + context_get_sseu_masks(fd, ctx, NULL, &subslice_mask); + igt_assert_eq(pg_subslice_mask, subslice_mask); + + igt_assert_eq(read_subslice_count(fd, ctx), subslice_count); + + gem_context_destroy(fd, ctx); +} + +static bool has_engine(int fd, unsigned int class, unsigned int instance) +{ + int pmu = perf_i915_open(I915_PMU_ENGINE_BUSY(class, instance)); + + if (pmu >= 0) + close(pmu); + + return pmu >= 0; +} + +/* + * Verify that invalid engines are rejected and valid ones are accepted. 
+ */ +static void test_engines(int fd) +{ + struct drm_i915_gem_context_param_sseu sseu = { }; + struct drm_i915_gem_context_param arg = + { .param = I915_CONTEXT_PARAM_SSEU, + .ctx_id = gem_context_create(fd), + .size = sizeof(sseu), + .value = to_user_pointer(&sseu) }; + unsigned int class, instance; + int last_with_engines; + + /* get_param */ + + sseu.instance = -1; /* Assumed invalid. */ + igt_assert_eq(__gem_context_get_param(fd, &arg), -EINVAL); + + sseu.class = I915_ENGINE_CLASS_INVALID; /* Both invalid. */ + igt_assert_eq(__gem_context_get_param(fd, &arg), -EINVAL); + + sseu.instance = 0; /* Class invalid. */ + igt_assert_eq(__gem_context_get_param(fd, &arg), -EINVAL); + sseu.class = I915_ENGINE_CLASS_RENDER; + + last_with_engines = -1; + for (class = 0; class < ~0; class++) { + for (instance = 0; instance < ~0; instance++) { + int ret; + + sseu.class = class; + sseu.instance = instance; + + ret = __gem_context_get_param(fd, &arg); + + if (has_engine(fd, class, instance)) { + igt_assert_eq(ret, 0); + last_with_engines = class; + } else { + igt_assert_eq(ret, -EINVAL); + if (instance > 8) /* Skip over some instance holes. */ + break; + } + } + + if (class - last_with_engines > 8) /* Skip over some class holes. */ + break; + } + + /* + * Get some proper values before trying to reprogram them onto + * an invalid engine. + */ + sseu.class = 0; + sseu.instance = 0; + gem_context_get_param(fd, &arg); + + /* set_param */ + + sseu.instance = -1; /* Assumed invalid. */ + igt_assert_eq(__gem_context_set_param(fd, &arg), -EINVAL); + + sseu.class = I915_ENGINE_CLASS_INVALID; /* Both invalid. */ + igt_assert_eq(__gem_context_set_param(fd, &arg), -EINVAL); + + sseu.instance = 0; /* Class invalid. 
*/ + igt_assert_eq(__gem_context_set_param(fd, &arg), -EINVAL); + + last_with_engines = -1; + for (class = 0; class < ~0; class++) { + for (instance = 0; instance < ~0; instance++) { + int ret; + + sseu.class = class; + sseu.instance = instance; + + ret = __gem_context_set_param(fd, &arg); + + if (has_engine(fd, class, instance)) { + igt_assert(ret == 0 || ret == -ENODEV); + last_with_engines = class; + } else { + igt_assert_eq(ret, -EINVAL); + if (instance > 8) /* Skip over some instance holes. */ + break; + } + } + + if (class - last_with_engines > 8) /* Skip over some class holes. */ + break; + } + + gem_context_destroy(fd, arg.ctx_id); +} + +/* + * Verify that invalid arguments are rejected. + */ +static void +test_invalid_args(int fd) +{ + struct drm_i915_gem_context_param arg = + { .param = I915_CONTEXT_PARAM_SSEU, + .ctx_id = gem_context_create(fd), + }; + struct drm_i915_gem_context_param_sseu sseu = { }; + unsigned char *page[2]; + unsigned char *addr; + unsigned int sz; + + /* get param */ + + /* Invalid size. */ + arg.size = 1; + igt_assert_eq(__gem_context_get_param(fd, &arg), -EINVAL); + + /* Query size. */ + arg.size = 0; + igt_assert_eq(__gem_context_get_param(fd, &arg), 0); + sz = arg.size; + + /* Bad pointers. */ + igt_assert_eq(__gem_context_get_param(fd, &arg), -EFAULT); + arg.value = -1; + igt_assert_eq(__gem_context_get_param(fd, &arg), -EFAULT); + arg.value = 1; + igt_assert_eq(__gem_context_get_param(fd, &arg), -EFAULT); + + /* Unmapped. */ + page[0] = mmap(0, 4096, PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + igt_assert(page[0] != MAP_FAILED); + memset(page[0], 0, sizeof(sseu)); + munmap(page[0], 4096); + arg.value = to_user_pointer(page[0]); + igt_assert_eq(__gem_context_get_param(fd, &arg), -EFAULT); + + /* Straddle into unmapped area. 
*/ + page[0] = mmap(0, 8192, PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + igt_assert(page[0] != MAP_FAILED); + munmap(page[0], 8192); + page[0] = mmap(page[0], 4096, + PROT_WRITE, MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0); + igt_assert(page[0] != MAP_FAILED); + memset(page[0], 0, sizeof(sseu)); + page[1] = mmap((void *)((unsigned long)page[0] + 4096), 4096, + PROT_WRITE, MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0); + igt_assert(page[1] != MAP_FAILED); + memset(page[1], 0, sizeof(sseu)); + munmap(page[1], 4096); + arg.value = to_user_pointer(page[1]) - + sizeof(struct drm_i915_gem_context_param_sseu) + 4; + igt_assert_eq(__gem_context_get_param(fd, &arg), -EFAULT); + munmap(page[0], 4096); + + /* Straddle into read-only area. */ + page[0] = mmap(0, 8192, PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + igt_assert(page[0] != MAP_FAILED); + munmap(page[0], 8192); + page[0] = mmap(page[0], 4096, + PROT_WRITE, MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0); + igt_assert(page[0] != MAP_FAILED); + memset(page[0], 0, sizeof(sseu)); + page[1] = mmap((void *)((unsigned long)page[0] + 4096), 4096, + PROT_WRITE, MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0); + igt_assert(page[1] != MAP_FAILED); + memset(page[1], 0, sizeof(sseu)); + igt_assert(mprotect(page[1], 4096, PROT_READ) == 0); + arg.value = to_user_pointer(page[1] - sizeof(sseu) + 4); + igt_assert_eq(__gem_context_get_param(fd, &arg), -EFAULT); + munmap(page[0], 4096); + munmap(page[1], 4096); + + /* set param */ + + /* Invalid sizes. */ + arg.size = 1; + igt_assert_eq(__gem_context_set_param(fd, &arg), -EINVAL); + + arg.size = 0; + igt_assert_eq(__gem_context_set_param(fd, &arg), -EINVAL); + arg.size = sz; + + /* Bad pointers. */ + arg.value = 0; + igt_assert_eq(__gem_context_set_param(fd, &arg), -EFAULT); + arg.value = -1; + igt_assert_eq(__gem_context_set_param(fd, &arg), -EFAULT); + arg.value = 1; + igt_assert_eq(__gem_context_set_param(fd, &arg), -EFAULT); + + /* Get valid SSEU. 
*/ + arg.value = to_user_pointer(&sseu); + igt_assert_eq(__gem_context_get_param(fd, &arg), 0); + + /* Unmapped. */ + page[0] = mmap(0, 4096, PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + igt_assert(page[0] != MAP_FAILED); + memcpy(page[0], &sseu, sizeof(sseu)); + munmap(page[0], 4096); + arg.value = to_user_pointer(page[0]); + igt_assert_eq(__gem_context_set_param(fd, &arg), -EFAULT); + + /* Straddle into unmapped area. */ + page[0] = mmap(0, 8192, PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + igt_assert(page[0] != MAP_FAILED); + munmap(page[0], 8192); + page[0] = mmap(page[0], 4096, + PROT_WRITE, MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0); + igt_assert(page[0] != MAP_FAILED); + page[1] = mmap((void *)((unsigned long)page[0] + 4096), 4096, + PROT_WRITE, MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0); + igt_assert(page[1] != MAP_FAILED); + addr = page[1] - sizeof(sseu) + 4; + memcpy(addr, &sseu, sizeof(sseu)); + munmap(page[1], 4096); + arg.value = to_user_pointer(addr); + igt_assert_eq(__gem_context_set_param(fd, &arg), -EFAULT); + munmap(page[0], 4096); + + gem_context_destroy(fd, arg.ctx_id); +} + +/* + * Verify that ggtt mapped area can be used as the sseu pointer. + */ +static void +test_ggtt_args(int fd) +{ + struct drm_i915_gem_context_param_sseu *sseu; + struct drm_i915_gem_context_param arg = + { .param = I915_CONTEXT_PARAM_SSEU, + .ctx_id = gem_context_create(fd), + .size = sizeof(*sseu), + }; + uint32_t bo; + + bo = gem_create(fd, 4096); + arg.value = to_user_pointer(gem_mmap__gtt(fd, bo, 4096, + PROT_READ | PROT_WRITE)); + + igt_assert_eq(__gem_context_get_param(fd, &arg), 0); + igt_assert_eq(__gem_context_set_param(fd, &arg), 0); + + munmap((void *)arg.value, 4096); + gem_close(fd, bo); + gem_context_destroy(fd, arg.ctx_id); +} + +/* + * Verify that invalid SSEU values are rejected. 
+ */ +static void +test_invalid_sseu(int fd) +{ + struct drm_i915_gem_context_param_sseu device_sseu = { }; + struct drm_i915_gem_context_param_sseu sseu = { }; + struct drm_i915_gem_context_param arg = + { .param = I915_CONTEXT_PARAM_SSEU, + .ctx_id = gem_context_create(fd), + .size = sizeof(sseu), + }; + unsigned int i; + + /* Fetch the device defaults. */ + arg.value = to_user_pointer(&device_sseu); + gem_context_get_param(fd, &arg); + + arg.value = to_user_pointer(&sseu); + + /* Try all slice masks known to be invalid. */ + sseu = device_sseu; + for (i = 1; i <= (8 - __slice_count__); i++) { + sseu.slice_mask = mask_plus(__slice_mask__, i); + igt_assert_eq(__gem_context_set_param(fd, &arg), -EINVAL); + } + + /* 0 slices. */ + sseu.slice_mask = 0; + igt_assert_eq(-EINVAL, __gem_context_set_param(fd, &arg)); + + /* Try all subslice masks known to be invalid. */ + sseu = device_sseu; + for (i = 1; i <= (8 - __subslice_count__); i++) { + sseu.subslice_mask = mask_plus(__subslice_mask__, i); + igt_assert_eq(__gem_context_set_param(fd, &arg), -EINVAL); + } + + /* 0 subslices. */ + sseu.subslice_mask = 0; + igt_assert_eq(__gem_context_set_param(fd, &arg), -EINVAL); + + /* Try number of EUs superior to the max available. */ + sseu = device_sseu; + sseu.min_eus_per_subslice = device_sseu.max_eus_per_subslice + 1; + igt_assert_eq(__gem_context_set_param(fd, &arg), -EINVAL); + + sseu = device_sseu; + sseu.max_eus_per_subslice = device_sseu.max_eus_per_subslice + 1; + igt_assert_eq(__gem_context_set_param(fd, &arg), -EINVAL); + + /* Try to program 0 max EUs. */ + sseu = device_sseu; + sseu.max_eus_per_subslice = 0; + igt_assert_eq(__gem_context_set_param(fd, &arg), -EINVAL); + + /* Min > max */ + sseu = device_sseu; + sseu.min_eus_per_subslice = sseu.max_eus_per_subslice; + sseu.max_eus_per_subslice = 1; + igt_assert_eq(__gem_context_set_param(fd, &arg), -EINVAL); + + if (__intel_gen__ != 11) + goto out; + + /* Subset of subslices but slice mask greater than one. 
*/ + if (__slice_count__ > 1) { + sseu = device_sseu; + sseu.subslice_mask = mask_minus_one(sseu.subslice_mask); + igt_assert_eq(__gem_context_set_param(fd, &arg), -EINVAL); + } + + /* Odd subslices above four. */ + sseu = device_sseu; + sseu.slice_mask = 0x1; + sseu.subslice_mask = mask_minus_one(sseu.subslice_mask); + igt_assert_eq(__gem_context_set_param(fd, &arg), -EINVAL); + + /* More than half subslices with one slice. */ + sseu = device_sseu; + sseu.slice_mask = 0x1; + sseu.subslice_mask = mask_minus(sseu.subslice_mask, + __subslice_count__ / 2 - 1); + igt_assert_eq(__gem_context_set_param(fd, &arg), -EINVAL); + + /* VME */ + + /* Slice count between one and max. */ + if (__slice_count__ > 2) { + sseu = device_sseu; + sseu.slice_mask = mask_minus_one(sseu.slice_mask); + igt_assert_eq(__gem_context_set_param(fd, &arg), -EINVAL); + } + + /* Less than half subslices with one slice. */ + sseu = device_sseu; + sseu.slice_mask = 0x1; + sseu.subslice_mask = mask_minus(sseu.subslice_mask, + __subslice_count__ / 2 + 1); + igt_assert_eq(__gem_context_set_param(fd, &arg), -EINVAL); + +out: + gem_context_destroy(fd, arg.ctx_id); +} + +/* Create two contexts and set their slice masks to mask0 and mask1. */ +static void +init_contexts(int fd, uint32_t *ctx, unsigned int num, + uint64_t mask0, uint64_t mask1) +{ + unsigned int i; + + igt_assert_eq(num, 2); + + for (i = 0; i < num; i++) + ctx[i] = gem_context_create(fd); + + context_set_slice_mask(fd, ctx[0], mask0); + context_set_slice_mask(fd, ctx[1], mask1); +} + +static void +def_sseu(struct drm_i915_gem_context_param_sseu *sseu, + struct drm_i915_gem_context_param_sseu *def) +{ + memcpy(sseu, def, sizeof(*sseu)); +} + +static void +pg_sseu(struct drm_i915_gem_context_param_sseu *sseu, + struct drm_i915_gem_context_param_sseu *def) +{ + unsigned int ss; + + memcpy(sseu, def, sizeof(*sseu)); + + ss = __builtin_popcount(def->subslice_mask); + + /* Gen11 VME friendly configuration. 
*/ + sseu->slice_mask = 1; + sseu->subslice_mask = ~(~0 << (ss / 2)); +} + +static void +oa_sseu(struct drm_i915_gem_context_param_sseu *sseu, + struct drm_i915_gem_context_param_sseu *def) +{ + if (__intel_gen__ == 11) + pg_sseu(sseu, def); + else + def_sseu(sseu, def); +} + +static void +get_device_sseu(int fd, struct drm_i915_gem_context_param_sseu *sseu) +{ + struct drm_i915_gem_context_param arg = + { .param = I915_CONTEXT_PARAM_SSEU, + .size = sizeof(*sseu), + .value = to_user_pointer(sseu) }; + + memset(sseu, 0, sizeof(*sseu)); + gem_context_get_param(fd, &arg); +} + +/* + * Verify that powergating settings are put on hold while i915/perf is + * active. + */ +static void +test_perf_oa(int fd) +{ + uint64_t properties[] = { + /* Include OA reports in samples */ + DRM_I915_PERF_PROP_SAMPLE_OA, true, + + /* OA unit configuration */ + DRM_I915_PERF_PROP_OA_METRICS_SET, 1, /* test metric */ + DRM_I915_PERF_PROP_OA_FORMAT, I915_OA_FORMAT_A32u40_A4u32_B8_C8, + DRM_I915_PERF_PROP_OA_EXPONENT, 20, + }; + struct drm_i915_perf_open_param param = { + .flags = I915_PERF_FLAG_FD_CLOEXEC | + I915_PERF_FLAG_FD_NONBLOCK, + .num_properties = ARRAY_SIZE(properties) / 2, + .properties_ptr = to_user_pointer(properties), + }; + struct drm_i915_gem_context_param_sseu device_sseu, _pg_sseu, _oa_sseu; + unsigned int pg_slice_count, oa_slice_count; + uint32_t ctx[2]; + unsigned int i; + int perf_fd; + + get_device_sseu(fd, &device_sseu); + pg_sseu(&_pg_sseu, &device_sseu); + oa_sseu(&_oa_sseu, &device_sseu); + pg_slice_count = __builtin_popcount(_pg_sseu.slice_mask); + oa_slice_count = __builtin_popcount(_oa_sseu.slice_mask); + + init_contexts(fd, ctx, ARRAY_SIZE(ctx), + device_sseu.slice_mask, _pg_sseu.slice_mask); + + /* + * Test false positives with predicates (only available on + * before Gen10). 
+ */ + if (__intel_gen__ < 10) + igt_assert_eq(read_slice_count(fd, ctx[1], __slice_count__), + 0); + + igt_assert_eq(read_slice_count(fd, ctx[0], 0), __slice_count__); + igt_assert_eq(read_slice_count(fd, ctx[1], 0), pg_slice_count); + + /* + * Now open i915/perf and verify that all contexts have been + * reconfigured to the device's default. + */ + perf_fd = igt_ioctl(fd, DRM_IOCTL_I915_PERF_OPEN, &param); + igt_assert_fd(perf_fd); + + if (__intel_gen__ < 10) + igt_assert_eq(read_slice_count(fd, ctx[1], pg_slice_count), 0); + + igt_assert_eq(read_slice_count(fd, ctx[0], 0), oa_slice_count); + igt_assert_eq(read_slice_count(fd, ctx[1], 0), oa_slice_count); + if (__intel_gen__ == 11) { + igt_assert_eq(read_subslice_count(fd, ctx[0]), + __subslice_count__ / 2); + igt_assert_eq(read_subslice_count(fd, ctx[1]), + __subslice_count__ / 2); + } + + close(perf_fd); + + /* + * After closing the perf stream, configurations should be + * back to the programmed values. + */ + if (__intel_gen__ < 10) + igt_assert_eq(read_slice_count(fd, ctx[1], __slice_count__), 0); + igt_assert_eq(read_slice_count(fd, ctx[0], 0), __slice_count__); + igt_assert_eq(read_slice_count(fd, ctx[1], 0), pg_slice_count); + + for (i = 0; i < ARRAY_SIZE(ctx); i++) + gem_context_destroy(fd, ctx[i]); + + /* + * Open i915/perf first and verify that all contexts created + * afterward are configured to the device's default. + */ + perf_fd = igt_ioctl(fd, DRM_IOCTL_I915_PERF_OPEN, &param); + igt_assert_fd(perf_fd); + + init_contexts(fd, ctx, ARRAY_SIZE(ctx), + device_sseu.slice_mask, _pg_sseu.slice_mask); + + /* + * Check the device's default values, despite setting + * otherwise. 
+ */ + if (__intel_gen__ < 10) + igt_assert_eq(read_slice_count(fd, ctx[1], pg_slice_count), 0); + + igt_assert_eq(read_slice_count(fd, ctx[0], 0), oa_slice_count); + igt_assert_eq(read_slice_count(fd, ctx[1], 0), oa_slice_count); + if (__intel_gen__ == 11) { + igt_assert_eq(read_subslice_count(fd, ctx[0]), + __subslice_count__ / 2); + igt_assert_eq(read_subslice_count(fd, ctx[1]), + __subslice_count__ / 2); + } + + close(perf_fd); + + /* + * After closing the perf stream, configurations should be + * back to the programmed values. + */ + if (__intel_gen__ < 10) + igt_assert_eq(read_slice_count(fd, ctx[1], __slice_count__), + 0); + + igt_assert_eq(read_slice_count(fd, ctx[0], 0), __slice_count__); + igt_assert_eq(read_slice_count(fd, ctx[1], 0), pg_slice_count); + + for (i = 0; i < ARRAY_SIZE(ctx); i++) + gem_context_destroy(fd, ctx[i]); +} + +static igt_spin_t * __spin_poll(int fd, uint32_t ctx, unsigned long flags) +{ + struct igt_spin_factory opts = { + .ctx = ctx, + .engine = flags, + }; + + if (gem_can_store_dword(fd, flags)) + opts.flags |= IGT_SPIN_POLL_RUN; + + return __igt_spin_batch_factory(fd, &opts); +} + +static unsigned long __spin_wait(int fd, igt_spin_t *spin) +{ + struct timespec start = { }; + + igt_nsec_elapsed(&start); + + if (spin->running) { + unsigned long timeout = 0; + + while (!READ_ONCE(*spin->running)) { + unsigned long t = igt_nsec_elapsed(&start); + + if ((t - timeout) > 250e6) { + timeout = t; + igt_warn("Spinner not running after %.2fms\n", + (double)t / 1e6); + } + } + } else { + igt_debug("__spin_wait - usleep mode\n"); + usleep(500e3); /* Better than nothing! 
*/ + } + + return igt_nsec_elapsed(&start); +} + +static igt_spin_t * __spin_sync(int fd, uint32_t ctx, unsigned long flags) +{ + igt_spin_t *spin = __spin_poll(fd, ctx, flags); + + __spin_wait(fd, spin); + + return spin; +} + +static uint32_t +read_slice_count_busy(int fd, uint32_t context, uint32_t expected, + igt_spin_t *spin) +{ + return __read_slice_count(fd, context, expected, spin); +} + +#define TEST_IDLE (1) +#define TEST_BUSY (2) +#define TEST_RESET (4) +#define TEST_HANG (8) +#define TEST_SUSPEND (16) + +static igt_spin_t * +__pre_set(int fd, unsigned flags, uint32_t ctx) +{ + if (flags & TEST_BUSY) + return __spin_sync(fd, ctx, I915_EXEC_RENDER); + + return NULL; +} + +static igt_spin_t * +__post_set(int fd, unsigned int flags, uint32_t ctx, igt_spin_t *spin, + unsigned int expected) +{ + if (flags & TEST_RESET) + igt_force_gpu_reset(fd); + + if ((flags & TEST_BUSY) && !(flags & (TEST_RESET | TEST_HANG))) + igt_assert_eq(read_slice_count_busy(fd, ctx, 0, spin), + expected); + else + igt_assert_eq(read_slice_count(fd, ctx, 0), expected); + + igt_assert_eq(read_slice_count(fd, 0, 0), __slice_count__); + + if (spin) + igt_spin_batch_free(fd, spin); + + if (flags & TEST_IDLE) + igt_drop_caches_set(fd, DROP_RETIRE | DROP_IDLE | DROP_ACTIVE); + + if (flags & TEST_SUSPEND) + igt_system_suspend_autoresume(SUSPEND_STATE_MEM, + SUSPEND_TEST_NONE); + + igt_assert_eq(read_slice_count(fd, ctx, 0), expected); + igt_assert_eq(read_slice_count(fd, 0, 0), __slice_count__); + + return NULL; +} + +/* + * Test context re-configuration with either idle or busy contexts. 
+ */
+static void
+test_dynamic(int fd, unsigned int flags)
+{
+	struct drm_i915_gem_context_param_sseu device_sseu;
+	struct drm_i915_gem_context_param_sseu sseu = { };
+	struct drm_i915_gem_context_param arg = {
+		.param = I915_CONTEXT_PARAM_SSEU,
+		.size = sizeof(sseu),
+		.value = to_user_pointer(&sseu),
+	};
+	igt_spin_t *spin = NULL;
+
+	/*
+	 * Check the requirement before creating the context: when
+	 * igt_require() skips it does not return here, so a context created
+	 * earlier would never reach gem_context_destroy() below.
+	 */
+	igt_require(__slice_count__ > 1);
+
+	arg.ctx_id = gem_context_create(fd);
+
+	get_device_sseu(fd, &device_sseu);
+	def_sseu(&sseu, &device_sseu);
+
+	/*
+	 * Each step below is: __pre_set() (starts a spinner for TEST_BUSY),
+	 * apply the configuration currently held in sseu, then __post_set()
+	 * to verify the resulting slice count and clean up.
+	 */
+
+	/* First set the default mask */
+	spin = __pre_set(fd, flags, arg.ctx_id);
+	gem_context_set_param(fd, &arg);
+	spin = __post_set(fd, flags, arg.ctx_id, spin,
+			  __builtin_popcount(sseu.slice_mask));
+
+	/* Then set a powergated configuration */
+	spin = __pre_set(fd, flags, arg.ctx_id);
+	pg_sseu(&sseu, &device_sseu);
+	gem_context_set_param(fd, &arg);
+	spin = __post_set(fd, flags, arg.ctx_id, spin,
+			  __builtin_popcount(sseu.slice_mask));
+
+	/* Put the device's default back again */
+	spin = __pre_set(fd, flags, arg.ctx_id);
+	def_sseu(&sseu, &device_sseu);
+	gem_context_set_param(fd, &arg);
+	spin = __post_set(fd, flags, arg.ctx_id, spin,
+			  __builtin_popcount(sseu.slice_mask));
+
+	/* One last powergated config for the road... */
+	spin = __pre_set(fd, flags, arg.ctx_id);
+	pg_sseu(&sseu, &device_sseu);
+	gem_context_set_param(fd, &arg);
+	spin = __post_set(fd, flags, arg.ctx_id, spin,
+			  __builtin_popcount(sseu.slice_mask));
+
+	gem_context_destroy(fd, arg.ctx_id);
+}
+
+igt_main
+{
+	unsigned int max_slices = 3, max_subslices = 3;
+	unsigned int i;
+	int fd;
+
+	igt_fixture {
+		fd = drm_open_driver(DRIVER_INTEL);
+		igt_require_gem(fd);
+
+		__intel_devid__ = intel_get_drm_devid(fd);
+		__intel_gen__ = intel_gen(__intel_devid__);
+
+		igt_require(kernel_has_per_context_sseu_support(fd));
+
+		/* Pick the slice count read-back helper for this generation. */
+		if (__intel_gen__ >= 11)
+			__read_slice_count = gen11_read_slice_count;
+		else
+			__read_slice_count = gen8_read_slice_count;
+	}
+
+	igt_subtest_group {
+		igt_fixture {
+			drm_i915_getparam_t gp;
+
+			/*
+			 * Cache the device slice and subslice masks, and
+			 * their popcounts, for use by the subtests.
+			 */
+			gp.param = I915_PARAM_SLICE_MASK;
+			gp.value = (int *) &__slice_mask__;
+			do_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
+			__slice_count__ = __builtin_popcount(__slice_mask__);
+
+			gp.param = I915_PARAM_SUBSLICE_MASK;
+			gp.value = (int *) &__subslice_mask__;
+			do_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
+			__subslice_count__ =
+				__builtin_popcount(__subslice_mask__);
+		}
+
+		igt_subtest("invalid-args")
+			test_invalid_args(fd);
+
+		igt_subtest("invalid-sseu")
+			test_invalid_sseu(fd);
+
+		igt_subtest("ggtt-args")
+			test_ggtt_args(fd);
+
+		igt_subtest("engines")
+			test_engines(fd);
+
+		/* i is unsigned, hence %u; the subtest names are unchanged. */
+		for (i = 1; i < max_slices; i++) {
+			igt_subtest_f("slice-pg-%u", i) {
+				igt_require(__slice_count__ > i);
+
+				test_slice_pg(fd, i);
+			}
+		}
+
+		for (i = 1; i < max_subslices; i++) {
+			igt_subtest_f("subslice-pg-%u", i) {
+				/*
+				 * NOTE(review): unlike the slice loop above,
+				 * this requirement does not depend on i -
+				 * confirm test_subslice_pg() copes with
+				 * i == __subslice_count__.
+				 */
+				igt_require(__subslice_count__ >= 2);
+
+				/*
+				 * Only available on some Atom platforms and
+				 * Gen10+.
+				 */
+				igt_require(IS_BROXTON(__intel_devid__) ||
+					    IS_GEMINILAKE(__intel_devid__) ||
+					    __intel_gen__ >= 10);
+
+				test_subslice_pg(fd, i);
+			}
+		}
+
+		igt_subtest("perf-oa") {
+			igt_require(__slice_count__ > 1);
+
+			test_perf_oa(fd);
+		}
+
+		igt_subtest("dynamic")
+			test_dynamic(fd, 0);
+
+		igt_subtest("dynamic-busy")
+			test_dynamic(fd, TEST_BUSY);
+
+		igt_subtest("dynamic-reset")
+			test_dynamic(fd, TEST_RESET);
+
+		igt_subtest("dynamic-busy-reset")
+			test_dynamic(fd, TEST_BUSY | TEST_RESET);
+
+		igt_subtest("dynamic-busy-hang")
+			test_dynamic(fd, TEST_BUSY | TEST_HANG);
+
+		igt_subtest("dynamic-idle")
+			test_dynamic(fd, TEST_IDLE);
+
+		igt_subtest("dynamic-suspend")
+			test_dynamic(fd, TEST_SUSPEND);
+
+		igt_subtest("dynamic-idle-suspend")
+			test_dynamic(fd, TEST_IDLE | TEST_SUSPEND);
+	}
+
+	igt_fixture {
+		close(fd);
+	}
+}
diff --git a/tests/meson.build b/tests/meson.build
index d22d59e0837d..f70c859f1a62 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -248,6 +248,13 @@ foreach prog : test_progs
 		   install : true)
 endforeach
 
+test_executables += executable('gem_ctx_sseu', 'gem_ctx_sseu.c',
+	   dependencies : test_deps + [ lib_igt_perf ],
+	   install_dir : libexecdir,
+	   install_rpath : libexecdir_rpathdir,
+	   install : true)
+test_progs += 'gem_ctx_sseu'
+
 test_executables += executable('gem_eio', 'gem_eio.c',
 	   dependencies : test_deps + [ realtime ],
 	   install_dir : libexecdir,
-- 
2.17.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx