On 10/03/16 11:03, Derek Morton wrote:
This is intended to test the scheduler behaviour is correct. The subtests are <ring>-basic Tests that batch buffers of the same priority submitted to a ring execute in the order they are submitted. <ring>-read Submits a batch buffer with a read dependency to a buffer object to a ring which is held in the scheduler queue by a long running batch buffer. Submit batch buffers to other rings that have a read dependency to the same buffer object. Ensure they execute before the batch buffer being held up behind the long running batch buffer. <ring>-write Submits a batch buffer with a write dependency to a buffer object to a ring which is held in the scheduler queue by a long running batch buffer. Submit batch buffers to other rings that have a write dependency to the same buffer object. Submit batch buffers with no interdependencies to all rings. Ensure the batch buffers that have write dependencies are executed in submission order but the batch buffers without interdependencies do not get held up. v2: Addressed review comments from Daniele Ceraolo Spurio v3: Added logic to use generic BSD ring if there is 1 and BSD1 / BSD2 if there are 2. Signed-off-by: Derek Morton<derek.j.morton@xxxxxxxxx> --- tests/Makefile.sources | 1 + tests/gem_scheduler.c | 459 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 460 insertions(+) create mode 100644 tests/gem_scheduler.c diff --git a/tests/Makefile.sources b/tests/Makefile.sources index f8b18b0..c88e045 100644 --- a/tests/Makefile.sources +++ b/tests/Makefile.sources @@ -66,6 +66,7 @@ TESTS_progs_M = \ gem_request_retire \ gem_reset_stats \ gem_ringfill \ + gem_scheduler \ gem_set_tiling_vs_blt \ gem_softpin \ gem_stolen \ diff --git a/tests/gem_scheduler.c b/tests/gem_scheduler.c new file mode 100644 index 0000000..436440a --- /dev/null +++ b/tests/gem_scheduler.c @@ -0,0 +1,459 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Derek Morton<derek.j.morton@xxxxxxxxx> + * + */ + +#include "igt.h" +#include <unistd.h> +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> +#include <inttypes.h> +#include <time.h> +#include <sys/stat.h> +#include <sys/ioctl.h> +#include <fcntl.h> + +IGT_TEST_DESCRIPTION("Check scheduler behaviour. Basic tests ensure independant " + "batch buffers of the same priority are executed in " + "submission order. Read-read tests ensure " + "batch buffers with a read dependency to the same buffer " + "object do not block each other. Write-write dependency " + "tests ensure batch buffers with a write dependency to a " + "buffer object will be executed in submission order but " + "will not block execution of other independant batch " + "buffers."); + +#define SEC_TO_NSEC (1000 * 1000 * 1000) + +static struct ring { + const char *name; + int id; + bool exists; +} rings[] = { + { "render", I915_EXEC_RENDER, false }, + { "bsd", I915_EXEC_BSD , false }, + { "bsd2", I915_EXEC_BSD | 2<<13, false }, + { "blt", I915_EXEC_BLT, false }, + { "vebox", I915_EXEC_VEBOX, false }, +};
If you can't use intel_execution_engines could you add a comment to explain why? also considering the renaming going on in the kernel you cold replace "ring" with "engine"
+ +#define NBR_RINGS (sizeof(rings)/sizeof(struct ring)) + +static void check_rings(int fd) { + int loop; + for(loop=0; loop < NBR_RINGS; loop++) {
Style: space between for/if and "(" in this file
+ if(gem_has_ring(fd, rings[loop].id)) { + if(rings[loop].id == (I915_EXEC_BSD | 2<<13)) { + rings[loop].exists = gem_has_bsd2(fd); + } + else { + rings[loop].exists = true;
You're marking the VEBOX as existing so adding a gen requirement in this function would be nice. I know that the library you're using already requires gen8, but the requirement doesn't look to be explicit anywhere in this file so it would be nice to add it for clarity.
+ if(rings[loop].id == I915_EXEC_BSD) + /* If there is BSD2, need to make BSD1 + * explicit. + */ + if(gem_has_bsd2(fd)) + rings[loop].id |= (1<<13); + } + } + } +} + +static drm_intel_bo *create_and_check_bo(drm_intel_bufmgr *bufmgr, const char *desc) +{ + drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, desc, BATCH_SZ, BATCH_SZ); + igt_assert_f(bo, "Failed allocating %s\n", desc); + return bo; +} + +static struct intel_batchbuffer *create_delay_bb(int fd, drm_intel_bufmgr *bufmgr, + int ringid, uint32_t loops, + drm_intel_bo *dest) +{ + struct intel_batchbuffer *buffer; + buffer = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd)); + igt_assert(buffer); + igt_create_delay_bb(fd, buffer, ringid, loops, dest); + return buffer; +} + +static struct intel_batchbuffer *create_timestamp_bb(int fd, drm_intel_bufmgr *bufmgr, + int ringid, drm_intel_bo *dest, + drm_intel_bo *load, bool write) +{ + struct intel_batchbuffer *buffer; + buffer = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd)); + igt_assert(buffer); + igt_create_timestamp_bb(fd, buffer, ringid, dest, load, write); + return buffer; +} + +static struct intel_batchbuffer *create_noop_bb(int fd, drm_intel_bufmgr *bufmgr, + int noops) +{ + struct intel_batchbuffer *buffer; + buffer = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd)); + igt_assert(buffer); + igt_create_noop_bb(buffer, noops); + return buffer; +} + +static void init_context(int *fd, drm_intel_bufmgr **bufmgr, int ringid)
init_context might be a bit misleading as a name as you're not init_context might be a bit misleading as a name as you're not init_context might be a bit misleading as a name as you're not init_context might be a bit misleading as a name as you're not init_context might be a bit misleading as a name as you're not init_context might be a bit misleading as a name as you're not init_context might be a bit misleading as a name as you're not init_context might be a bit misleading as a name as you're not a ctx directly, although initializing
+{ + struct intel_batchbuffer *noop_bb; + *fd = drm_open_driver(DRIVER_INTEL); + igt_assert(*fd >= 0); + *bufmgr = drm_intel_bufmgr_gem_init(*fd, BATCH_SZ); + igt_assert(*bufmgr); + drm_intel_bufmgr_gem_enable_reuse(*bufmgr); + /* Send a noop batch buffer to force any deferred initialisation */ + noop_bb = create_noop_bb(*fd, *bufmgr, 5); + intel_batchbuffer_flush_on_ring(noop_bb, ringid); + intel_batchbuffer_free(noop_bb); +} + +/* Basic test. Check batch buffers of the same priority and with no dependencies + * are executed in the order they are submitted. + */ +#define NBR_BASIC_FDs (3) +static void run_test_basic(int in_flight, int ringid) +{ + int fd[NBR_BASIC_FDs]; + int loop; + drm_intel_bufmgr *bufmgr[NBR_BASIC_FDs]; + uint32_t *delay_buf, *ts1_buf, *ts2_buf; + struct intel_batchbuffer *ts1_bb, *ts2_bb; + struct intel_batchbuffer **in_flight_bbs; + uint32_t calibrated_1s; + drm_intel_bo *delay_bo, *ts1_bo, *ts2_bo; + + in_flight_bbs = malloc(in_flight * sizeof(struct intel_batchbuffer *)); + igt_assert(in_flight_bbs); + + /* Need multiple i915 fd's. Scheduler will not change execution order of + * batch buffers from the same context. + */ + for(loop=0; loop < NBR_BASIC_FDs; loop++) + init_context(&(fd[loop]), &(bufmgr[loop]), ringid); + + + /* Create buffer objects */ + delay_bo = create_and_check_bo(bufmgr[0], "delay bo"); + ts1_bo = create_and_check_bo(bufmgr[1], "ts1 bo"); + ts2_bo = create_and_check_bo(bufmgr[2], "ts2 bo"); + + /* Put some non zero values in the delay bo */
You do this write for every delay batch, so you could move it to create_delay_bb (or to the library function)
+ {
Why is this extra scope needed? "data" doesn't clash with any other variable name
+ uint32_t data=0xffffffff; + drm_intel_bo_subdata(delay_bo, 0, 4, &data); + } + + calibrated_1s = igt_calibrate_delay_bb(fd[0], bufmgr[0], ringid); + + /* Batch buffers to fill the in flight queue */ + in_flight_bbs[0] = create_delay_bb(fd[0], bufmgr[0], ringid, calibrated_1s, delay_bo); + for(loop = 1; loop < in_flight; loop++) + in_flight_bbs[loop] = create_noop_bb(fd[0], bufmgr[0], 5); + + /* Extra batch buffers in the scheduler queue */ + ts1_bb = create_timestamp_bb(fd[1], bufmgr[1], ringid, ts1_bo, NULL, false); + ts2_bb = create_timestamp_bb(fd[2], bufmgr[2], ringid, ts2_bo, NULL, false); + + /* Flush batchbuffers */ + for(loop = 0; loop < in_flight; loop++) + intel_batchbuffer_flush_on_ring(in_flight_bbs[loop], ringid); + intel_batchbuffer_flush_on_ring(ts1_bb, ringid); + intel_batchbuffer_flush_on_ring(ts2_bb, ringid); + + /* This will not return until the bo has finished executing */ + drm_intel_bo_map(delay_bo, 0); + drm_intel_bo_map(ts1_bo, 0); + drm_intel_bo_map(ts2_bo, 0); + + delay_buf = delay_bo->virtual; + ts1_buf = ts1_bo->virtual; + ts2_buf = ts2_bo->virtual; + + igt_debug("Delay Timestamp = 0x%08" PRIx32 "\n", delay_buf[2]); + igt_debug("TS1 Timestamp = 0x%08" PRIx32 "\n", ts1_buf[0]); + igt_debug("TS2 Timestamp = 0x%08" PRIx32 "\n", ts2_buf[0]); + + /* buf[0] in the target buffer should be 0 if the batch buffer completed */ + igt_assert_f(delay_buf[0] == 0, + "delay_buf[0] expected 0x0, got 0x%" PRIx32 "\n", delay_buf[0]); + + igt_assert_f(igt_compare_timestamps(delay_buf[2], ts1_buf[0]), + "Delay ts (0x%08" PRIx32 ") > TS1 ts (0x%08" PRIx32 ")\n", + delay_buf[2], ts1_buf[0]); + igt_assert_f(igt_compare_timestamps(ts1_buf[0], ts2_buf[0]), + "TS1 ts (0x%08" PRIx32 ") > TS2 ts (0x%08" PRIx32 ")\n", + ts1_buf[0], ts2_buf[0]); + + /* Cleanup */ + for(loop = 0; loop < in_flight; loop++) + intel_batchbuffer_free(in_flight_bbs[loop]); + intel_batchbuffer_free(ts1_bb); + intel_batchbuffer_free(ts2_bb); + + drm_intel_bo_unreference(delay_bo); + drm_intel_bo_unreference(ts1_bo); + drm_intel_bo_unreference(ts2_bo); + for(loop = 0; loop < NBR_BASIC_FDs; loop++) { + drm_intel_bufmgr_destroy(bufmgr[loop]); + close(fd[loop]); + } + free(in_flight_bbs); +} + +/* Dependency test. + * write=0, Submit batch buffers with read dependencies to all rings. Delay one + * with a long executing batch buffer. Check the others are not held up. + * write=1, Submit batch buffers with write dependencies to all rings. Delay one + * with a long executing batch buffer. Also submit batch buffers with no + * dependencies to all rings. Batch buffers with write dependencies should be + * executed in submission order. The batch buffers with no dependencies should + * not be held up. + */ +static void run_test_dependency(int in_flight, int ring, bool write) +{ + int fd[NBR_RINGS], fd2[NBR_RINGS]; + int loop; + int prime_fd; + uint32_t *delay_buf, *ts_buf[NBR_RINGS], *ts2_buf[NBR_RINGS], *shared_buf; + uint32_t calibrated_1s; + drm_intel_bufmgr *bufmgr[NBR_RINGS], *bufmgr2[NBR_RINGS]; + struct intel_batchbuffer *ts_bb[NBR_RINGS], *ts2_bb[NBR_RINGS], **in_flight_bbs; + drm_intel_bo *delay_bo, *ts_bo[NBR_RINGS], *ts2_bo[NBR_RINGS], *shared_bo[NBR_RINGS]; + + in_flight_bbs = malloc(in_flight * sizeof(struct intel_batchbuffer *)); + igt_assert(in_flight_bbs); + + /* Need multiple i915 fd's. Scheduler will not change execution order of + * batch buffers from the same context. + */ + for(loop=0; loop < NBR_RINGS; loop++) {
You have several loops on all engines but with a bit of reordering you should be able to fit everything in less loops. Unless I've missed a dependency, you should be able to do something like:
for (loop=0; loop < NBR_RINGS; loop++) { init_context(&(fd[loop]), &(bufmgr[loop]), rings[loop].id); ts_bo[loop] = create_and_check_bo(bufmgr[loop], "ts bo"); if (loop == 0) shared_bo[0] = create_and_check_bo(...); drm_intel_bo_gem_export_to_prime(...); else shared_bo[loop] = drm_intel_bo_gem_create_from_prime(...); ts_bb[loop] = create_timestamp_bb(..); if (write) { .... } } Also the operations performed in the "if (write)" case are the same of the ones in the main loop with the exception of the shared bo so potentially you could move the whole block in a separate function if it doesn't get too messy.
+ init_context(&(fd[loop]), &(bufmgr[loop]), rings[loop].id); + if(write) + init_context(&(fd2[loop]), &(bufmgr2[loop]), rings[loop].id); + } + + /* Create buffer objects */ + delay_bo = create_and_check_bo(bufmgr[ring], "delay bo"); + for(loop = 0; loop < NBR_RINGS; loop++) { + ts_bo[loop] = create_and_check_bo(bufmgr[loop], "ts bo"); + if(write) + ts2_bo[loop] = create_and_check_bo(bufmgr2[loop], "ts2 bo"); + } + + /* Create shared buffer object */ + shared_bo[0] = create_and_check_bo(bufmgr[0], "shared bo"); + + drm_intel_bo_gem_export_to_prime(shared_bo[0], &prime_fd); + for(loop = 1; loop < NBR_RINGS; loop++) { + shared_bo[loop] = drm_intel_bo_gem_create_from_prime(bufmgr[loop], + prime_fd, BATCH_SZ); + igt_assert(shared_bo[loop]); + } + close(prime_fd); + + /* Put some non zero values in the delay and shared bo */ + drm_intel_bo_map(delay_bo, 1); + delay_buf = delay_bo->virtual; + delay_buf[0] = 0xff; + drm_intel_bo_unmap(delay_bo); + drm_intel_bo_map(shared_bo[0], 1); + shared_buf = shared_bo[0]->virtual; + shared_buf[0] = 0xff00ff00; + drm_intel_bo_unmap(shared_bo[0]);
using subdata might be cleaner here as well
+ + calibrated_1s = igt_calibrate_delay_bb(fd[ring], bufmgr[ring], rings[ring].id); + + /* Batch buffers to fill the in flight queue */ + in_flight_bbs[0] = create_delay_bb(fd[ring], bufmgr[ring], rings[ring].id, + calibrated_1s, delay_bo); + for(loop = 1; loop < in_flight; loop++) + in_flight_bbs[loop] = create_noop_bb(fd[ring], bufmgr[ring], 5); + + for(loop = 0; loop < NBR_RINGS; loop++) { + ts_bb[loop] = create_timestamp_bb(fd[loop], bufmgr[loop], + rings[loop].id, ts_bo[loop], + shared_bo[loop], write); + if(write) + ts2_bb[loop] = create_timestamp_bb(fd2[loop], bufmgr2[loop], + rings[loop].id, ts2_bo[loop], + NULL, false); + } + + /* Flush batchbuffers */ + for(loop = 0; loop < in_flight; loop++) + intel_batchbuffer_flush_on_ring(in_flight_bbs[loop], rings[ring].id); + + intel_batchbuffer_flush_on_ring(ts_bb[ring], rings[ring].id); + for(loop = 0; loop < NBR_RINGS; loop++) + if((loop != ring) && rings[loop].exists) + intel_batchbuffer_flush_on_ring(ts_bb[loop], rings[loop].id); + + if(write) { + intel_batchbuffer_flush_on_ring(ts2_bb[ring], rings[ring].id); + for(loop = 0; loop < NBR_RINGS; loop++) + if((loop != ring) && rings[loop].exists) + intel_batchbuffer_flush_on_ring(ts2_bb[loop], rings[loop].id); + } + + /* This will not return until the bo has finished executing */ + drm_intel_bo_map(delay_bo, 0); + delay_buf = delay_bo->virtual; + for(loop = 0; loop < NBR_RINGS; loop++) { + drm_intel_bo_map(ts_bo[loop], 0); + ts_buf[loop] = ts_bo[loop]->virtual; + if(write) { + drm_intel_bo_map(ts2_bo[loop], 0); + ts2_buf[loop] = ts2_bo[loop]->virtual; + } + } + + /* buf[0] in the target buffer should be 0 if the batch buffer completed */ + igt_assert_f(delay_buf[0] == 0, + "delay_buf[0] expected 0x0, got 0x%" PRIx32 "\n", + delay_buf[0]); + + igt_debug("%6s delay timestamp = 0x%08" PRIx32 "\n", + rings[ring].name, delay_buf[2]); + for(loop = 0; loop < NBR_RINGS; loop++) + igt_debug("%6s batch timestamp = 0x%08" PRIx32 "\n", + rings[loop].name, ts_buf[loop][0]); + + if(write) { + igt_debug("Independant batch buffers\n"); + for(loop = 0; loop < NBR_RINGS; loop++) + igt_debug("%6s batch timestamp = 0x%08" PRIx32 "\n", + rings[loop].name, ts2_buf[loop][0]); + } + + for(loop = 0; loop < NBR_RINGS; loop++) { + if((loop != ring) && rings[loop].exists) { + if(write) { + /* Write dependency, delayed ring should run first */ + igt_assert_f(igt_compare_timestamps(ts_buf[ring][0], ts_buf[loop][0]), + "%s ran before %s - 0x%08" PRIx32 " vs 0x%08" PRIx32 "\n", + rings[loop].name, rings[ring].name, + ts_buf[loop][0], ts_buf[ring][0]); + /* Second bb without dependency should run first */ + igt_assert_f(igt_compare_timestamps(ts2_buf[loop][0], ts_buf[loop][0]), + "(%s) independant bb was held up - 0x%08" PRIx32 " vs 0x%08" PRIx32 "\n", + rings[loop].name, ts_buf[loop][0], ts2_buf[loop][0]); + } + else + /* Read dependency, delayed ring should run last */ + igt_assert_f(igt_compare_timestamps(ts_buf[loop][0], ts_buf[ring][0]), + "%s ran after %s - 0x%08" PRIx32 " vs 0x%08" PRIx32 "\n", + rings[loop].name, rings[ring].name, + ts_buf[loop][0], ts_buf[ring][0]); + } + } + + /* Cleanup */ + for(loop = 0; loop < in_flight; loop++) + intel_batchbuffer_free(in_flight_bbs[loop]); + + for(loop = 0; loop < NBR_RINGS; loop++) { + intel_batchbuffer_free(ts_bb[loop]); + drm_intel_bo_unreference(ts_bo[loop]); + drm_intel_bo_unreference(shared_bo[loop]); + if(write) { + intel_batchbuffer_free(ts2_bb[loop]); + drm_intel_bo_unreference(ts2_bo[loop]); + } + } + + drm_intel_bo_unreference(delay_bo);
You can move this unreferencing to before the above loop and then squash the 2 loops onNBR_RINGSinto one.
Regards, Daniele
+ + for(loop = 0; loop < NBR_RINGS; loop++) { + drm_intel_bufmgr_destroy(bufmgr[loop]); + close(fd[loop]); + if(write) { + drm_intel_bufmgr_destroy(bufmgr2[loop]); + close(fd2[loop]); + } + } + + free(in_flight_bbs); +} + +igt_main +{ + int loop; + int in_flight; + int fd; + + igt_fixture { + int debug_fd; + int l; + char buf[6]; + /* Get nbr of batch buffers that the scheduler will queue in the + * HW. If this debugfs file does not exist there is no scheduler + * so skip the test. + */ + debug_fd = igt_debugfs_open("i915_scheduler_min_flying", O_RDONLY); + igt_skip_on(debug_fd == -1); + l = read(debug_fd, buf, sizeof(buf)-1); + igt_assert(l > 0); + igt_assert(l < sizeof(buf)); + buf[l] = '\0'; + /* May be a decimal or hex number depending on scheduler version */ + if(sscanf(buf, "0x%2x", &in_flight) != 1) + igt_assert_f(sscanf(buf, "%2d", &in_flight) == 1, + "Error reading from i915_scheduler_min_flying\n"); + close(debug_fd); + igt_debug("in flight = %d\n", in_flight); + fd = drm_open_driver(DRIVER_INTEL); + igt_assert(fd >= 0); + check_rings(fd); + } + + for (loop=0; loop < NBR_RINGS; loop++) + igt_subtest_f("%s-basic", rings[loop].name) { + gem_require_ring(fd, rings[loop].id); + run_test_basic(in_flight, rings[loop].id); + } + + for (loop=0; loop < NBR_RINGS; loop++) + igt_subtest_f("%s-read", rings[loop].name) { + gem_require_ring(fd, rings[loop].id); + run_test_dependency(in_flight, loop, false); + } + + for (loop=0; loop < NBR_RINGS; loop++) + igt_subtest_f("%s-write", rings[loop].name) { + gem_require_ring(fd, rings[loop].id); + run_test_dependency(in_flight, loop, true); + } + + igt_fixture { + close(fd); + } +}
_______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx