Like benchmarks/gem_syslatency, but to investigate/compare the impact
with amdgpu.

Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
---
 lib/igt_aux.h                 |   1 +
 tests/Makefile.am             |   2 +
 tests/Makefile.sources        |   1 +
 tests/amdgpu/amd_syslatency.c | 463 ++++++++++++++++++++++++++++++++++
 tests/amdgpu/meson.build      |   1 +
 5 files changed, 468 insertions(+)
 create mode 100644 tests/amdgpu/amd_syslatency.c

diff --git a/lib/igt_aux.h b/lib/igt_aux.h
index 9a962881b..3641e4ee3 100644
--- a/lib/igt_aux.h
+++ b/lib/igt_aux.h
@@ -33,6 +33,7 @@
 #include <stdbool.h>
 #include <stddef.h>
 #include <sys/time.h>
+#include <sys/syscall.h>
 
 #include <i915/gem_submission.h>
 
diff --git a/tests/Makefile.am b/tests/Makefile.am
index f41ad5096..69300448a 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -156,5 +156,7 @@ amdgpu_amd_cs_nop_CFLAGS = $(AM_CFLAGS) $(DRM_AMDGPU_CFLAGS)
 amdgpu_amd_cs_nop_LDADD = $(LDADD) $(DRM_AMDGPU_LIBS)
 amdgpu_amd_prime_CFLAGS = $(AM_CFLAGS) $(DRM_AMDGPU_CFLAGS)
 amdgpu_amd_prime_LDADD = $(LDADD) $(DRM_AMDGPU_LIBS)
+amdgpu_amd_syslatency_CFLAGS = $(AM_CFLAGS) $(DRM_AMDGPU_CFLAGS) -pthread
+amdgpu_amd_syslatency_LDADD = $(LDADD) $(DRM_AMDGPU_LIBS) -lpthread
 endif
 
diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index 54b4a3c21..002af360e 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -19,6 +19,7 @@ AMDGPU_TESTS = \
 	amdgpu/amd_basic \
 	amdgpu/amd_cs_nop \
 	amdgpu/amd_prime \
+	amdgpu/amd_syslatency \
 	$(NULL)
 
 TESTS_progs = \
diff --git a/tests/amdgpu/amd_syslatency.c b/tests/amdgpu/amd_syslatency.c
new file mode 100644
index 000000000..b4fb2fc01
--- /dev/null
+++ b/tests/amdgpu/amd_syslatency.c
@@ -0,0 +1,463 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "igt.h"
+#include "drmtest.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <sched.h>
+#include <signal.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+
+#include <amdgpu.h>
+#include <amdgpu_drm.h>
+
+#include "igt_stats.h"
+
+#define GFX_COMPUTE_NOP  0xffff1000
+#define SDMA_NOP  0x0
+
+/*
+ * Allocate a buffer object, reserve a GPU virtual address range for it, map
+ * it at that address and map it for CPU access.
+ *
+ * Returns 0 on success, storing the buffer handle in *bo, the CPU pointer in
+ * *cpu, the GPU address in *mc_address and the VA range handle in *va_handle.
+ * On failure, returns the error code of the failing libdrm call after
+ * releasing whatever had been acquired up to that point.
+ */
+static int
+amdgpu_bo_alloc_and_map(amdgpu_device_handle dev, unsigned size,
+			unsigned alignment, unsigned heap, uint64_t flags,
+			amdgpu_bo_handle *bo, void **cpu, uint64_t *mc_address,
+			amdgpu_va_handle *va_handle)
+{
+	struct amdgpu_bo_alloc_request request = {
+		.alloc_size = size,
+		.phys_alignment = alignment,
+		.preferred_heap = heap,
+		.flags = flags,
+	};
+	amdgpu_bo_handle buf_handle;
+	amdgpu_va_handle handle;
+	uint64_t vmc_addr;
+	int r;
+
+	r = amdgpu_bo_alloc(dev, &request, &buf_handle);
+	if (r)
+		return r;
+
+	r = amdgpu_va_range_alloc(dev,
+				  amdgpu_gpu_va_range_general,
+				  size, alignment, 0, &vmc_addr,
+				  &handle, 0);
+	if (r)
+		goto error_va_alloc;
+
+	r = amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_MAP);
+	if (r)
+		goto error_va_map;
+
+	r = amdgpu_bo_cpu_map(buf_handle, cpu);
+	if (r)
+		goto error_cpu_map;
+
+	*bo = buf_handle;
+	*mc_address = vmc_addr;
+	*va_handle = handle;
+
+	return 0;
+
+	/* Each label undoes only the steps that completed before the failure. */
+error_cpu_map:
+	amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);
+
+error_va_map:
+	amdgpu_va_range_free(handle);
+
+error_va_alloc:
+	amdgpu_bo_free(buf_handle);
+	return r;
+}
+
+/* Release everything acquired by amdgpu_bo_alloc_and_map(). */
+static void
+amdgpu_bo_unmap_and_free(amdgpu_bo_handle bo, amdgpu_va_handle va_handle,
+			 uint64_t mc_addr, uint64_t size)
+{
+	amdgpu_bo_cpu_unmap(bo);
+	amdgpu_bo_va_op(bo, 0, size, mc_addr, 0, AMDGPU_VA_OP_UNMAP);
+	amdgpu_va_range_free(va_handle);
+	amdgpu_bo_free(bo);
+}
+
+/* Wait, without timeout, for fence seqno on the given context/engine/ring. */
+static void amdgpu_cs_sync(amdgpu_context_handle context,
+			   unsigned int ip_type,
+			   int ring,
+			   unsigned int seqno)
+{
+	struct amdgpu_cs_fence fence = {
+		.context = context,
+		.ip_type = ip_type,
+		.ring = ring,
+		.fence = seqno,
+	};
+	uint32_t expired;
+	int err;
+
+	err = amdgpu_cs_query_fence_status(&fence,
+					   AMDGPU_TIMEOUT_INFINITE,
+					   0, &expired);
+	igt_assert_eq(err, 0);
+}
+
+/* Set by syslatency() to ask every worker thread to leave its loop. */
+static volatile int done;
+
+/* Per-thread state of a GPU submission hog. */
+struct busyspin {
+	pthread_t thread;
+	unsigned long count;	/* number of submission attempts */
+	amdgpu_device_handle device;
+	unsigned int ip_type;
+	unsigned int ring;
+};
+
+/* Per-thread state of a timer wakeup latency sampler. */
+struct sys_wait {
+	pthread_t thread;
+	struct igt_mean mean;	/* wakeup delays, in nanoseconds */
+};
+
+/*
+ * Request a CPU wakeup latency of 0 through /dev/cpu_dma_latency, warning on
+ * stderr if that is not possible.
+ *
+ * The file descriptor is deliberately left open: the kernel only honours
+ * the request for as long as the file remains open.
+ */
+static void force_low_latency(void)
+{
+	int32_t target = 0;
+	int fd = open("/dev/cpu_dma_latency", O_RDWR);
+	if (fd < 0 || write(fd, &target, sizeof(target)) < 0)
+		fprintf(stderr,
+			"Unable to prevent CPU sleeps and force low latency using /dev/cpu_dma_latency: %s\n",
+			strerror(errno));
+}
+
+/*
+ * Thread body: resubmit a batch of 16 NOP dwords to bs->ip_type/bs->ring as
+ * fast as possible until done is set, counting the attempts in bs->count.
+ */
+static void *busyspin(void *arg)
+{
+	struct busyspin *bs = arg;
+	amdgpu_bo_handle ib_result_handle;
+	void *ib_result_cpu;
+	uint64_t ib_result_mc_address;
+	amdgpu_context_handle context;
+	struct amdgpu_cs_request ibs_request;
+	struct amdgpu_cs_ib_info ib_info;
+	uint32_t *ptr;
+	amdgpu_bo_list_handle bo_list;
+	amdgpu_va_handle va_handle;
+	int i, r;
+
+	r = amdgpu_cs_ctx_create(bs->device, &context);
+	igt_assert_eq(r, 0);
+
+	r = amdgpu_bo_alloc_and_map(bs->device, 4096, 4096,
+				    AMDGPU_GEM_DOMAIN_GTT, 0,
+				    &ib_result_handle, &ib_result_cpu,
+				    &ib_result_mc_address, &va_handle);
+	igt_assert_eq(r, 0);
+
+	ptr = ib_result_cpu;
+	for (i = 0; i < 16; ++i)
+		ptr[i] = GFX_COMPUTE_NOP;
+
+	r = amdgpu_bo_list_create(bs->device, 1, &ib_result_handle, NULL, &bo_list);
+	igt_assert_eq(r, 0);
+
+	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
+	ib_info.ib_mc_address = ib_result_mc_address;
+	ib_info.size = 16;
+
+	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
+	ibs_request.ip_type = bs->ip_type;
+	ibs_request.ring = bs->ring;
+	ibs_request.number_of_ibs = 1;
+	ibs_request.ibs = &ib_info;
+	ibs_request.resources = bo_list;
+
+	bs->count = 0;
+	while (!done) {
+		amdgpu_cs_submit(context, 0, &ibs_request, 1);
+		bs->count++;
+	}
+
+	/* Let the last submission finish before tearing down its buffer. */
+	amdgpu_cs_sync(context, bs->ip_type, bs->ring, ibs_request.seq_no);
+
+	r = amdgpu_bo_list_destroy(bo_list);
+	igt_assert_eq(r, 0);
+
+	amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
+				 ib_result_mc_address, 4096);
+
+	amdgpu_cs_ctx_free(context);
+
+	return NULL;
+}
+
+/* Time from a to b, in nanoseconds. */
+static double elapsed(const struct timespec *a, const struct timespec *b)
+{
+	return 1e9*(b->tv_sec - a->tv_sec) + (b->tv_nsec - a->tv_nsec);
+}
+
+/*
+ * Thread body: until done is set, arm an absolute CLOCK_MONOTONIC timer a
+ * short, randomised time ahead, wait for its signal and record in w->mean
+ * how long after the programmed expiry the thread actually woke up.
+ */
+static void *sys_wait(void *arg)
+{
+	struct sys_wait *w = arg;
+	struct sigevent sev;
+	timer_t timer;
+	sigset_t mask;
+	struct timespec now;
+	int err;
+#define SIG SIGRTMIN
+
+	/* Block the signal in this thread only, so sigwait() can collect it. */
+	sigemptyset(&mask);
+	sigaddset(&mask, SIG);
+	pthread_sigmask(SIG_BLOCK, &mask, NULL);
+
+	memset(&sev, 0, sizeof(sev));
+	sev.sigev_notify = SIGEV_SIGNAL | SIGEV_THREAD_ID;
+	sev.sigev_notify_thread_id = gettid();
+	sev.sigev_signo = SIG;
+	err = timer_create(CLOCK_MONOTONIC, &sev, &timer);
+	igt_assert_eq(err, 0);
+
+	clock_gettime(CLOCK_MONOTONIC, &now);
+	while (!done) {
+		struct itimerspec its;
+		int sigs;
+
+		its.it_value = now;
+		its.it_value.tv_nsec += 100 * 1000;
+		its.it_value.tv_nsec += rand() % (NSEC_PER_SEC / 1000);
+		if (its.it_value.tv_nsec >= NSEC_PER_SEC) {
+			its.it_value.tv_nsec -= NSEC_PER_SEC;
+			its.it_value.tv_sec += 1;
+		}
+		its.it_interval.tv_sec = its.it_interval.tv_nsec = 0;
+		timer_settime(timer, TIMER_ABSTIME, &its, NULL);
+
+		sigwait(&mask, &sigs);
+		clock_gettime(CLOCK_MONOTONIC, &now);
+		igt_mean_add(&w->mean, elapsed(&its.it_value, &now));
+	}
+
+	pthread_sigmask(SIG_UNBLOCK, &mask, NULL);
+	timer_delete(timer);
+
+	return NULL;
+}
+
+/* Restrict threads created with attr to the given CPU; -1 leaves attr as is. */
+static void bind_cpu(pthread_attr_t *attr, int cpu)
+{
+#ifdef __USE_GNU
+	cpu_set_t mask;
+
+	if (cpu == -1)
+		return;
+
+	CPU_ZERO(&mask);
+	CPU_SET(cpu, &mask);
+
+	pthread_attr_setaffinity_np(attr, sizeof(mask), &mask);
+#endif
+}
+
+/* Make threads created with attr run under SCHED_FIFO at priority prio. */
+static void rtprio(pthread_attr_t *attr, int prio)
+{
+#ifdef PTHREAD_EXPLICIT_SCHED
+	struct sched_param param = { .sched_priority = prio };
+	pthread_attr_setinheritsched(attr, PTHREAD_EXPLICIT_SCHED);
+	pthread_attr_setschedpolicy(attr, SCHED_FIFO);
+	pthread_attr_setschedparam(attr, &param);
+#endif
+}
+
+/* Pick trimean, median or mean depending on how many samples there are. */
+static double l_estimate(igt_stats_t *stats)
+{
+	if (stats->n_values > 9)
+		return igt_stats_get_trimean(stats);
+	else if (stats->n_values > 5)
+		return igt_stats_get_median(stats);
+	else
+		return igt_stats_get_mean(stats);
+}
+
+/* Average cost, in nanoseconds, of one clock_gettime(CLOCK_MONOTONIC). */
+static double min_measurement_error(void)
+{
+	struct timespec start, end;
+	int n;
+
+	clock_gettime(CLOCK_MONOTONIC, &start);
+	for (n = 0; n < 1024; n++)
+		clock_gettime(CLOCK_MONOTONIC, &end);
+
+	return elapsed(&start, &end) / n;
+}
+
+/*
+ * Run one submission hog and one SCHED_FIFO latency sampler per online CPU
+ * for timeout seconds, then report the average submission count and the
+ * mean and maximum wakeup latency, each reduced by min (the measurement
+ * overhead, in nanoseconds) and printed in microseconds.
+ */
+static void syslatency(amdgpu_device_handle device,
+		       double min,
+		       const char *name,
+		       unsigned int ip_type,
+		       unsigned int ring,
+		       unsigned int timeout)
+{
+	const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+	struct busyspin *busy;
+	struct sys_wait *wait;
+	pthread_attr_t attr;
+	igt_stats_t cycles, mean, max;
+	int n;
+
+	done = 0;
+
+	busy = calloc(ncpus, sizeof(*busy));
+	igt_assert(busy);
+	pthread_attr_init(&attr);
+	for (n = 0; n < ncpus; n++) {
+		bind_cpu(&attr, n);
+		busy[n].device = device;
+		busy[n].ip_type = ip_type;
+		busy[n].ring = ring;
+		pthread_create(&busy[n].thread, &attr, busyspin, &busy[n]);
+	}
+	/* An attribute object must be destroyed before it is initialised again. */
+	pthread_attr_destroy(&attr);
+
+	wait = calloc(ncpus, sizeof(*wait));
+	igt_assert(wait);
+	pthread_attr_init(&attr);
+	rtprio(&attr, 99);
+	for (n = 0; n < ncpus; n++) {
+		igt_mean_init(&wait[n].mean);
+		bind_cpu(&attr, n);
+		pthread_create(&wait[n].thread, &attr, sys_wait, &wait[n]);
+	}
+	pthread_attr_destroy(&attr);
+
+	sleep(timeout);
+	done = 1;
+
+	igt_stats_init_with_size(&cycles, ncpus);
+	for (n = 0; n < ncpus; n++) {
+		pthread_join(busy[n].thread, NULL);
+		igt_stats_push(&cycles, busy[n].count);
+	}
+
+	igt_stats_init_with_size(&mean, ncpus);
+	igt_stats_init_with_size(&max, ncpus);
+	for (n = 0; n < ncpus; n++) {
+		pthread_join(wait[n].thread, NULL);
+		igt_stats_push_float(&mean, wait[n].mean.mean);
+		igt_stats_push_float(&max, wait[n].mean.max);
+	}
+
+	igt_info("%s: cycles=%.0f, latency mean=%.3fus max=%.0fus\n",
+		 name,
+		 igt_stats_get_mean(&cycles),
+		 (igt_stats_get_mean(&mean) - min)/ 1000,
+		 (l_estimate(&max) - min) / 1000);
+
+	igt_stats_fini(&max);
+	igt_stats_fini(&mean);
+	igt_stats_fini(&cycles);
+	free(wait);
+	free(busy);
+}
+
+/* One subtest per engine type, each run on ring 0 for 20 seconds. */
+igt_main
+{
+	amdgpu_device_handle device;
+	const struct engine {
+		const char *name;
+		unsigned int ip_type;
+	} engines[] = {
+		{ "compute", AMDGPU_HW_IP_COMPUTE },
+		{ "gfx", AMDGPU_HW_IP_GFX },
+		{ },
+	}, *e;
+	double min;
+	int fd = -1;
+
+	igt_fixture {
+		uint32_t major, minor;
+		int err;
+
+		fd = __drm_open_driver(DRIVER_AMDGPU);
+		igt_require(fd >= 0);
+
+		err = amdgpu_device_initialize(fd, &major, &minor, &device);
+		igt_require(err == 0);
+
+		force_low_latency();
+		min = min_measurement_error();
+	}
+
+	for (e = engines; e->name; e++) {
+		igt_subtest_f("%s-0", e->name)
+			syslatency(device, min, e->name, e->ip_type, 0, 20);
+	}
+
+	igt_fixture {
+		amdgpu_device_deinitialize(device);
+		close(fd);
+	}
+}
diff --git a/tests/amdgpu/meson.build b/tests/amdgpu/meson.build
index af5e74c7a..d24a84f68 100644
--- a/tests/amdgpu/meson.build
+++ b/tests/amdgpu/meson.build
@@ -5,6 +5,7 @@ if libdrm_amdgpu.found()
 	amdgpu_progs += [ 'amd_basic',
 			  'amd_cs_nop',
 			  'amd_prime',
+			  'amd_syslatency',
 			]
 	amdgpu_deps += libdrm_amdgpu
 endif
-- 
2.18.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx