This is a benchmark for testing the GPU read and write bandwidth. Issue: VIZ-5664 Signed-off-by: Antti Koskipaa <antti.koskipaa@xxxxxxxxxxxxxxx> --- tests/.gitignore | 1 + tests/Makefile.sources | 1 + tests/gem_memory_bandwidth.c | 209 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 211 insertions(+) create mode 100644 tests/gem_memory_bandwidth.c diff --git a/tests/.gitignore b/tests/.gitignore index dc8bb53..2ea4107 100644 --- a/tests/.gitignore +++ b/tests/.gitignore @@ -65,6 +65,7 @@ gem_linear_blits gem_lut_handle gem_madvise gem_media_fill +gem_memory_bandwidth gem_mmap gem_mmap_gtt gem_mmap_offset_exhaustion diff --git a/tests/Makefile.sources b/tests/Makefile.sources index 2e2e088..4429c29 100644 --- a/tests/Makefile.sources +++ b/tests/Makefile.sources @@ -36,6 +36,7 @@ TESTS_progs_M = \ gem_flink_race \ gem_linear_blits \ gem_madvise \ + gem_memory_bandwidth \ gem_mmap \ gem_mmap_gtt \ gem_mmap_wc \ diff --git a/tests/gem_memory_bandwidth.c b/tests/gem_memory_bandwidth.c new file mode 100644 index 0000000..a44987e --- /dev/null +++ b/tests/gem_memory_bandwidth.c @@ -0,0 +1,209 @@ +/* + * Copyright © 2013-2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Antti Koskipää <antti.koskipaa@xxxxxxxxx> + * Damien Lespiau <damien.lespiau@xxxxxxxxx> + */ + +/* + * We need a way to test memory bandwidth bottlenecks and understand better + * where they are. This test bypasses Mesa and uses the kernel GEM interface + * directly. + * + * Say there is a performance regression. Where is it, Mesa or kernel? Just + * compare the results of this test to the Mesa bandwidth results. If they are + * similar, the problem is in the kernel. If Mesa is much slower than this test, + * the problem is in Mesa. + */ + +#include "igt.h" +#include <stdbool.h> +#include <unistd.h> +#include <stdlib.h> +#include <sys/ioctl.h> +#include <stdio.h> +#include <string.h> +#include <fcntl.h> +#include <inttypes.h> +#include <errno.h> +#include <sys/stat.h> +#include <sys/time.h> + +#include <drm.h> + +#include "intel_bufmgr.h" + +IGT_TEST_DESCRIPTION("GPU memory bandwidth benchmark."); + +/* Each test block is 1 meg. */ +#define WIDTH 512 +#define STRIDE (WIDTH*4) +#define HEIGHT 512 +#define SIZE (HEIGHT*STRIDE) +/* Run this many times. 10240 = 10 gigabytes are copied. 1024 was too small, + * giving too much variance. */ +#define LOOPS 10240 +/* How many buffers to allocate for main memory speed testing. + * Must be large enough to thrash the caches.
+ */
+#define NBUFS 512
+
+#define SRC_COLOR 0xffff00ff
+#define DST_COLOR 0xfff0ff00
+
+/* State shared by the fixtures and every subtest of the benchmark. */
+typedef struct {
+	int fd;					/* DRM device file descriptor */
+	uint32_t devid;				/* device id queried from fd */
+	drm_intel_bufmgr *bufmgr;
+	struct intel_batchbuffer *batch;
+	igt_render_copyfunc_t render_copy;
+	igt_render_copyfunc_t render_read;
+	igt_render_copyfunc_t render_write;
+	uint32_t linear[WIDTH * HEIGHT];	/* pattern uploaded into each scratch buffer */
+} data_t;
+
+/*
+ * Open the Intel DRM device and set up everything the subtests need: buffer
+ * manager, the three per-device render functions, a batch buffer and the
+ * upload pattern (linear[i] == i).
+ *
+ * A missing render function is reported through igt_require_f(); a missing
+ * buffer manager or batch buffer through igt_assert().
+ */
+static void data_init(data_t *data)
+{
+	data->fd = drm_open_driver(DRIVER_INTEL);
+	data->devid = intel_get_drm_devid(data->fd);
+
+	data->bufmgr = drm_intel_bufmgr_gem_init(data->fd, 4096);
+	igt_assert(data->bufmgr);
+
+	data->render_copy = igt_get_render_copyfunc(data->devid);
+	igt_require_f(data->render_copy,
+		      "no render-copy function\n");
+	data->render_write = igt_get_render_writefunc(data->devid);
+	igt_require_f(data->render_write,
+		      "no render-write function\n");
+	data->render_read = igt_get_render_readfunc(data->devid);
+	igt_require_f(data->render_read,
+		      "no render-read function\n");
+
+	data->batch = intel_batchbuffer_alloc(data->bufmgr, data->devid);
+	igt_assert(data->batch);
+
+	for (int i = 0; i < WIDTH * HEIGHT; i++)
+		data->linear[i] = i;
+}
+
+/* Release what data_init() acquired, in reverse order of acquisition. */
+static void data_fini(data_t *data)
+{
+	intel_batchbuffer_free(data->batch);
+	drm_intel_bufmgr_destroy(data->bufmgr);
+	close(data->fd);
+}
+
+/*
+ * Allocate one untiled scratch buffer of SIZE bytes and fill it with
+ * data->linear.
+ *
+ * NOTE: width, height and color are accepted but currently unused; the buffer
+ * is always SIZE bytes because the upload below copies sizeof(data->linear)
+ * bytes. Only stride is stored in *buf.
+ *
+ * Returns 0 on success, -1 if the buffer object could not be allocated.
+ */
+static int scratch_buf_init(data_t *data, struct igt_buf *buf,
+			    int width, int height, int stride, uint32_t color)
+{
+	drm_intel_bo *bo;
+
+	bo = drm_intel_bo_alloc(data->bufmgr, "", SIZE, 4096);
+	if (!bo)
+		return -1;
+	gem_write(data->fd, bo->handle, 0, data->linear,
+		  sizeof(data->linear));
+
+	buf->bo = bo;
+	buf->stride = stride;
+	buf->tiling = I915_TILING_NONE;
+	buf->size = SIZE;
+	return 0;
+}
+
+/* Drop the reference on buf's buffer object and clear the descriptor. */
+static void scratch_buf_fini(data_t *data, struct igt_buf *buf)
+{
+	dri_bo_unreference(buf->bo);
+	memset(buf, 0, sizeof(*buf));
+}
+
+/*
+ * Print the amount of data moved, the elapsed time and the resulting rate
+ * for one benchmark run.
+ *
+ * desc:  label printed in front of the result
+ * start: timestamp taken before the first operation
+ * end:   timestamp taken after the last operation has completed
+ *
+ * Asserts that the measured interval is at least one microsecond, which also
+ * guards the division below.
+ */
+static void print_bandwidth(const char *desc, struct timeval *start, struct timeval *end)
+{
+	struct timeval diff;
+	uint64_t usecs;
+
+	timersub(end, start, &diff);
+	usecs = diff.tv_sec * 1000000ULL + diff.tv_usec;
+	igt_assert(usecs != 0);
+	/*
+	 * 1 byte/us = 1M/s. The arithmetic is done in double: printf("%f")
+	 * consumes a double anyway, and float would only throw away precision
+	 * on the microsecond count.
+	 */
+	printf("%s: %i MB in %f seconds, %f MB/s\n", desc,
+	       LOOPS, (double)usecs / 1.0e6,
+	       (double)(STRIDE*HEIGHT*(uint64_t)LOOPS) / (double)usecs);
+}
+
+/*
+ * Run func LOOPS times, cycling through the NBUFS scratch buffers so that
+ * consecutive operations touch different memory, then report the bandwidth.
+ *
+ * data: benchmark state (supplies the batch buffer)
+ * bufs: array of NBUFS initialised scratch buffers
+ * func: render function under test
+ * desc: label for the printed result
+ */
+static void test(data_t *data, struct igt_buf *bufs, igt_render_copyfunc_t func,
+		 const char *desc)
+{
+	struct igt_buf *src = NULL, *dst = NULL;
+	struct timeval start, end;
+	int i;
+
+	gettimeofday(&start, NULL);
+
+	for (i = 0; i < LOOPS; i++) {
+		src = &bufs[i % NBUFS];
+		dst = &bufs[(i + 1) % NBUFS];
+		/*
+		 * Both surfaces are WIDTH x HEIGHT, so the full-size rectangle
+		 * has to land at (0, 0) in the destination. Offsetting it by
+		 * half the surface would push three quarters of it outside
+		 * the buffer while print_bandwidth() still accounts for the
+		 * whole block.
+		 */
+		func(data->batch, NULL, src, 0, 0, WIDTH, HEIGHT, dst, 0, 0);
+	}
+
+	/*
+	 * Submission is asynchronous: wait until the GPU is done with the
+	 * buffers of the final iteration before stopping the clock, otherwise
+	 * only the time needed to queue the work is measured.
+	 */
+	if (src)
+		drm_intel_bo_wait_rendering(src->bo);
+	if (dst)
+		drm_intel_bo_wait_rendering(dst->bo);
+
+	gettimeofday(&end, NULL);
+	print_bandwidth(desc, &start, &end);
+}
+
+/*
+ * Test entry point: one fixture that sets up the device and NBUFS scratch
+ * buffers, the "copy", "write" and "read" subtests, and a closing fixture
+ * that releases everything again.
+ */
+int main(int argc, char **argv)
+{
+	/*
+	 * Static storage (zero-initialised): data_t alone embeds a 1 MiB
+	 * array, which is too large to place comfortably on the stack next
+	 * to NBUFS buffer descriptors.
+	 */
+	static data_t data;
+	static struct igt_buf bufs[NBUFS];
+
+	igt_subtest_init(argc, argv);
+
+	igt_fixture {
+		data_init(&data);
+		for (int i = 0; i < NBUFS; i++) {
+			if (scratch_buf_init(&data, &bufs[i], WIDTH, HEIGHT,
+					     STRIDE, SRC_COLOR) == 0)
+				continue;
+
+			printf("Not enough memory to allocate all scratch buffers. Need "
+			       "%i megabytes more.\n", NBUFS - i);
+			/* Unwind the buffers allocated so far, newest first. */
+			for (int j = i - 1; j >= 0; j--)
+				scratch_buf_fini(&data, &bufs[j]);
+			data_fini(&data);
+			igt_fail(IGT_EXIT_FAILURE);
+		}
+	}
+
+	igt_subtest("copy") {
+		test(&data, bufs, data.render_copy, "Copy");
+	}
+
+	igt_subtest("write") {
+		test(&data, bufs, data.render_write, "Write");
+	}
+
+	igt_subtest("read") {
+		test(&data, bufs, data.render_read, "Read");
+	}
+
+	igt_fixture {
+		for (int i = 0; i < NBUFS; i++)
+			scratch_buf_fini(&data, &bufs[i]);
+		data_fini(&data);
+	}
+
+	igt_exit();
+}
-- 
2.3.6
_______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx