On Mon, Oct 05, 2015 at 02:42:25PM +0300, Antti Koskipaa wrote: > This is a benchmark for testing the GPU read and write bandwidth. > > Issue: VIZ-5664 > Signed-off-by: Antti Koskipaa <antti.koskipaa@xxxxxxxxxxxxxxx> > --- > tests/.gitignore | 1 + > tests/Makefile.sources | 1 + > tests/gem_memory_bandwidth.c | 209 +++++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 211 insertions(+) > create mode 100644 tests/gem_memory_bandwidth.c > > diff --git a/tests/.gitignore b/tests/.gitignore > index dc8bb53..2ea4107 100644 > --- a/tests/.gitignore > +++ b/tests/.gitignore > @@ -65,6 +65,7 @@ gem_linear_blits > gem_lut_handle > gem_madvise > gem_media_fill > +gem_memory_bandwidth > gem_mmap > gem_mmap_gtt > gem_mmap_offset_exhaustion > diff --git a/tests/Makefile.sources b/tests/Makefile.sources > index 2e2e088..4429c29 100644 > --- a/tests/Makefile.sources > +++ b/tests/Makefile.sources > @@ -36,6 +36,7 @@ TESTS_progs_M = \ > gem_flink_race \ > gem_linear_blits \ > gem_madvise \ > + gem_memory_bandwidth \ > gem_mmap \ > gem_mmap_gtt \ > gem_mmap_wc \ > diff --git a/tests/gem_memory_bandwidth.c b/tests/gem_memory_bandwidth.c > new file mode 100644 > index 0000000..a44987e > --- /dev/null > +++ b/tests/gem_memory_bandwidth.c > @@ -0,0 +1,209 @@ > +/* > + * Copyright © 2013-2014 Intel Corporation > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice (including the next > + * paragraph) shall be included in all copies or substantial portions of the > + * Software. 
> + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS > + * IN THE SOFTWARE. > + * > + * Authors: > + * Antti Koskipää <antti.koskipaa@xxxxxxxxx> > + * Damien Lespiau <damien.lespiau@xxxxxxxxx> > + */ > + > +/* > + * We need a way to test memory bandwidth bottlenecks and understand better > + * where they are. This test bypasses Mesa and uses the kernel GEM interface > + * directly. > + * > + * Say there is a performance regression. Where is it, Mesa or kernel? Just > + * compare the results of this test to the Mesa bandwidth results. If they are > + * similar, the problem is in the kernel. If Mesa is much slower than this test, > + * the problem is in Mesa. > + */ > + > +#include "igt.h" > +#include <stdbool.h> > +#include <unistd.h> > +#include <stdlib.h> > +#include <sys/ioctl.h> > +#include <stdio.h> > +#include <string.h> > +#include <fcntl.h> > +#include <inttypes.h> > +#include <errno.h> > +#include <sys/stat.h> > +#include <sys/time.h> > + > +#include <drm.h> > + > +#include "intel_bufmgr.h" > + > +IGT_TEST_DESCRIPTION("GPU memory bandwidth benchmark."); > + > +/* Each test block is 1 meg. */ > +#define WIDTH 512 > +#define STRIDE (WIDTH*4) > +#define HEIGHT 512 > +#define SIZE (HEIGHT*STRIDE) > +/* Run this many times. 10240 = 10 gigabytes are copied. 1024 was too small, > + * giving too much variance. */ > +#define LOOPS 10240 > +/* How many buffers to allocate for main memory speed testing. > + * Must be large enough to thrash the caches. 
> + */ > +#define NBUFS 512 > + > +#define SRC_COLOR 0xffff00ff > +#define DST_COLOR 0xfff0ff00 > + > +typedef struct { > + int fd; > + uint32_t devid; > + drm_intel_bufmgr *bufmgr; > + struct intel_batchbuffer *batch; > + igt_render_copyfunc_t render_copy; > + igt_render_copyfunc_t render_read; > + igt_render_copyfunc_t render_write; > + uint32_t linear[WIDTH * HEIGHT]; > +} data_t; > + > +static void data_init(data_t *data) > +{ > + data->fd = drm_open_driver(DRIVER_INTEL); > + data->devid = intel_get_drm_devid(data->fd); > + > + data->bufmgr = drm_intel_bufmgr_gem_init(data->fd, 4096); > + igt_assert(data->bufmgr); > + > + data->render_copy = igt_get_render_copyfunc(data->devid); > + igt_require_f(data->render_copy, > + "no render-copy function\n"); > + data->render_write = igt_get_render_writefunc(data->devid); > + igt_require_f(data->render_write, > + "no render-write function\n"); > + data->render_read = igt_get_render_readfunc(data->devid); > + igt_require_f(data->render_read, > + "no render-read function\n"); > + > + data->batch = intel_batchbuffer_alloc(data->bufmgr, data->devid); > + igt_assert(data->batch); > + > + for (int i = 0; i < WIDTH * HEIGHT; i++) > + data->linear[i] = i; > + > +} > + > +static void data_fini(data_t *data) > +{ > + intel_batchbuffer_free(data->batch); > + drm_intel_bufmgr_destroy(data->bufmgr); > + close(data->fd); > +} > + > +static int scratch_buf_init(data_t *data, struct igt_buf *buf, > + int width, int height, int stride, uint32_t color) > +{ > + drm_intel_bo *bo; > + > + bo = drm_intel_bo_alloc(data->bufmgr, "", SIZE, 4096); > + if (!bo) > + return -1; > + gem_write(data->fd, bo->handle, 0, data->linear, > + sizeof(data->linear)); > + > + buf->bo = bo; > + buf->stride = stride; > + buf->tiling = I915_TILING_NONE; > + buf->size = SIZE; > + return 0; > +} > + > +static void scratch_buf_fini(data_t *data, struct igt_buf *buf) > +{ > + dri_bo_unreference(buf->bo); > + memset(buf, 0, sizeof(*buf)); > +} > + > +static void 
print_bandwidth(const char *desc, struct timeval *start, struct timeval *end) > +{ > + struct timeval diff; > + uint64_t usecs; > + timersub(end, start, &diff); > + usecs = diff.tv_sec * 1000000ULL + diff.tv_usec; > + igt_assert(usecs != 0); > + /* 1 byte/us = 1M/s */ > + printf("%s: %i MB in %f seconds, %f MB/s\n", desc, > + LOOPS, (float)usecs / 1.0e6, > + (float)(STRIDE*HEIGHT*(uint64_t)LOOPS) / (float)usecs); > +} > + > +static void test(data_t *data, struct igt_buf *bufs, igt_render_copyfunc_t func, > + const char *desc) > +{ > + int i; > + struct timeval start, end; > + > + gettimeofday(&start, NULL); > + > + for (i = 0; i < LOOPS; i++) > + func(data->batch, NULL, &bufs[i % NBUFS], 0, 0, WIDTH, HEIGHT, > + &bufs[(i + 1) % NBUFS], WIDTH / 2, HEIGHT / 2); > + > + gettimeofday(&end, NULL); > + print_bandwidth(desc, &start, &end); > +} > + > +int main(int argc, char **argv) > +{ > + data_t data = {0, }; > + struct igt_buf bufs[NBUFS]; > + > + igt_subtest_init(argc, argv); > + igt_main { > + igt_fixture { > + data_init(&data); > + for (int i = 0; i < NBUFS; i++) > + if (scratch_buf_init(&data, &bufs[i], WIDTH, HEIGHT, STRIDE, SRC_COLOR)) { > + printf("Not enough memory to allocate all scratch buffers. Need" \ > + "%i megabytes more.\n", NBUFS - i); > + for (i--; i >= 0; i--) > + scratch_buf_fini(&data, &bufs[i]); > + igt_fail(IGT_EXIT_FAILURE); > + } > + } > + > + igt_subtest("copy") { > + test(&data, bufs, data.render_copy, "Copy"); > + } > + > + igt_subtest("write") { > + test(&data, bufs, data.render_write, "Write"); > + } > + > + igt_subtest("read") { > + test(&data, bufs, data.render_read, "Read"); > + } > + > + igt_fixture { > + for (int i = 0; i < NBUFS; i++) > + scratch_buf_fini(&data, &bufs[i]); > + data_fini(&data); > + } } And you can drop igt_exit and igt_subtest_init. 
-Daniel > + > + igt_exit(); > +} > -- > 2.3.6 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx