Quoting Janusz Krzysztofik (2019-12-02 14:42:58) > Hi Chris, > > I have a few questions rather than comments. I hope they are worth spending > your time. > > On Wednesday, November 13, 2019 1:52:40 PM CET Chris Wilson wrote: > > I915_CONTEXT_PARAM_RINGSIZE specifies how large to create the command > > ringbuffer for logical ring contects. This directly affects the number > > s/contects/contexts/ > > > of batches userspace can submit before blocking waiting for space. > > > > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > > --- > > tests/Makefile.sources | 3 + > > tests/i915/gem_ctx_ringsize.c | 296 ++++++++++++++++++++++++++++++++++ > > tests/meson.build | 1 + > > 3 files changed, 300 insertions(+) > > create mode 100644 tests/i915/gem_ctx_ringsize.c > > > > diff --git a/tests/Makefile.sources b/tests/Makefile.sources > > index e17d43155..801fc52f3 100644 > > --- a/tests/Makefile.sources > > +++ b/tests/Makefile.sources > > @@ -163,6 +163,9 @@ gem_ctx_param_SOURCES = i915/gem_ctx_param.c > > TESTS_progs += gem_ctx_persistence > > gem_ctx_persistence_SOURCES = i915/gem_ctx_persistence.c > > > > +TESTS_progs += gem_ctx_ringsize > > +gem_ctx_ringsize_SOURCES = i915/gem_ctx_ringsize.c > > + > > TESTS_progs += gem_ctx_shared > > gem_ctx_shared_SOURCES = i915/gem_ctx_shared.c > > > > diff --git a/tests/i915/gem_ctx_ringsize.c b/tests/i915/gem_ctx_ringsize.c > > new file mode 100644 > > index 000000000..1450e8f0d > > --- /dev/null > > +++ b/tests/i915/gem_ctx_ringsize.c > > @@ -0,0 +1,296 @@ > > +/* > > + * Copyright © 2019 Intel Corporation > > + * > > + * Permission is hereby granted, free of charge, to any person obtaining a > > + * copy of this software and associated documentation files (the "Software"), > > + * to deal in the Software without restriction, including without limitation > > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > > + * and/or sell copies of the Software, and to permit persons to whom the > > + * Software is furnished to do so, subject to the following conditions: > > + * > > + * The above copyright notice and this permission notice (including the next > > + * paragraph) shall be included in all copies or substantial portions of the > > + * Software. > > + * > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS > > + * IN THE SOFTWARE. > > + */ > > + > > +#include <errno.h> > > +#include <fcntl.h> > > +#include <inttypes.h> > > +#include <sys/ioctl.h> > > +#include <sys/types.h> > > +#include <unistd.h> > > + > > +#include "drmtest.h" /* gem_quiescent_gpu()! */ > > +#include "i915/gem_context.h" > > +#include "i915/gem_engine_topology.h" > > +#include "ioctl_wrappers.h" /* gem_wait()! */ > > +#include "sw_sync.h" > > + > > +#define I915_CONTEXT_PARAM_RINGSIZE 0xc > > How are we going to handle symbol redefinition conflict which arises as soon > as this symbol is also included from kernel headers (e.g. via > "i915/gem_engine_topology.h")? Final version we copy the headers form the kernel. Conflicts remind us when we forget. > > > + > > +static bool has_ringsize(int i915) > > +{ > > + struct drm_i915_gem_context_param p = { > > + .param = I915_CONTEXT_PARAM_RINGSIZE, > > + }; > > + > > + return __gem_context_get_param(i915, &p) == 0; > > +} > > + > > +static void test_idempotent(int i915) > > +{ > > + struct drm_i915_gem_context_param p = { > > + .param = I915_CONTEXT_PARAM_RINGSIZE, > > + }; > > + uint32_t saved; > > + > > + /* > > + * Simple test to verify that we are able to read back the same > > + * value as we set. > > + */ > > + > > + gem_context_get_param(i915, &p); > > + saved = p.value; > > + > > + for (uint32_t x = 1 << 12; x <= 128 << 12; x <<= 1) { > > I've noticed you are using two different notations for those minimum/maximum > constants. I think that may be confusing. How about defining and using > macros? A range in pages... > > + p.value = x; > > + gem_context_set_param(i915, &p); > > + gem_context_get_param(i915, &p); > > + igt_assert_eq_u32(p.value, x); > > + } > > + > > + p.value = saved; > > + gem_context_set_param(i915, &p); > > +} > > + > > +static void test_invalid(int i915) > > +{ > > + struct drm_i915_gem_context_param p = { > > + .param = I915_CONTEXT_PARAM_RINGSIZE, > > + }; > > + uint64_t invalid[] = { > > + 0, 1, 4095, 4097, 8191, 8193, > > + /* upper limit may be HW dependent, atm it is 512KiB */ > > + (512 << 10) - 1, (512 << 10) + 1, > > Here is an example of that different notation mentioned above. And here written in KiB to match comments. > > > + -1, -1u > > + }; > > + uint32_t saved; > > + > > + gem_context_get_param(i915, &p); > > + saved = p.value; > > + > > + for (int i = 0; i < ARRAY_SIZE(invalid); i++) { > > + p.value = invalid[i]; > > + igt_assert_eq(__gem_context_set_param(i915, &p), -EINVAL); > > + gem_context_get_param(i915, &p); > > + igt_assert_eq_u64(p.value, saved); > > + } > > +} > > + > > +static int create_ext_ioctl(int i915, > > + struct drm_i915_gem_context_create_ext *arg) > > +{ > > + int err; > > + > > + err = 0; > > + if (igt_ioctl(i915, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, arg)) { > > + err = -errno; > > + igt_assume(err); > > + } > > + > > + errno = 0; > > + return err; > > +} > > This helper looks like pretty standard for me. Why there are no library > functions for such generic operations? Because no one has written that yet. > > > + > > +static void test_create(int i915) > > +{ > > + struct drm_i915_gem_context_create_ext_setparam p = { > > + .base = { > > + .name = I915_CONTEXT_CREATE_EXT_SETPARAM, > > + .next_extension = 0, /* end of chain */ > > + }, > > + .param = { > > + .param = I915_CONTEXT_PARAM_RINGSIZE, > > + .value = 512 << 10, > > + } > > + }; > > + struct drm_i915_gem_context_create_ext create = { > > + .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS, > > + .extensions = to_user_pointer(&p), > > + }; > > + > > + igt_assert_eq(create_ext_ioctl(i915, &create), 0); > > + > > + p.param.ctx_id = create.ctx_id; > > + p.param.value = 0; > > + gem_context_get_param(i915, &p.param); > > + igt_assert_eq(p.param.value, 512 << 10); > > + > > + gem_context_destroy(i915, create.ctx_id); > > +} > > + > > +static void test_clone(int i915) > > +{ > > + struct drm_i915_gem_context_create_ext_setparam p = { > > + .base = { > > + .name = I915_CONTEXT_CREATE_EXT_SETPARAM, > > + .next_extension = 0, /* end of chain */ > > + }, > > + .param = { > > + .param = I915_CONTEXT_PARAM_RINGSIZE, > > + .value = 512 << 10, > > + } > > + }; > > + struct drm_i915_gem_context_create_ext create = { > > + .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS, > > + .extensions = to_user_pointer(&p), > > + }; > > + > > + igt_assert_eq(create_ext_ioctl(i915, &create), 0); > > + > > + p.param.ctx_id = gem_context_clone(i915, create.ctx_id, > > + I915_CONTEXT_CLONE_ENGINES, 0); > > + igt_assert_neq(p.param.ctx_id, create.ctx_id); > > + gem_context_destroy(i915, create.ctx_id); > > + > > + p.param.value = 0; > > + gem_context_get_param(i915, &p.param); > > + igt_assert_eq(p.param.value, 512 << 10); > > + > > + gem_context_destroy(i915, p.param.ctx_id); > > +} > > + > > +static int __execbuf(int i915, struct drm_i915_gem_execbuffer2 *execbuf) > > +{ > > + int err; > > + > > + err = 0; > > + if (ioctl(i915, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf)) > > + err = -errno; > > + > > + errno = 0; > > + return err; > > +} > > The above helper looks pretty the same as lib/ioctlwrappers.c:__gem_execbuf(). > Does igt_assume(err) found in the latter matter so much that you use your own > version? It's very, very different from that one. > > + > > +static uint32_t __batch_create(int i915, uint32_t offset) > > This is always called with offset = 0, do we expect other values to be used > later? Why not. > > +{ > > + const uint32_t bbe = 0xa << 23; > > + uint32_t handle; > > + > > + handle = gem_create(i915, ALIGN(offset + sizeof(bbe), 4096)); > > Why don't we rely on the driver making the alignment for us? I'm used to being inside the kernel where it's expected to be correct. > > + gem_write(i915, handle, offset, &bbe, sizeof(bbe)); > > + > > + return handle; > > +} > > + > > +static uint32_t batch_create(int i915) > > +{ > > + return __batch_create(i915, 0); > > +} > > + > > +static unsigned int measure_inflight(int i915, unsigned int engine) > > +{ > > + IGT_CORK_FENCE(cork); > > + struct drm_i915_gem_exec_object2 obj = { > > + .handle = batch_create(i915) > > + }; > > + struct drm_i915_gem_execbuffer2 execbuf = { > > + .buffers_ptr = to_user_pointer(&obj), > > + .buffer_count = 1, > > + .flags = engine | I915_EXEC_FENCE_IN, > > + .rsvd2 = igt_cork_plug(&cork, i915), > > + }; > > + unsigned int count; > > + > > + fcntl(i915, F_SETFL, fcntl(i915, F_GETFL) | O_NONBLOCK); > > + > > + gem_execbuf(i915, &execbuf); > > + for (count = 1; __execbuf(i915, &execbuf) == 0; count++) > > + ; > > Shouldn't we check if the reason for the failure is what we expect, i.e., > -EWOULDBLOCK (or -EINTR)? And why don't we put a time constraint on that loop > in case O_NONBLOCK handling is not supported (yet)? Sure. The idea is that O_NONBLOCK is supported, otherwise we don't have fast and precise feedback. > > +static void test_resize(int i915, > > + const struct intel_execution_engine2 *e, > > + unsigned int flags) > > +#define IDLE (1 << 0) > > +{ > > + struct drm_i915_gem_context_param p = { > > + .param = I915_CONTEXT_PARAM_RINGSIZE, > > + }; > > + unsigned int prev[2] = {}; > > + uint32_t saved; > > + > > + gem_context_get_param(i915, &p); > > + saved = p.value; > > + > > + gem_quiescent_gpu(i915); > > + for (p.value = 1 << 12; p.value <= 128 << 12; p.value <<= 1) { > > + unsigned int count; > > + > > + gem_context_set_param(i915, &p); > > + > > + count = measure_inflight(i915, e->flags); > > + igt_info("%s: %llx -> %d\n", e->name, p.value, count); > > + igt_assert(count > 3 * (prev[1] - prev[0]) / 4 + prev[1]); > > Where does this formula come from? Why not just count == 2 * prev[1] ? > What results should we expect in "active" vs. "idle" mode? I've explained somewhere why it is not 2*prev... And there's a small amount of imprecision (+-1 request). In test_resize is the comment: /* * The ringsize directly affects the number of batches we can have * inflight -- when we run out of room in the ring, the client is * blocked (or if O_NONBLOCK is specified, -EWOULDBLOCK is reported). * The kernel throttles the client when they enter the last 4KiB page, * so as we double the size of the ring, we nearly double the number * of requests we can fit as 2^n-1: i.e 0, 1, 3, 7, 15, 31 pages. */ -Chris _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx