On 07/02/18 01:50, Chris Wilson wrote:
Build up a large stockpile of requests, ~500,000, and feed them into the
system at 20KHz whilst simultaneously triggering set-wedged in order to
try and race i915_gem_set_wedged() against the engine->submit_request()
callback.
Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
---
tests/gem_eio.c | 196 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 196 insertions(+)
diff --git a/tests/gem_eio.c b/tests/gem_eio.c
index 16a89ecda..10f9aab37 100644
--- a/tests/gem_eio.c
+++ b/tests/gem_eio.c
@@ -34,12 +34,14 @@
#include <fcntl.h>
#include <inttypes.h>
#include <errno.h>
+#include <signal.h>
#include <sys/ioctl.h>
#include <drm.h>
#include "igt.h"
#include "igt_sysfs.h"
+#include "igt_vgem.h"
#include "sw_sync.h"
IGT_TEST_DESCRIPTION("Test that specific ioctls report a wedged GPU (EIO).");
@@ -422,6 +424,194 @@ static void test_inflight_internal(int fd)
trigger_reset(fd);
}
+struct cork {
+ int device;
+ uint32_t handle;
+ uint32_t fence;
+};
+
+static void plug(int fd, struct cork *c)
+{
+ struct vgem_bo bo;
+ int dmabuf;
+
+ c->device = drm_open_driver(DRIVER_VGEM);
+
+ bo.width = bo.height = 1;
+ bo.bpp = 4;
+ vgem_create(c->device, &bo);
+ c->fence = vgem_fence_attach(c->device, &bo, VGEM_FENCE_WRITE);
+
+ dmabuf = prime_handle_to_fd(c->device, bo.handle);
+ c->handle = prime_fd_to_handle(fd, dmabuf);
+ close(dmabuf);
+}
+
+static void unplug(struct cork *c)
+{
+ vgem_fence_signal(c->device, c->fence);
+ close(c->device);
+}
+
+static void alarm_handler(int sig)
+{
+}
+
+static int __execbuf(int fd, struct drm_i915_gem_execbuffer2 *execbuf)
+{
+ int err = 0;
+ if (ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf))
+ err = -errno;
+ return err;
+}
+
+static unsigned int measure_ring_size(int fd, unsigned int ring)
+{
+ struct sigaction sa = { .sa_handler = alarm_handler };
+ struct drm_i915_gem_exec_object2 obj[2];
+ struct drm_i915_gem_execbuffer2 execbuf;
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ unsigned int count, last;
+ struct itimerval itv;
+ struct cork c;
+
+ memset(obj, 0, sizeof(obj));
+ obj[1].handle = gem_create(fd, 4096);
+ gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe));
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(obj + 1);
+ execbuf.buffer_count = 1;
+ execbuf.flags = ring;
+ gem_execbuf(fd, &execbuf);
+ gem_sync(fd, obj[1].handle);
+
+ plug(fd, &c);
+ obj[0].handle = c.handle;
+
+ execbuf.buffers_ptr = to_user_pointer(obj);
+ execbuf.buffer_count = 2;
+ execbuf.rsvd1 = gem_context_create(fd);
+
+ sigaction(SIGALRM, &sa, NULL);
+ itv.it_interval.tv_sec = 0;
+ itv.it_interval.tv_usec = 100;
+ itv.it_value.tv_sec = 0;
+ itv.it_value.tv_usec = 1000;
+ setitimer(ITIMER_REAL, &itv, NULL);
+
+ last = -1;
+ count = 0;
+ do {
+ if (__execbuf(fd, &execbuf) == 0) {
+ count++;
+ continue;
+ }
+
+ if (last == count)
+ break;
+
+ last = count;
+ } while (1);
+
+ memset(&itv, 0, sizeof(itv));
+ setitimer(ITIMER_REAL, &itv, NULL);
+
+ unplug(&c);
+ gem_close(fd, obj[1].handle);
+ gem_context_destroy(fd, execbuf.rsvd1);
+
+ return count;
+}
+
+static void notify(union sigval arg)
+{
+ int timeline = arg.sival_int;
+
+ sw_sync_timeline_inc(timeline, 1); /* only now submit our batches */
+}
+
+static void test_set_wedged(int fd)
+{
+#define NCTX 4096
+ const uint32_t bbe = MI_BATCH_BUFFER_END;
+ const int ring_size = measure_ring_size(fd, 0) - 1;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 obj;
+ struct itimerspec its;
+ struct sigevent sev;
+ uint32_t *contexts;
+ timer_t timer;
+ int timeline;
+ int syncpt;
+
+ contexts = calloc(NCTX, sizeof(*contexts));
NCTX is a compile-time constant here, so this allocation is pretty static - do you expect it to become a runtime parameter later, or could this just be a fixed-size array?
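If it is going to stay fixed, something along these lines would do instead (rough sketch, only dropping the heap allocation; same NCTX and gem_context_create() as in the patch):

	uint32_t contexts[NCTX]; /* 4096 * 4 bytes, small enough to live on the stack */

	for (int n = 0; n < NCTX; n++)
		contexts[n] = gem_context_create(fd);

and then the calloc()/free() and the igt_assert() go away.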
+ igt_assert(contexts);
+
+ for (int n = 0; n < NCTX; n++)
+ contexts[n] = gem_context_create(fd);
+
+ timeline = sw_sync_timeline_create();
+
+ memset(&obj, 0, sizeof(obj));
+ obj.handle = gem_create(fd, 4096);
+ gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
+
+ memset(&execbuf, 0, sizeof(execbuf));
+ execbuf.buffers_ptr = to_user_pointer(&obj);
+ execbuf.buffer_count = 1;
+ execbuf.flags = I915_EXEC_FENCE_IN;
+
+ /* Build up a large orderly queue of requests */
+ syncpt = 1;
+ for (int n = 0; n < NCTX; n++) {
+ execbuf.rsvd1 = contexts[n];
+ for (int m = 0; m < ring_size; m++) {
+ execbuf.rsvd2 =
+ sw_sync_timeline_create_fence(timeline, syncpt);
+ gem_execbuf(fd, &execbuf);
+ close(execbuf.rsvd2);
+
+ syncpt++;
+ }
+ }
+ igt_debug("Queued %d requests\n", syncpt);
+
+ igt_require(i915_reset_control(false));
Move the igt_require() to before building the queue of requests, so the subtest can skip more quickly when reset control is unavailable.
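I.e. roughly (just hoisting the existing check, assuming nothing in the queue-building loop depends on resets still being enabled):

	igt_require(i915_reset_control(false));

	/* Build up a large orderly queue of requests */
	syncpt = 1;
	for (int n = 0; n < NCTX; n++) {
		execbuf.rsvd1 = contexts[n];
		for (int m = 0; m < ring_size; m++) {
			execbuf.rsvd2 =
				sw_sync_timeline_create_fence(timeline, syncpt);
			gem_execbuf(fd, &execbuf);
			close(execbuf.rsvd2);

			syncpt++;
		}
	}
	igt_debug("Queued %d requests\n", syncpt);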
+
+ /* Feed each request in at 20KHz */
+ memset(&sev, 0, sizeof(sev));
+ sev.sigev_notify = SIGEV_THREAD;
+ sev.sigev_value.sival_int = timeline;
+ sev.sigev_notify_function = notify;
+ igt_assert(timer_create(CLOCK_MONOTONIC, &sev, &timer) == 0);
+
+ memset(&its, 0, sizeof(its));
+ its.it_value.tv_sec = 0;
+ its.it_value.tv_nsec = 20000;
+ its.it_interval.tv_sec = 0;
+ its.it_interval.tv_nsec = 5000;
+ igt_assert(timer_settime(timer, 0, &its, NULL) == 0);
+
+ igt_debug("Triggering wedge\n");
+ wedgeme(fd);
Does it hit the race consistently? I mean, how useful would it be to put
the whole subtest in a loop?
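Something like this at the subtest level, for example (hypothetical sketch; the repeat count is arbitrary and each pass pays the full queue-building cost again):

	igt_subtest("set-wedged-racing") {
		igt_skip_on(gem_has_execlists(fd));
		igt_require_sw_sync();
		for (int i = 0; i < 4; i++) /* arbitrary number of attempts at the race */
			test_set_wedged(fd);
	}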
Thanks,
Antonio
+
+ igt_debug("Cleaning up\n");
+ timer_delete(timer);
+ close(timeline);
+
+ for (int n = 0; n < NCTX; n++)
+ gem_context_destroy(fd, contexts[n]);
+ free(contexts);
+
+ gem_sync(fd, obj.handle);
+ gem_close(fd, obj.handle);
+
+ igt_assert(i915_reset_control(true));
+ trigger_reset(fd);
+#undef NCTX
+}
+
static int fd = -1;
static void
@@ -478,4 +668,10 @@ igt_main
igt_subtest("in-flight-suspend")
test_inflight_suspend(fd);
+
+ igt_subtest("set-wedged-racing") {
+ igt_skip_on(gem_has_execlists(fd));
+ igt_require_sw_sync();
+ test_set_wedged(fd);
+ }
}