On 14/05/2018 09:02, Chris Wilson wrote:
From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
We want to make sure RT tasks which use a lot of CPU times can submit
batch buffers with roughly the same latency (and certainly not worse)
compared to normal tasks.
v2: Add tests to run across all engines simultaneously to encourage
ksoftirqd to kick in even more often.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
Reviewed-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> #v1
Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
---
tests/gem_exec_latency.c | 207 +++++++++++++++++++++++++++++++++++++++
1 file changed, 207 insertions(+)
diff --git a/tests/gem_exec_latency.c b/tests/gem_exec_latency.c
index 9498c0921..c5816427b 100644
--- a/tests/gem_exec_latency.c
+++ b/tests/gem_exec_latency.c
@@ -36,11 +36,15 @@
#include <sys/time.h>
#include <sys/signal.h>
#include <time.h>
+#include <sched.h>
#include "drm.h"
#include "igt_sysfs.h"
#include "igt_vgem.h"
+#include "igt_dummyload.h"
+#include "igt_stats.h"
+
#include "i915/gem_ring.h"
#define LOCAL_I915_EXEC_NO_RELOC (1<<11)
@@ -351,6 +355,189 @@ static void latency_from_ring(int fd,
}
}
+static void __rearm_spin_batch(igt_spin_t *spin)
+{
+ const uint32_t mi_arb_chk = 0x5 << 23;
+
+ *spin->batch = mi_arb_chk;
+ *spin->running = 0;
+ __sync_synchronize();
+}
+
+static void
+__submit_spin_batch(int fd, igt_spin_t *spin, unsigned int flags)
+{
+ struct drm_i915_gem_execbuffer2 eb = spin->execbuf;
+
+ eb.flags &= ~(0x3f | I915_EXEC_BSD_MASK);
+ eb.flags |= flags | I915_EXEC_NO_RELOC;
+
+ gem_execbuf(fd, &eb);
+}
+
+struct rt_pkt {
+ struct igt_mean mean;
+ double min, max;
+};
+
+static bool __spin_wait(int fd, igt_spin_t *spin)
+{
+ while (!READ_ONCE(*spin->running)) {
+ if (!gem_bo_busy(fd, spin->handle))
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * Test whether RT thread which hogs the CPU a lot can submit work with
+ * reasonable latency.
+ */
+static void
+rthog_latency_on_ring(int fd, unsigned int engine, const char *name, unsigned int flags)
+#define RTIDLE 0x1
+{
+ const char *passname[] = { "warmup", "normal", "rt" };
+ struct rt_pkt *results;
+ unsigned int engines[16];
+ const char *names[16];
+ unsigned int nengine;
+ int ret;
+
+ results = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+ igt_assert(results != MAP_FAILED);
+
+ nengine = 0;
+ if (engine == ALL_ENGINES) {
+ for_each_physical_engine(fd, engine) {
+ if (!gem_can_store_dword(fd, engine))
+ continue;
+
+ engines[nengine] = engine;
+ names[nengine] = e__->name;
+ nengine++;
+ }
+ igt_require(nengine > 1);
+ } else {
+ igt_require(gem_can_store_dword(fd, engine));
+ engines[nengine] = engine;
+ names[nengine] = name;
+ nengine++;
+ }
+
+ igt_fork(child, nengine) {
+ unsigned int pass = 0; /* Three passes: warmup, normal, rt. */
+
+ engine = engines[child];
+ do {
+ struct igt_mean mean;
+ double min = HUGE_VAL;
+ double max = -HUGE_VAL;
+ igt_spin_t *spin;
+
+ igt_mean_init(&mean);
+
+ if (pass == 2) {
+ struct sched_param rt =
+ { .sched_priority = 99 };
+
+ ret = sched_setscheduler(0,
+ SCHED_FIFO | SCHED_RESET_ON_FORK,
+ &rt);
+ if (ret) {
+ igt_warn("Failed to set scheduling policy!\n");
+ break;
+ }
+ }
+
+ spin = __igt_spin_batch_new_poll(fd, 0, engine);
+ if (!spin) {
+ igt_warn("Failed to create spinner! (%s)\n",
+ passname[pass]);
+ break;
+ }
+ igt_spin_busywait_until_running(spin);
+
+ igt_until_timeout(pass > 0 ? 5 : 2) {
+ struct timespec ts = { };
+ double t;
+
+ igt_spin_batch_end(spin);
+ gem_sync(fd, spin->handle);
+ if (flags & RTIDLE)
+ usleep(250);
250us is how long you expect context complete to be received in?
s/RTIDLE/SUBMIT_IDLE/ ?
+
+ /*
+ * If we are oversubscribed (more RT hogs than
+ * cpus) give the others a change to run;
+ * otherwise, they will interrupt us in the
+ * middle of the measurement.
+ */
+ if (nengine > 1)
+ usleep(10*nengine);
Could check for actual number of cpus here.
You want it on top of the RTIDLE sleep?
+
+ __rearm_spin_batch(spin);
+
+ igt_nsec_elapsed(&ts);
+ __submit_spin_batch(fd, spin, engine);
+ if (!__spin_wait(fd, spin)) {
+ igt_warn("Wait timeout! (%s)\n",
+ passname[pass]);
It's not really a timeout how __spin_wait is implemented.
+ break;
+ }
+
+ t = igt_nsec_elapsed(&ts) * 1e-9;
+ if (t > max)
+ max = t;
+ if (t < min)
+ min = t;
+
+ igt_mean_add(&mean, t);
+ }
+
+ igt_spin_batch_free(fd, spin);
+
+ igt_info("%8s %10s: mean=%.2fus stddev=%.3fus [%.2fus, %.2fus] (n=%lu)\n",
+ names[child],
+ passname[pass],
+ igt_mean_get(&mean) * 1e6,
+ sqrt(igt_mean_get_variance(&mean)) * 1e6, > + min * 1e6, max * 1e6,
+ mean.count);
+
+ results[3 * child + pass].mean = mean;
+ results[3 * child + pass].min = min;
+ results[3 * child + pass].max = max;
+ } while (++pass < 3);
+ }
+
+ igt_waitchildren();
+
+ for (unsigned int child = 0; child < nengine; child++) {
+ struct rt_pkt normal = results[3 * child + 1];
+ struct rt_pkt rt = results[3 * child + 2];
+
+ igt_assert(rt.max);
+
+ igt_info("%8s: normal latency=%.2f±%.3fus, rt latency=%.2f±%.3fus\n",
+ names[child],
+ igt_mean_get(&normal.mean) * 1e6,
+ sqrt(igt_mean_get_variance(&normal.mean)) * 1e6,
+ igt_mean_get(&rt.mean) * 1e6,
+ sqrt(igt_mean_get_variance(&rt.mean)) * 1e6);
+
+ igt_assert(igt_mean_get(&rt.mean) <
+ igt_mean_get(&normal.mean) * 2);
+
+ /* The system is noisy; be conservative when declaring fail. */
+ igt_assert(igt_mean_get_variance(&rt.mean) <
+ igt_mean_get_variance(&normal.mean) * 25);
I don't know what "* 25" means in statistical terms. At some point you
said variance doesn't work and you had to go with stdev?
+ }
+
+ munmap(results, 4096);
+}
+
igt_main
{
const struct intel_execution_engine *e;
@@ -373,6 +560,12 @@ igt_main
intel_register_access_init(intel_get_pci_device(), false, device);
}
+ igt_subtest("all-rtidle-submit")
+ rthog_latency_on_ring(device, ALL_ENGINES, "all", RTIDLE);
+
+ igt_subtest("all-rthog-submit")
+ rthog_latency_on_ring(device, ALL_ENGINES, "all", 0);
+
igt_subtest_group {
igt_fixture
igt_require(intel_gen(intel_get_drm_devid(device)) >= 7);
@@ -391,6 +584,20 @@ igt_main
e->exec_id | e->flags,
e->name, 0);
+ igt_subtest_f("%s-rtidle-submit", e->name)
+ rthog_latency_on_ring(device,
+ e->exec_id |
+ e->flags,
+ e->name,
+ RTIDLE);
+
+ igt_subtest_f("%s-rthog-submit", e->name)
+ rthog_latency_on_ring(device,
+ e->exec_id |
+ e->flags,
+ e->name,
+ 0);
+
igt_subtest_f("%s-dispatch-queued", e->name)
latency_on_ring(device,
e->exec_id | e->flags,
Regards,
Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx