Quoting Matthew Auld (2019-10-25 12:24:42)
> We can be more aggressive in our testing by launching a number of
> kthreads, where each is submitting its own copy or fill batches on a set
> of random sized objects. Also, since the underlying fill and copy batches
> can be pre-empted mid-batch (for particularly large objects), throw in a
> random mixture of ctx priorities per thread to make pre-emption a
> possibility.
>
> Signed-off-by: Matthew Auld <matthew.auld@xxxxxxxxx>
> Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> ---
>  .../i915/gem/selftests/i915_gem_object_blt.c | 144 +++++++++++++++---
>  1 file changed, 121 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
> index 9ec55b3a3815..41e0bd6a175c 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
> @@ -7,34 +7,53 @@
>
>  #include "i915_selftest.h"
>
> +#include "gem/i915_gem_context.h"
>  #include "selftests/igt_flush_test.h"
> +#include "selftests/i915_random.h"
>  #include "selftests/mock_drm.h"
>  #include "huge_gem_object.h"
>  #include "mock_context.h"
>
> -static int igt_fill_blt(void *arg)
> +struct igt_thread_arg {
> +	struct drm_i915_private *i915;
> +	struct rnd_state *prng;
> +};
> +
> +static int igt_fill_blt_thread(void *arg)
>  {
> -	struct drm_i915_private *i915 = arg;
> -	struct intel_context *ce = i915->engine[BCS0]->kernel_context;
> +	struct igt_thread_arg *thread = arg;
> +	struct drm_i915_private *i915 = thread->i915;
> +	struct rnd_state *prng = thread->prng;
>  	struct drm_i915_gem_object *obj;
> -	struct rnd_state prng;
> +	struct i915_gem_context *ctx;
> +	struct intel_context *ce;
> +	struct drm_file *file;
> +	unsigned int prio;
>  	IGT_TIMEOUT(end);
> -	u32 *vaddr;
> -	int err = 0;
> +	int err;
> +
> +	file = mock_file(i915);
> +	if (IS_ERR(file))
> +		return PTR_ERR(file);
> +
> +	ctx = live_context(i915, file);
> +	if (IS_ERR(ctx)) {
> +		err = PTR_ERR(ctx);
> +		goto out_file;
> +	}
>
> -	prandom_seed_state(&prng, i915_selftest.random_seed);
> +	prio = prandom_u32_state(prng) % I915_PRIORITY_MAX;
> +	ctx->sched.priority = I915_USER_PRIORITY(prio);
>
> -	/*
> -	 * XXX: needs some threads to scale all these tests, also maybe throw
> -	 * in submission from higher priority context to see if we are
> -	 * preempted for very large objects...
> -	 */
> +	ce = i915_gem_context_get_engine(ctx, BCS0);
> +	GEM_BUG_ON(IS_ERR(ce));
>
>  	do {
>  		const u32 max_block_size = S16_MAX * PAGE_SIZE;
> -		u32 sz = min_t(u64, ce->vm->total >> 4, prandom_u32_state(&prng));
> +		u32 sz = min_t(u64, ce->vm->total >> 4, prandom_u32_state(prng));
>  		u32 phys_sz = sz % (max_block_size + 1);
> -		u32 val = prandom_u32_state(&prng);
> +		u32 val = prandom_u32_state(prng);
> +		u32 *vaddr;
>  		u32 i;
>
>  		sz = round_up(sz, PAGE_SIZE);
> @@ -98,26 +117,47 @@ static int igt_fill_blt(void *arg)
>  	if (err == -ENOMEM)
>  		err = 0;
>
> +	intel_context_put(ce);
> +out_file:
> +	mock_file_free(i915, file);
>  	return err;
>  }
>
> -static int igt_copy_blt(void *arg)
> +static int igt_copy_blt_thread(void *arg)
>  {
> -	struct drm_i915_private *i915 = arg;
> -	struct intel_context *ce = i915->engine[BCS0]->kernel_context;
> +	struct igt_thread_arg *thread = arg;
> +	struct drm_i915_private *i915 = thread->i915;
> +	struct rnd_state *prng = thread->prng;
>  	struct drm_i915_gem_object *src, *dst;
> -	struct rnd_state prng;
> +	struct i915_gem_context *ctx;
> +	struct intel_context *ce;
> +	struct drm_file *file;
> +	unsigned int prio;
>  	IGT_TIMEOUT(end);
> -	u32 *vaddr;
> -	int err = 0;
> +	int err;
> +
> +	file = mock_file(i915);
> +	if (IS_ERR(file))
> +		return PTR_ERR(file);
> +
> +	ctx = live_context(i915, file);
> +	if (IS_ERR(ctx)) {
> +		err = PTR_ERR(ctx);
> +		goto out_file;
> +	}
>
> -	prandom_seed_state(&prng, i915_selftest.random_seed);
> +	prio = prandom_u32_state(prng) % I915_PRIORITY_MAX;

	prio = i915_prandom_u32_max_state(prng, I915_PRIORITY_MAX);

The usual prng dilemma: the high bits are more random than the low
bits, and it also avoids the divide. (Nice trick to remember :) A quick
sketch of the trick follows below.

> +	ctx->sched.priority = I915_USER_PRIORITY(prio);
> +
> +	ce = i915_gem_context_get_engine(ctx, BCS0);
> +	GEM_BUG_ON(IS_ERR(ce));
>
>  	do {
>  		const u32 max_block_size = S16_MAX * PAGE_SIZE;
> -		u32 sz = min_t(u64, ce->vm->total >> 4, prandom_u32_state(&prng));
> +		u32 sz = min_t(u64, ce->vm->total >> 4, prandom_u32_state(prng));
>  		u32 phys_sz = sz % (max_block_size + 1);
> -		u32 val = prandom_u32_state(&prng);
> +		u32 val = prandom_u32_state(prng);
> +		u32 *vaddr;
>  		u32 i;
>
>  		sz = round_up(sz, PAGE_SIZE);
> @@ -201,9 +241,67 @@ static int igt_copy_blt(void *arg)
>  	if (err == -ENOMEM)
>  		err = 0;
>
> +	intel_context_put(ce);
> +out_file:
> +	mock_file_free(i915, file);
> +	return err;
> +}
> +
> +static int igt_threaded_blt(struct drm_i915_private *i915,
> +			    int (*blt_fn)(void *arg))
> +{
> +
> +	I915_RND_STATE(prng);
> +	struct igt_thread_arg thread = { i915, &prng, };

Each thread is using the same prng state then? Bah. It's not thread
safe, so it would lead to non-deterministic behaviour. At least it's
not every thread using the exact same sequence.

	thread = kmalloc_array(n_cpus, sizeof(*thread), GFP_KERNEL);

	for (...) {
		thread[i].prng =
			I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng));

I suspect we may end up prettifying that, as I expect we'll do more and
more parallelised smoketesting.
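Fleshing that out, a minimal sketch of the per-thread seeding, assuming
igt_thread_arg embeds the rnd_state by value rather than sharing a
pointer (the layout and error handling here are illustrative, not
final):

	struct igt_thread_arg {
		struct drm_i915_private *i915;
		struct rnd_state prng;	/* owned per thread, no sharing */
	};

	I915_RND_STATE(prng);
	struct igt_thread_arg *thread;

	thread = kmalloc_array(n_cpus, sizeof(*thread), GFP_KERNEL);
	if (!thread)
		return -ENOMEM;

	for (i = 0; i < n_cpus; ++i) {
		thread[i].i915 = i915;
		/* Seed every kthread from the parent prng before any of
		 * them run, so each gets a distinct but deterministic
		 * sequence regardless of scheduling order.
		 */
		thread[i].prng =
			I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng));
		tsk[i] = kthread_run(blt_fn, &thread[i], "igt/blt-%d", i);
	}

The thread functions would then read from &thread->prng directly, and
the thread array gets freed alongside tsk on the way out.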
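And the promised sketch of the multiply-shift trick behind
i915_prandom_u32_max_state (the helper name and signature below are
illustrative, not the actual i915 API):

	/* Scale a 32-bit draw into [0, ep_ro) without a divide: keep the
	 * upper 32 bits of the 64-bit product, so the result is driven by
	 * the generator's more-random high bits rather than a low-bits
	 * modulus.
	 */
	static inline u32 random_u32_below(struct rnd_state *state, u32 ep_ro)
	{
		return (u32)(((u64)prandom_u32_state(state) * ep_ro) >> 32);
	}

For example, ep_ro == I915_PRIORITY_MAX maps the full 2^32 draw range
onto [0, I915_PRIORITY_MAX) in a single multiply.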
> +	struct task_struct **tsk;
> +	unsigned int n_cpus;
> +	unsigned int i;
> +	int err = 0;
> +
> +	n_cpus = num_online_cpus() + 1;
> +
> +	tsk = kzalloc(n_cpus * sizeof(struct task_struct *), GFP_KERNEL);
> +	if (!tsk)
> +		return 0;
> +
> +	for (i = 0; i < n_cpus; ++i) {
> +		tsk[i] = kthread_run(blt_fn, &thread, "igt/blt-%d", i);
> +		if (IS_ERR(tsk[i])) {
> +			err = PTR_ERR(tsk[i]);
> +			break;
> +		}
> +
> +		get_task_struct(tsk[i]);
> +	}
> +
> +	for (i = 0; i < n_cpus; ++i) {
> +		int status;
> +
> +		if (IS_ERR_OR_NULL(tsk[i]))
> +			continue;
> +
> +		status = kthread_stop(tsk[i]);
> +		if (status && !err)
> +			err = status;

Looks good.

> +		put_task_struct(tsk[i]);
> +	}
> +

I think per-thread deterministic "random" behaviour is critical. But
that's the only drawback spotted, so with that resolved one way or
another,
Reviewed-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
-Chris