From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> A new workload command ('S') is added which allows per context slice (re-)configuration. v2: * Only query device SSEU on first use. (Chris) Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> Reviewed-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> --- benchmarks/gem_wsim.c | 83 ++++++++++++++++++++++++++++++++++++------ benchmarks/wsim/README | 23 +++++++++++- 2 files changed, 94 insertions(+), 12 deletions(-) diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c index 8dd887a5afd8..ede505e537fd 100644 --- a/benchmarks/gem_wsim.c +++ b/benchmarks/gem_wsim.c @@ -87,6 +87,7 @@ enum w_type LOAD_BALANCE, BOND, TERMINATE, + SSEU }; struct deps @@ -136,6 +137,7 @@ struct w_step uint64_t bond_mask; enum intel_engine_id bond_master; }; + int sseu; }; /* Implementation details */ @@ -171,6 +173,7 @@ struct ctx { bool targets_instance; bool wants_balance; unsigned int static_vcs; + uint64_t sseu; }; struct workload @@ -241,6 +244,9 @@ static unsigned int context_vcs_rr; static int verbose = 1; static int fd; +static struct drm_i915_gem_context_param_sseu device_sseu = { + .slice_mask = -1 /* Force read on first use. 
*/ +}; #define SWAPVCS (1<<0) #define SEQNO (1<<1) @@ -482,6 +488,27 @@ parse_workload(struct w_arg *arg, unsigned int flags, struct workload *app_w) int_field(SYNC, target, tmp >= 0 || ((int)nr_steps + tmp) < 0, "Invalid sync target at step %u!\n"); + } else if (!strcmp(field, "S")) { + unsigned int nr = 0; + while ((field = strtok_r(fstart, ".", &fctx))) { + tmp = atoi(field); + check_arg(tmp <= 0 && nr == 0, + "Invalid context at step %u!\n", + nr_steps); + check_arg(nr > 1, + "Invalid SSEU format at step %u!\n", + nr_steps); + + if (nr == 0) + step.context = tmp; + else if (nr == 1) + step.sseu = tmp; + + nr++; + } + + step.type = SSEU; + goto add_step; } else if (!strcmp(field, "t")) { int_field(THROTTLE, throttle, tmp < 0, @@ -1141,24 +1168,38 @@ find_engine(struct i915_engine_class_instance *ci, unsigned int count, return 0; } -static void -set_ctx_sseu(uint32_t ctx) +static struct drm_i915_gem_context_param_sseu get_device_sseu(void) { - struct drm_i915_gem_context_param_sseu sseu = { }; struct drm_i915_gem_context_param param = { }; - sseu.class = I915_ENGINE_CLASS_RENDER; - sseu.instance = 0; + if (device_sseu.slice_mask == -1) { + param.param = I915_CONTEXT_PARAM_SSEU; + param.value = (uintptr_t)&device_sseu; + + gem_context_get_param(fd, &param); + } + + return device_sseu; +} + +static uint64_t +set_ctx_sseu(uint32_t ctx, uint64_t slice_mask) +{ + struct drm_i915_gem_context_param_sseu sseu = get_device_sseu(); + struct drm_i915_gem_context_param param = { }; + + if (slice_mask == -1) + slice_mask = device_sseu.slice_mask; + + sseu.slice_mask = slice_mask; param.ctx_id = ctx; param.param = I915_CONTEXT_PARAM_SSEU; param.value = (uintptr_t)&sseu; - gem_context_get_param(fd, &param); - - sseu.slice_mask = 1; - gem_context_set_param(fd, &param); + + return slice_mask; } static int @@ -1352,6 +1393,7 @@ prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags) igt_assert(ctx_id); ctx->id = ctx_id; + ctx->sseu = device_sseu.slice_mask; if (flags & 
GLOBAL_BALANCE) { ctx->static_vcs = context_vcs_rr; @@ -1512,8 +1554,10 @@ prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags) gem_context_set_param(fd, &param); } - if (wrk->sseu) - set_ctx_sseu(arg.ctx_id); + if (wrk->sseu) { + /* Set to slice 0 only, one slice. */ + ctx->sseu = set_ctx_sseu(ctx_id, 1); + } if (share_vm) vm_destroy(fd, share_vm); @@ -1550,6 +1594,16 @@ prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags) } } + /* + * Scan for SSEU control steps. + */ + for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) { + if (w->type == SSEU) { + get_device_sseu(); + break; + } + } + /* * Allocate batch buffers. */ @@ -2485,6 +2539,13 @@ static void *run_workload(void *data) w->type == LOAD_BALANCE || w->type == BOND) { continue; + } else if (w->type == SSEU) { + if (w->sseu != wrk->ctx_list[w->context].sseu) { + wrk->ctx_list[w->context].sseu = + set_ctx_sseu(wrk->ctx_list[w->context].id, + w->sseu); + } + continue; } if (do_sleep || w->type == PERIOD) { diff --git a/benchmarks/wsim/README b/benchmarks/wsim/README index 497d5cad2142..9f770217f075 100644 --- a/benchmarks/wsim/README +++ b/benchmarks/wsim/README @@ -5,7 +5,7 @@ ctx.engine.duration_us.dependency.wait,... <uint>.<str>.<uint>[-<uint>]|*.<int <= 0>[/<int <= 0>][...].<0|1>,... B.<uint> M.<uint>.<str>[|<str>]... -P|X.<uint>.<int> +P|S|X.<uint>.<int> d|p|s|t|q|a|T.<int>,... b.<uint>.<str>[|<str>].<str> f @@ -30,6 +30,7 @@ Additional workload steps are also supported: 'b' - Set up engine bonds. 'M' - Set up engine map. 'P' - Context priority. + 'S' - Context SSEU configuration. 'T' - Terminate an infinite batch. 'X' - Context preemption control. @@ -254,3 +255,23 @@ then look like: 1.DEFAULT.1000.f-1.0 2.DEFAULT.1000.s-1.0 a.-3 + +Context SSEU configuration +-------------------------- + + S.1.1 + 1.RCS.1000.0.0 + S.2.-1 + 2.RCS.1000.0.0 + +Context 1 is configured to run with one enabled slice (slice mask 1) and a batch +is submitted against it. 
Context 2 is configured to run with all slices (this is +the default so the command could also be omitted) and a batch submitted against +it. + +This shows the dynamic SSEU reconfiguration cost between two contexts competing +for the render engine. + +Slice mask of -1 has a special meaning of "all slices". Otherwise any integer +can be specified as the slice mask, but beware that any value apart from 1 and -1 can make +the workload not portable between different GPUs. -- 2.20.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx