Quoting Tvrtko Ursulin (2018-05-03 18:18:43)
>
> On 25/04/2018 12:45, Lionel Landwerlin wrote:
> > From: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> >
> > We want to allow userspace to reconfigure the subslice configuration for
> > its own use case. To do so, we expose a context parameter to allow
> > adjustment of the RPCS register stored within the context image (and
> > currently not accessible via LRI). If the context is adjusted before
> > first use, the adjustment is for "free"; otherwise, if the context is
> > active, we flush the context off the GPU (stalling all users) and force
> > the GPU to save the context to memory where we can modify it and so
> > ensure that the register is reloaded on next execution.
> >
> > The overhead of managing additional EU subslices can be significant,
> > especially in multi-context workloads. Non-GPGPU contexts should
> > preferably disable the subslices they are not using, and others should
> > fine-tune the number to match their workload.
> >
> > We expose complete control over the RPCS register, allowing
> > configuration of slice/subslice via masks packed into a u64 for
> > simplicity. For example,
> >
> >     struct drm_i915_gem_context_param arg;
> >     struct drm_i915_gem_context_param_sseu sseu = { .flags = I915_EXEC_RENDER };
> >
> >     memset(&arg, 0, sizeof(arg));
> >     arg.ctx_id = ctx;
> >     arg.param = I915_CONTEXT_PARAM_SSEU;
> >     arg.value = (uintptr_t) &sseu;
> >     if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &arg) == 0) {
> >             sseu.packed.subslice_mask = 0;
> >
> >             drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &arg);
> >     }
> >
> > could be used to disable all subslices where supported.
> >
> > v2: Fix offset of CTX_R_PWR_CLK_STATE in intel_lr_context_set_sseu() (Lionel)
> >
> > v3: Add ability to program this per engine (Chris)
> >
> > v4: Move most get_sseu() into i915_gem_context.c (Lionel)
> >
> > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100899
> > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> > Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx>
> > Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@xxxxxxxxx>
> > CC: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
> > CC: Zhipeng Gong <zhipeng.gong@xxxxxxxxx>
> > CC: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx>
> > ---
> >  drivers/gpu/drm/i915/i915_gem_context.c | 82 ++++++++++++++++++++++++-
> >  drivers/gpu/drm/i915/intel_lrc.c        | 55 +++++++++++++++++
> >  drivers/gpu/drm/i915/intel_lrc.h        |  4 ++
> >  include/uapi/drm/i915_drm.h             | 28 +++++++++
> >  4 files changed, 168 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> > index bdf050beeb94..b97ddcf47514 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_context.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> > @@ -740,6 +740,26 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
> >          return 0;
> >  }
> >
> > +static struct i915_gem_context_sseu
> > +i915_gem_context_sseu_from_user_sseu(const struct sseu_dev_info *sseu,
> > +                                     const struct drm_i915_gem_context_param_sseu *user_sseu)
> > +{
> > +        struct i915_gem_context_sseu value = {
> > +                .slice_mask = user_sseu->packed.slice_mask == 0 ?
> > +                              sseu->slice_mask :
> > +                              (user_sseu->packed.slice_mask & sseu->slice_mask),
> > +                .subslice_mask = user_sseu->packed.subslice_mask == 0 ?
> > +                                 sseu->subslice_mask[0] :
> > +                                 (user_sseu->packed.subslice_mask & sseu->subslice_mask[0]),
> > +                .min_eus_per_subslice = min(user_sseu->packed.min_eus_per_subslice,
> > +                                            sseu->max_eus_per_subslice),
> > +                .max_eus_per_subslice = min(user_sseu->packed.max_eus_per_subslice,
> > +                                            sseu->max_eus_per_subslice),
> > +        };
> > +
> > +        return value;
> > +}
> > +
> >  int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
> >                                      struct drm_file *file)
> >  {
> > @@ -777,6 +797,37 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
> >          case I915_CONTEXT_PARAM_PRIORITY:
> >                  args->value = ctx->sched.priority;
> >                  break;
> > +        case I915_CONTEXT_PARAM_SSEU: {
> > +                struct drm_i915_gem_context_param_sseu param_sseu;
> > +                struct intel_engine_cs *engine;
> > +
> > +                if (copy_from_user(&param_sseu, u64_to_user_ptr(args->value),
> > +                                   sizeof(param_sseu))) {
> > +                        ret = -EFAULT;
> > +                        break;
> > +                }
> > +
> > +                engine = i915_gem_engine_from_flags(to_i915(dev), file,
> > +                                                    param_sseu.flags);
> > +                if (!engine) {
> > +                        ret = -EINVAL;
> > +                        break;
> > +                }
> > +
> > +                param_sseu.packed.slice_mask =
> > +                        ctx->engine[engine->id].sseu.slice_mask;
> > +                param_sseu.packed.subslice_mask =
> > +                        ctx->engine[engine->id].sseu.subslice_mask;
> > +                param_sseu.packed.min_eus_per_subslice =
> > +                        ctx->engine[engine->id].sseu.min_eus_per_subslice;
> > +                param_sseu.packed.max_eus_per_subslice =
> > +                        ctx->engine[engine->id].sseu.max_eus_per_subslice;
> > +
> > +                if (copy_to_user(u64_to_user_ptr(args->value), &param_sseu,
> > +                                 sizeof(param_sseu)))
> > +                        ret = -EFAULT;
> > +                break;
> > +        }
> >          default:
> >                  ret = -EINVAL;
> >                  break;
> > @@ -832,7 +883,6 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
> >                  else
> >                          i915_gem_context_clear_bannable(ctx);
> >                  break;
> > -
> >          case I915_CONTEXT_PARAM_PRIORITY:
> >          {
> >                  s64 priority = args->value;
> > @@ -851,7 +901,37 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
> >                          ctx->sched.priority = priority;
> >                  }
> >                  break;
> > +        case I915_CONTEXT_PARAM_SSEU:
> > +                if (args->size)
> > +                        ret = -EINVAL;
> > +                else if (!HAS_EXECLISTS(ctx->i915))
> > +                        ret = -ENODEV;
> > +                else {
> > +                        struct drm_i915_private *dev_priv = to_i915(dev);
> > +                        struct drm_i915_gem_context_param_sseu user_sseu;
> > +                        struct i915_gem_context_sseu ctx_sseu;
> > +                        struct intel_engine_cs *engine;
> > +
> > +                        if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value),
> > +                                           sizeof(user_sseu))) {
> > +                                ret = -EFAULT;
> > +                                break;
> > +                        }
> > +
> > +                        engine = i915_gem_engine_from_flags(dev_priv, file,
> > +                                                            user_sseu.flags);
> > +                        if (!engine) {
> > +                                ret = -EINVAL;
> > +                                break;
> > +                        }
> > +
> > +                        ctx_sseu =
> > +                                i915_gem_context_sseu_from_user_sseu(&INTEL_INFO(dev_priv)->sseu,
> > +                                                                     &user_sseu);
> >
> > +                        ret = intel_lr_context_set_sseu(ctx, engine, &ctx_sseu);
> > +                }
> > +                break;
> >          default:
> >                  ret = -EINVAL;
> >                  break;
> > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> > index dca17ef24de5..9caddcb1f234 100644
> > --- a/drivers/gpu/drm/i915/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/intel_lrc.c
> > @@ -2711,6 +2711,61 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv)
> >          }
> >  }
> >
> > +int intel_lr_context_set_sseu(struct i915_gem_context *ctx,
> > +                              struct intel_engine_cs *engine,
> > +                              struct i915_gem_context_sseu *sseu)
> > +{
> > +        struct drm_i915_private *dev_priv = ctx->i915;
> > +        struct intel_context *ce;
> > +        enum intel_engine_id id;
> > +        int ret;
> > +
> > +        lockdep_assert_held(&dev_priv->drm.struct_mutex);
> > +
> > +        if (memcmp(sseu, &ctx->engine[engine->id].sseu, sizeof(*sseu)) == 0)
> > +                return 0;
> > +
> > +        /*
> > +         * We can only program this on render ring.
> > +         */
> > +        ce = &ctx->engine[RCS];
> > +
> > +        if (ce->pin_count) { /* Assume that the context is active! */
> > +                ret = i915_gem_switch_to_kernel_context(dev_priv);
> > +                if (ret)
> > +                        return ret;
> > +
> > +                ret = i915_gem_wait_for_idle(dev_priv,
> > +                                             I915_WAIT_INTERRUPTIBLE |
> > +                                             I915_WAIT_LOCKED);
>
> Could we consider the alternative of only allowing this to be configured
> on context create? That way we would not need to idle the GPU every time
> userspace decides to fiddle with it. It is unprivileged so quite an easy
> way for a random app to ruin GPU performance for everyone.

I think we can do dynamic reconfiguration of the context using LRI. And if
we can't do so from within the context, we can use SDM from another.
-Chris
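
For reference, a minimal sketch of what the LRI route could look like on the
kernel side, assuming the new RPCS value has already been computed for the
context. The helper name and the way the request is obtained are illustrative
only and not part of the patch above; MI_LOAD_REGISTER_IMM, MI_NOOP and
GEN8_R_PWR_CLK_STATE are the existing i915 definitions:

    /*
     * Illustrative sketch only, written as if it sat in intel_lrc.c where
     * these definitions are already in scope: emit the new RPCS value with
     * MI_LOAD_REGISTER_IMM from a request running on the render engine, so
     * the value ends up in the saved context image without idling the GPU.
     */
    static int emit_rpcs_config(struct i915_request *rq, u32 rpcs)
    {
            u32 *cs;

            /* Reserve 4 dwords: 3 for the LRI plus a NOOP as padding. */
            cs = intel_ring_begin(rq, 4);
            if (IS_ERR(cs))
                    return PTR_ERR(cs);

            *cs++ = MI_LOAD_REGISTER_IMM(1);
            *cs++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
            *cs++ = rpcs;
            *cs++ = MI_NOOP;

            intel_ring_advance(rq, cs);
            return 0;
    }

Whether the hardware honours such an LRI to R_PWR_CLK_STATE from within the
running context, or whether the value has to be written into the context
image with SDM from another context, is the open question in the reply above.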