We want to allow userspace to reconfigure the subslice configuration for
its own use case. To do so, we expose a context parameter to allow
adjustment of the RPCS register stored within the context image (and
currently not accessible via LRI). If the context is adjusted before
first use, the adjustment is for "free"; otherwise if the context is
active we flush the context off the GPU (stalling all users) and forcing
the GPU to save the context to memory where we can modify it and so
ensure that the register is reloaded on next execution.
The overhead of managing additional EU subslices can be significant,
especially in multi-context workloads. Non-GPGPU contexts should
preferably disable the subslices it is not using, and others should
fine-tune the number to match their workload.
We expose complete control over the RPCS register, allowing
configuration of slice/subslice, via masks packed into a u64 for
simplicity. For example,
struct drm_i915_gem_context_param arg;
struct drm_i915_gem_context_param_sseu sseu = { .class = 0,
.instance = 0, };
memset(&arg, 0, sizeof(arg));
arg.ctx_id = ctx;
arg.param = I915_CONTEXT_PARAM_SSEU;
arg.value = (uintptr_t) &sseu;
if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &arg) == 0) {
sseu.packed.subslice_mask = 0;
drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &arg);
}
could be used to disable all subslices where supported.
v2: Fix offset of CTX_R_PWR_CLK_STATE in intel_lr_context_set_sseu() (Lionel)
v3: Add ability to program this per engine (Chris)
v4: Move most get_sseu() into i915_gem_context.c (Lionel)
v5: Validate sseu configuration against the device's capabilities (Lionel)
v6: Change context powergating settings through MI_SDM on kernel context (Chris)
v7: Synchronize the requests following a powergating setting change using a global
dependency (Chris)
Iterate timelines through dev_priv.gt.active_rings (Tvrtko)
Disable RPCS configuration setting for non capable users (Lionel/Tvrtko)
v8: s/union intel_sseu/struct intel_sseu/ (Lionel)
s/dev_priv/i915/ (Tvrtko)
Change uapi class/instance fields to u16 (Tvrtko)
Bump mask fields to 64bits (Lionel)
Don't return EPERM when dynamic sseu is disabled (Tvrtko)
v9: Import context image into kernel context's ppgtt only when
reconfiguring powergated slice/subslices (Chris)
Use aliasing ppgtt when needed (Michel)
Tvrtko Ursulin:
v10:
* Update for upstream changes.
* Request submit needs a RPM reference.
* Reject on !FULL_PPGTT for simplicity.
* Pull out get/set param to helpers for readability and less indent.
* Use i915_request_await_dma_fence in add_global_barrier to skip waits
on the same timeline and avoid GEM_BUG_ON.
* No need to explicitly assign a NULL pointer to engine in legacy mode.
* No need to move gen8_make_rpcs up.
* Factored out global barrier as prep patch.
* Allow to only CAP_SYS_ADMIN if !Gen11.
v11:
* Remove engine vfunc in favour of local helper. (Chris Wilson)
* Stop retiring requests before updates since it is not needed
(Chris Wilson)
* Implement direct CPU update path for idle contexts. (Chris Wilson)
* Left side dependency needs only be on the same context timeline.
(Chris Wilson)
* It is sufficient to order the timeline. (Chris Wilson)
* Reject !RCS configuration attempts with -ENODEV for now.
v12:
* Rebase for make_rpcs.
v13:
* Centralize SSEU normalization to make_rpcs.
* Type width checking (uAPI <-> implementation).
* Gen11 restrictions uAPI checks.
* Gen11 subslice count differences handling.
Chris Wilson:
* args->size handling fixes.
* Update context image from GGTT.
* Postpone context image update to pinning.
* Use i915_gem_active_raw instead of last_request_on_engine.
v14:
* Add activity tracker on intel_context to fix the lifetime issues
and simplify the code. (Chris Wilson)
v15:
* Fix context pin leak if no space in ring by simplifying the
context pinning sequence.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100899
Issue: https://github.com/intel/media-driver/issues/267
Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx>
Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@xxxxxxxxx>
Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
Cc: Zhipeng Gong <zhipeng.gong@xxxxxxxxx>
Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
---
drivers/gpu/drm/i915/i915_gem_context.c | 303 +++++++++++++++++++++++-
drivers/gpu/drm/i915/i915_gem_context.h | 6 +
drivers/gpu/drm/i915/intel_lrc.c | 4 +-
include/uapi/drm/i915_drm.h | 43 ++++
4 files changed, 353 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index ca2c8fcd1090..aa1f34e63080 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -90,6 +90,7 @@
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_trace.h"
+#include "intel_lrc_reg.h"
#include "intel_workarounds.h"
#define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
@@ -322,6 +323,14 @@ static u32 default_desc_template(const struct drm_i915_private *i915,
return desc;
}
+static void intel_context_retire(struct i915_gem_active *active,
+ struct i915_request *rq)
+{
+ struct intel_context *ce = container_of(active, typeof(*ce), active);
+
+ intel_context_unpin(ce);
+}
+
static struct i915_gem_context *
__create_hw_context(struct drm_i915_private *dev_priv,
struct drm_i915_file_private *file_priv)
@@ -345,6 +354,8 @@ __create_hw_context(struct drm_i915_private *dev_priv,
ce->gem_context = ctx;
/* Use the whole device by default */
ce->sseu = intel_device_default_sseu(dev_priv);
+
+ init_request_active(&ce->active, intel_context_retire);
}
INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
@@ -846,6 +857,48 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
return 0;
}
+static int get_sseu(struct i915_gem_context *ctx,
+ struct drm_i915_gem_context_param *args)
+{
+ struct drm_i915_gem_context_param_sseu user_sseu;
+ struct intel_engine_cs *engine;
+ struct intel_context *ce;
+
+ if (args->size == 0)
+ goto out;
+ else if (args->size < sizeof(user_sseu))
+ return -EINVAL;
+
+ if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value),
+ sizeof(user_sseu)))
+ return -EFAULT;
+
+ if (user_sseu.rsvd1 || user_sseu.rsvd2)
+ return -EINVAL;
+
+ engine = intel_engine_lookup_user(ctx->i915,
+ user_sseu.class,
+ user_sseu.instance);
+ if (!engine)
+ return -EINVAL;
+
+ ce = to_intel_context(ctx, engine);
+
+ user_sseu.slice_mask = ce->sseu.slice_mask;
+ user_sseu.subslice_mask = ce->sseu.subslice_mask;
+ user_sseu.min_eus_per_subslice = ce->sseu.min_eus_per_subslice;
+ user_sseu.max_eus_per_subslice = ce->sseu.max_eus_per_subslice;
+
+ if (copy_to_user(u64_to_user_ptr(args->value), &user_sseu,
+ sizeof(user_sseu)))
+ return -EFAULT;
+
+out:
+ args->size = sizeof(user_sseu);
+
+ return 0;
+}
+
int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
{
@@ -858,15 +911,17 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
if (!ctx)
return -ENOENT;
- args->size = 0;