Over the last few years, we have debated how to extend the user API to
support an increase in the number of engines, which may be sparse and
even heterogeneous within a class (not all video decoders are created
equal). We settled on using (class, instance) tuples to identify a
specific engine, with an API for the user to construct a map of engines
to capabilities. Into this picture, we then add the challenge of virtual
engines; one user engine that maps behind the scenes to any number of
physical engines. To keep it general, we want the user to have full
control over that mapping. To that end, we allow the user to constrain a
context to define the set of engines that it can access, order fully
controlled by the user via (class, instance). With such precise control
in context setup, we can continue to use the existing execbuf uABI of
specifying a single index; only now it doesn't automagically map onto
the engines, it uses the user defined engine map from the context.
The I915_EXEC_DEFAULT slot is left empty, and invalid for use by
execbuf. Its use will be revealed in the next patch.
v2: Fixup freeing of local on success of get_engines()
v3: Allow empty engines[]
v4: s/nengine/num_engines/
Testcase: igt/gem_ctx_engines
Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
---
drivers/gpu/drm/i915/i915_gem_context.c | 226 +++++++++++++++++-
drivers/gpu/drm/i915/i915_gem_context_types.h | 21 ++
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 19 +-
drivers/gpu/drm/i915/i915_utils.h | 36 +++
include/uapi/drm/i915_drm.h | 42 +++-
5 files changed, 331 insertions(+), 13 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index f36648329074..f038c15e73d8 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -86,7 +86,9 @@
*/
#include <linux/log2.h>
+
#include <drm/i915_drm.h>
+
#include "i915_drv.h"
#include "i915_globals.h"
#include "i915_trace.h"
@@ -101,6 +103,21 @@ static struct i915_global_gem_context {
struct kmem_cache *slab_luts;
} global;
+static struct intel_engine_cs *
+lookup_user_engine(struct i915_gem_context *ctx,
+ unsigned long flags, u16 class, u16 instance)
+#define LOOKUP_USER_INDEX BIT(0)
+{
+ if (flags & LOOKUP_USER_INDEX) {
+ if (instance >= ctx->num_engines)
+ return NULL;
+
+ return ctx->engines[instance];
+ }
+
+ return intel_engine_lookup_user(ctx->i915, class, instance);
+}
+
struct i915_lut_handle *i915_lut_handle_alloc(void)
{
return kmem_cache_alloc(global.slab_luts, GFP_KERNEL);
@@ -235,6 +252,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
release_hw_id(ctx);
i915_ppgtt_put(ctx->ppgtt);
+ kfree(ctx->engines);
+
rbtree_postorder_for_each_entry_safe(it, n, &ctx->hw_contexts, node)
intel_context_put(it);
@@ -1377,9 +1396,9 @@ static int set_sseu(struct i915_gem_context *ctx,
if (user_sseu.flags || user_sseu.rsvd)
return -EINVAL;
- engine = intel_engine_lookup_user(i915,
- user_sseu.engine_class,
- user_sseu.engine_instance);
+ engine = lookup_user_engine(ctx, 0,
+ user_sseu.engine_class,
+ user_sseu.engine_instance);
if (!engine)
return -EINVAL;
@@ -1397,9 +1416,166 @@ static int set_sseu(struct i915_gem_context *ctx,
args->size = sizeof(user_sseu);
+ return 0;
+};
+
+struct set_engines {
+ struct i915_gem_context *ctx;
+ struct intel_engine_cs **engines;
+ unsigned int num_engines;
+};
+
+static const i915_user_extension_fn set_engines__extensions[] = {
+};
+
+static int
+set_engines(struct i915_gem_context *ctx,
+ const struct drm_i915_gem_context_param *args)
+{
+ struct i915_context_param_engines __user *user;
+ struct set_engines set = { .ctx = ctx };
+ u64 size, extensions;
+ unsigned int n;
+ int err;
+
+ user = u64_to_user_ptr(args->value);
+ size = args->size;
+ if (!size)
+ goto out;
+
+ BUILD_BUG_ON(!IS_ALIGNED(sizeof(*user), sizeof(*user->class_instance)));
+ if (size < sizeof(*user) ||
+ !IS_ALIGNED(size, sizeof(*user->class_instance)))
+ return -EINVAL;
+
+ /* Internal limitation of u64 bitmaps + a few bits of u64 in the uABI */
+ set.num_engines =
+ (size - sizeof(*user)) / sizeof(*user->class_instance);
+ if (set.num_engines > I915_EXEC_RING_MASK + 1)
+ return -EINVAL;
+
+ set.engines = kmalloc_array(set.num_engines,
+ sizeof(*set.engines),
+ GFP_KERNEL);
+ if (!set.engines)
+ return -ENOMEM;
+
+ for (n = 0; n < set.num_engines; n++) {
+ u16 class, inst;
+
+ if (get_user(class, &user->class_instance[n].engine_class) ||
+ get_user(inst, &user->class_instance[n].engine_instance)) {
+ kfree(set.engines);
+ return -EFAULT;
+ }
+
+ if (class == (u16)I915_ENGINE_CLASS_INVALID &&
+ inst == (u16)I915_ENGINE_CLASS_INVALID_NONE) {
+ set.engines[n] = NULL;
+ continue;
+ }
+
+ set.engines[n] = lookup_user_engine(ctx, 0, class, inst);
+ if (!set.engines[n]) {
+ kfree(set.engines);
+ return -ENOENT;
+ }
+ }
+
+ err = -EFAULT;
+ if (!get_user(extensions, &user->extensions))
+ err = i915_user_extensions(u64_to_user_ptr(extensions),
+ set_engines__extensions,
+ ARRAY_SIZE(set_engines__extensions),
+ &set);
+ if (err) {
+ kfree(set.engines);
+ return err;
+ }
+
+out:
+ mutex_lock(&ctx->i915->drm.struct_mutex);
+ kfree(ctx->engines);
+ ctx->engines = set.engines;
+ ctx->num_engines = set.num_engines;
+ mutex_unlock(&ctx->i915->drm.struct_mutex);
+
return 0;
}
+static int
+get_engines(struct i915_gem_context *ctx,
+ struct drm_i915_gem_context_param *args)
+{
+ struct i915_context_param_engines *local;
+ size_t n, count, size;
+ int err = 0;
+
+restart:
+ if (!READ_ONCE(ctx->engines)) {
+ args->size = 0;
+ return 0;
+ }
+
+ count = READ_ONCE(ctx->num_engines);
+
+ /* Be paranoid in case we have an impedance mismatch */
+ if (!check_struct_size(local, class_instance, count, &size))
+ return -ENOMEM;
+ if (unlikely(overflows_type(size, args->size)))
+ return -ENOMEM;
+
+ if (!args->size) {
+ args->size = size;
+ return 0;
+ }
+
+ if (args->size < size)
+ return -EINVAL;
+
+ local = kmalloc(size, GFP_KERNEL);
+ if (!local)
+ return -ENOMEM;
+
+ if (mutex_lock_interruptible(&ctx->i915->drm.struct_mutex)) {
+ err = -EINTR;
+ goto out;
+ }
+
+ if (!ctx->engines || ctx->num_engines != count) {
+ mutex_unlock(&ctx->i915->drm.struct_mutex);
+ kfree(local);
+ goto restart;
+ }
+
+ local->extensions = 0;
+ for (n = 0; n < count; n++) {
+ if (ctx->engines[n]) {
+ local->class_instance[n].engine_class =
+ ctx->engines[n]->uabi_class;
+ local->class_instance[n].engine_instance =
+ ctx->engines[n]->instance;
+ } else {
+ local->class_instance[n].engine_class =
+ I915_ENGINE_CLASS_INVALID;
+ local->class_instance[n].engine_instance =
+ I915_ENGINE_CLASS_INVALID_NONE;
+ }
+ }
+
+ mutex_unlock(&ctx->i915->drm.struct_mutex);
+
+ if (copy_to_user(u64_to_user_ptr(args->value), local, size)) {
+ err = -EFAULT;
+ goto out;
+ }
+ args->size = size;
+
+out:
+ kfree(local);
+ return err;
+}
+
static int ctx_setparam(struct i915_gem_context *ctx,
struct drm_i915_gem_context_param *args)
{
@@ -1472,6 +1648,10 @@ static int ctx_setparam(struct i915_gem_context *ctx,
ret = set_ppgtt(ctx, args);
break;
+ case I915_CONTEXT_PARAM_ENGINES:
+ ret = set_engines(ctx, args);
+ break;
+
case I915_CONTEXT_PARAM_BAN_PERIOD:
default:
ret = -EINVAL;
@@ -1500,6 +1680,35 @@ static int create_setparam(struct i915_user_extension __user *ext, void *data)
return ctx_setparam(arg->ctx, &local.param);
}
+static int clone_engines(struct i915_gem_context *dst,
+ struct i915_gem_context *src)
+{
+ struct intel_engine_cs **engines;
+ unsigned int num_engines;
+
+ mutex_lock(&src->i915->drm.struct_mutex); /* serialise src->engine[] */
+
+ /* handle ZERO_SIZE_PTR on behalf of kmemdup */
+ num_engines = src->num_engines;
+ engines = src->engines;
+ if (!ZERO_OR_NULL_PTR(engines)) {
+ engines = kmemdup(engines,
+ sizeof(*engines) * num_engines,
+ GFP_KERNEL);
+ if (!engines) {
+ mutex_unlock(&src->i915->drm.struct_mutex);
+ return -ENOMEM;
+ }
+ }
+
+ mutex_unlock(&src->i915->drm.struct_mutex);
+
+ kfree(dst->engines);
+ dst->engines = engines;
+ dst->num_engines = num_engines;
+ return 0;
+}
+
static int clone_flags(struct i915_gem_context *dst,
struct i915_gem_context *src)
{
@@ -1608,6 +1817,7 @@ static int create_clone(struct i915_user_extension __user *ext, void *data)
static int (* const fn[])(struct i915_gem_context *dst,
struct i915_gem_context *src) = {
#define MAP(x, y) [ilog2(I915_CONTEXT_CLONE_##x)] = y
+ MAP(ENGINES, clone_engines),
MAP(FLAGS, clone_flags),
MAP(SCHEDATTR, clone_schedattr),
MAP(SSEU, clone_sseu),
@@ -1770,9 +1980,9 @@ static int get_sseu(struct i915_gem_context *ctx,
if (user_sseu.flags || user_sseu.rsvd)
return -EINVAL;
- engine = intel_engine_lookup_user(ctx->i915,
- user_sseu.engine_class,
- user_sseu.engine_instance);
+ engine = lookup_user_engine(ctx, 0,
+ user_sseu.engine_class,
+ user_sseu.engine_instance);
if (!engine)
return -EINVAL;
@@ -1853,6 +2063,10 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
ret = get_ppgtt(ctx, args);
break;
+ case I915_CONTEXT_PARAM_ENGINES:
+ ret = get_engines(ctx, args);
+ break;
+
case I915_CONTEXT_PARAM_BAN_PERIOD:
default:
ret = -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/i915_gem_context_types.h
index e2ec58b10fb2..46b6080b2240 100644
--- a/drivers/gpu/drm/i915/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/i915_gem_context_types.h
@@ -41,6 +41,20 @@ struct i915_gem_context {
/** file_priv: owning file descriptor */
struct drm_i915_file_private *file_priv;
+ /**
+ * @engines: User defined engines for this context
+ *
+ * NULL means to use legacy definitions (including random meaning of
+ * I915_EXEC_BSD with I915_EXEC_BSD_SELECTOR overrides).
+ *
+ * If defined, execbuf uses the I915_EXEC_RING_MASK bits as an index
+ * into this array, and various other uAPI offer the ability to look
+ * up an index from this array to select an engine to operate on.
+ *
+ * User defined by I915_CONTEXT_PARAM_ENGINES.
+ */
+ struct intel_engine_cs **engines;
+
struct i915_timeline *timeline;
/**
@@ -110,6 +124,13 @@ struct i915_gem_context {
#define CONTEXT_CLOSED 1
#define CONTEXT_FORCE_SINGLE_SUBMISSION 2
+ /**
+ * @num_engines: Number of user defined engines for this context
+ *
+ * See @engines for the elements.
+ */
+ unsigned int num_engines;
+
/**
* @hw_id: - unique identifier for the context
*
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 3d672c9edb94..66b3921cc8bd 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -2089,13 +2089,20 @@ static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
};
static struct intel_engine_cs *
-eb_select_engine(struct drm_i915_private *dev_priv,
+eb_select_engine(struct i915_execbuffer *eb,
struct drm_file *file,
struct drm_i915_gem_execbuffer2 *args)
{
unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
struct intel_engine_cs *engine;
+ if (eb->ctx->engines) {
+ if (user_ring_id >= eb->ctx->num_engines)
+ return NULL;
+
+ return eb->ctx->engines[user_ring_id];
+ }
+
if (user_ring_id > I915_USER_RINGS) {
DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
return NULL;
@@ -2108,11 +2115,11 @@ eb_select_engine(struct drm_i915_private *dev_priv,
return NULL;
}
- if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(dev_priv, VCS1)) {
+ if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(eb->i915, VCS1)) {
unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
- bsd_idx = gen8_dispatch_bsd_engine(dev_priv, file);
+ bsd_idx = gen8_dispatch_bsd_engine(eb->i915, file);
} else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
bsd_idx <= I915_EXEC_BSD_RING2) {
bsd_idx >>= I915_EXEC_BSD_SHIFT;
@@ -2123,9 +2130,9 @@ eb_select_engine(struct drm_i915_private *dev_priv,
return NULL;
}
- engine = dev_priv->engine[_VCS(bsd_idx)];
+ engine = eb->i915->engine[_VCS(bsd_idx)];
} else {
- engine = dev_priv->engine[user_ring_map[user_ring_id]];
+ engine = eb->i915->engine[user_ring_map[user_ring_id]];
}
if (!engine) {
@@ -2335,7 +2342,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
if (unlikely(err))
goto err_destroy;
- eb.engine = eb_select_engine(eb.i915, file, args);
+ eb.engine = eb_select_engine(&eb, file, args);
if (!eb.engine) {
err = -EINVAL;
goto err_engine;
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index 2dbe8933b50a..1436fe2fb5f8 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -25,6 +25,9 @@
#ifndef __I915_UTILS_H
#define __I915_UTILS_H
+#include <linux/kernel.h>
+#include <linux/overflow.h>
+
#undef WARN_ON
/* Many gcc seem to no see through this and fall over :( */
#if 0
@@ -73,6 +76,39 @@
#define overflows_type(x, T) \
(sizeof(x) > sizeof(T) && (x) >> BITS_PER_TYPE(T))
+static inline bool
+__check_struct_size(size_t base, size_t arr, size_t count, size_t *size)
+{
+ size_t sz;
+
+ if (check_mul_overflow(count, arr, &sz))
+ return false;
+
+ if (check_add_overflow(sz, base, &sz))
+ return false;
+
+ *size = sz;
+ return true;
+}
+
+/**
+ * check_struct_size() - Calculate size of structure with trailing array.
+ * @p: Pointer to the structure.
+ * @member: Name of the array member.
+ * @n: Number of elements in the array.
+ * @sz: Total size of structure and array
+ *
+ * Calculates size of memory needed for structure @p followed by an
+ * array of @n @member elements, like struct_size() but reports
+ * whether it overflowed, and the resultant size in @sz
+ *
+ * Return: false if the calculation overflowed.
+ */
+#define check_struct_size(p, member, n, sz) \
+ likely(__check_struct_size(sizeof(*(p)), \
+ sizeof(*(p)->member) + __must_be_array((p)->member), \
+ n, sz))
+
#define ptr_mask_bits(ptr, n) ({ \
unsigned long __v = (unsigned long)(ptr); \
(typeof(ptr))(__v & -BIT(n)); \
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index a5bdb86858f6..4e67c2395b46 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -126,6 +126,8 @@ enum drm_i915_gem_engine_class {
I915_ENGINE_CLASS_INVALID = -1
};
+#define I915_ENGINE_CLASS_INVALID_NONE -1
+
/**
* DOC: perf_events exposed by i915 through /sys/bus/event_sources/drivers/i915
*
@@ -1511,6 +1513,26 @@ struct drm_i915_gem_context_param {
* See DRM_I915_GEM_VM_CREATE and DRM_I915_GEM_VM_DESTROY.
*/
#define I915_CONTEXT_PARAM_VM 0x9
+
+/*
+ * I915_CONTEXT_PARAM_ENGINES:
+ *
+ * Bind this context to operate on this subset of available engines. Henceforth,
+ * the I915_EXEC_RING selector for DRM_IOCTL_I915_GEM_EXECBUFFER2 operates as
+ * an index into this array of engines; I915_EXEC_DEFAULT selecting engine[0]
+ * and upwards. Slots 0...N are filled in using the specified (class, instance).
+ * Use
+ * engine_class: I915_ENGINE_CLASS_INVALID,
+ * engine_instance: I915_ENGINE_CLASS_INVALID_NONE
+ * to specify a gap in the array that can be filled in later, e.g. by a
+ * virtual engine used for load balancing.
+ *
+ * Setting the number of engines bound to the context to 0, by passing a zero
+ * sized argument, will revert back to default settings.
+ *
+ * See struct i915_context_param_engines.
+ */
+#define I915_CONTEXT_PARAM_ENGINES 0xa
/* Must be kept compact -- no holes and well documented */
__u64 value;
@@ -1575,6 +1597,23 @@ struct drm_i915_gem_context_param_sseu {
__u32 rsvd;
};
+struct i915_context_param_engines {
+ __u64 extensions; /* linked chain of extension blocks, 0 terminates */
+
+ struct {
+ __u16 engine_class; /* see enum drm_i915_gem_engine_class */
+ __u16 engine_instance;
+ } class_instance[0];
+} __attribute__((packed));
+
+#define I915_DEFINE_CONTEXT_PARAM_ENGINES(name__, N__) struct { \
+ __u64 extensions; \
+ struct { \
+ __u16 engine_class; \
+ __u16 engine_instance; \
+ } class_instance[N__]; \
+} __attribute__((packed)) name__
+
struct drm_i915_gem_context_create_ext_setparam {
#define I915_CONTEXT_CREATE_EXT_SETPARAM 0
struct i915_user_extension base;
@@ -1591,7 +1630,8 @@ struct drm_i915_gem_context_create_ext_clone {
#define I915_CONTEXT_CLONE_SSEU (1u << 2)
#define I915_CONTEXT_CLONE_TIMELINE (1u << 3)
#define I915_CONTEXT_CLONE_VM (1u << 4)
-#define I915_CONTEXT_CLONE_UNKNOWN -(I915_CONTEXT_CLONE_VM << 1)
+#define I915_CONTEXT_CLONE_ENGINES (1u << 5)
+#define I915_CONTEXT_CLONE_UNKNOWN -(I915_CONTEXT_CLONE_ENGINES << 1)
__u64 rsvd;
};