A context encompasses the driver's view of process-related state, and encapsulates the logical GPU state where available. Each context is currently equivalent to a process in CPU terms. As with processes, sometimes the user wants a lighter encapsulation that shares some state with the parent process; for example, two threads have unique register state but share the virtual memory mappings. We can support exactly the same principle using contexts, where we may share the GTT but keep the logical GPU state distinct. This allows quicker switching between those contexts, and allows userspace to allocate a single offset in the GTT and use it across multiple contexts. As with clone(), in the future we may wish to allow userspace to select more features to copy across from the parent, but for now we only allow sharing of the GTT. Note that if full per-process GTT is not supported on the hardware, the GTT is already implicitly shared between contexts, and this request to create contexts with shared GTT fails. With full ppGTT, every fd (i.e. every process) is allocated a unique GTT, so this request cannot be used to share GTT between processes/fds; it can only share GTT belonging to this fd. 
Testcase: igt/gem_ctx_shared Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx> Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx> Cc: Michał Winiarski <michal.winiarski@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_gem_context.c | 62 ++++-- drivers/gpu/drm/i915/i915_gem_gtt.c | 4 +- drivers/gpu/drm/i915/i915_gem_gtt.h | 1 - drivers/gpu/drm/i915/selftests/huge_pages.c | 1 - drivers/gpu/drm/i915/selftests/i915_gem_context.c | 234 +++++++++++++++++----- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 1 - drivers/gpu/drm/i915/selftests/mock_context.c | 2 +- include/uapi/drm/i915_drm.h | 11 +- 8 files changed, 241 insertions(+), 75 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 648e7536ff51..3e480b22cc44 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -109,6 +109,8 @@ static void lut_close(struct i915_gem_context *ctx) struct i915_vma *vma = rcu_dereference_raw(*slot); radix_tree_iter_delete(&ctx->handles_vma, &iter, slot); + + vma->open_count--; __i915_gem_object_release_unless_active(vma->obj); } rcu_read_unlock(); @@ -201,8 +203,6 @@ static void context_close(struct i915_gem_context *ctx) * the ppgtt). */ lut_close(ctx); - if (ctx->ppgtt) - i915_ppgtt_close(&ctx->ppgtt->base); ctx->file_priv = ERR_PTR(-EBADF); i915_gem_context_put(ctx); @@ -338,6 +338,8 @@ static void __destroy_hw_context(struct i915_gem_context *ctx, context_close(ctx); } +#define CREATE_VM BIT(0) + /** * The default context needs to exist per ring that uses contexts. 
It stores the * context state of the GPU for applications that don't utilize HW contexts, as @@ -345,7 +347,8 @@ static void __destroy_hw_context(struct i915_gem_context *ctx, */ static struct i915_gem_context * i915_gem_create_context(struct drm_i915_private *dev_priv, - struct drm_i915_file_private *file_priv) + struct drm_i915_file_private *file_priv, + unsigned int flags) { struct i915_gem_context *ctx; @@ -358,7 +361,7 @@ i915_gem_create_context(struct drm_i915_private *dev_priv, if (IS_ERR(ctx)) return ctx; - if (USES_FULL_PPGTT(dev_priv)) { + if (flags & CREATE_VM && USES_FULL_PPGTT(dev_priv)) { struct i915_hw_ppgtt *ppgtt; ppgtt = i915_ppgtt_create(dev_priv, file_priv, ctx->name); @@ -423,7 +426,7 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio) { struct i915_gem_context *ctx; - ctx = i915_gem_create_context(i915, NULL); + ctx = i915_gem_create_context(i915, NULL, CREATE_VM); if (IS_ERR(ctx)) return ctx; @@ -545,7 +548,7 @@ int i915_gem_context_open(struct drm_i915_private *i915, idr_init(&file_priv->context_idr); mutex_lock(&i915->drm.struct_mutex); - ctx = i915_gem_create_context(i915, file_priv); + ctx = i915_gem_create_context(i915, file_priv, CREATE_VM); mutex_unlock(&i915->drm.struct_mutex); if (IS_ERR(ctx)) { idr_destroy(&file_priv->context_idr); @@ -642,10 +645,12 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_i915_gem_context_create *args = data; + struct drm_i915_gem_context_create_v2 *args = data; struct drm_i915_file_private *file_priv = file->driver_priv; + struct i915_gem_context *share = NULL; struct i915_gem_context *ctx; - int ret; + unsigned int flags = CREATE_VM; + int err; if (!dev_priv->engine[RCS]->context_size) return -ENODEV; @@ -653,6 +658,9 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, if (args->pad != 0) return -EINVAL; + if (args->flags & 
~I915_GEM_CONTEXT_SHARE_GTT) + return -EINVAL; + if (client_is_banned(file_priv)) { DRM_DEBUG("client %s[%d] banned from creating ctx\n", current->comm, @@ -661,21 +669,45 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, return -EIO; } - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; + if (args->flags & I915_GEM_CONTEXT_SHARE_GTT) { + share = i915_gem_context_lookup(file_priv, args->share_ctx); + if (!share) + return -ENOENT; + + if (!share->ppgtt) { + err = -ENODEV; + goto out; + } - ctx = i915_gem_create_context(dev_priv, file_priv); + flags &= ~CREATE_VM; + } + + err = i915_mutex_lock_interruptible(dev); + if (err) + goto out; + + ctx = i915_gem_create_context(dev_priv, file_priv, flags); mutex_unlock(&dev->struct_mutex); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out; + } + + if (!(flags & CREATE_VM)) { + i915_ppgtt_get(share->ppgtt); + ctx->ppgtt = share->ppgtt; + ctx->desc_template = share->desc_template; + } GEM_BUG_ON(i915_gem_context_is_kernel(ctx)); args->ctx_id = ctx->user_handle; DRM_DEBUG("HW context %d created\n", args->ctx_id); - return 0; +out: + if (share) + i915_gem_context_put(share); + return err; } int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index c5f393870532..2c422eed1e28 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2207,7 +2207,7 @@ i915_ppgtt_create(struct drm_i915_private *dev_priv, return ppgtt; } -void i915_ppgtt_close(struct i915_address_space *vm) +static void ppgtt_close(struct i915_address_space *vm) { struct list_head *phases[] = { &vm->active_list, @@ -2235,6 +2235,8 @@ void i915_ppgtt_release(struct kref *kref) trace_i915_ppgtt_release(&ppgtt->base); + ppgtt_close(&ppgtt->base); + /* vmas should already be unbound and destroyed */ WARN_ON(!list_empty(&ppgtt->base.active_list)); 
WARN_ON(!list_empty(&ppgtt->base.inactive_list)); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index a42890d9af38..7f46ceeae9a0 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -593,7 +593,6 @@ void i915_ppgtt_release(struct kref *kref); struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_i915_private *dev_priv, struct drm_i915_file_private *fpriv, const char *name); -void i915_ppgtt_close(struct i915_address_space *vm); static inline void i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt) { if (ppgtt) diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c index e6b31041cc88..d8022794f86d 100644 --- a/drivers/gpu/drm/i915/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c @@ -1712,7 +1712,6 @@ int i915_gem_huge_page_mock_selftests(void) err = i915_subtests(tests, ppgtt); out_close: - i915_ppgtt_close(&ppgtt->base); i915_ppgtt_put(ppgtt); out_unlock: diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index 56a803d11916..2a651ca2791d 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -127,10 +127,6 @@ static int gpu_fill(struct drm_i915_gem_object *obj, if (IS_ERR(vma)) return PTR_ERR(vma); - err = i915_gem_object_set_to_gtt_domain(obj, false); - if (err) - return err; - err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER); if (err) return err; @@ -173,7 +169,7 @@ static int gpu_fill(struct drm_i915_gem_object *obj, i915_vma_unpin(batch); i915_vma_close(batch); - i915_vma_move_to_active(vma, rq, 0); + i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); i915_vma_unpin(vma); reservation_object_lock(obj->resv, NULL); @@ -220,7 +216,8 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value) return 0; } -static int cpu_check(struct drm_i915_gem_object *obj, unsigned int max) +static 
noinline int cpu_check(struct drm_i915_gem_object *obj, + unsigned int idx, unsigned int max) { unsigned int n, m, needs_flush; int err; @@ -238,8 +235,8 @@ static int cpu_check(struct drm_i915_gem_object *obj, unsigned int max) for (m = 0; m < max; m++) { if (map[m] != m) { - pr_err("Invalid value at page %d, offset %d: found %x expected %x\n", - n, m, map[m], m); + pr_err("%pS: Invalid value at object %d page %d, offset %d: found %x expected %x\n", + __builtin_return_address(0), idx, n, m, map[m], m); err = -EINVAL; goto out_unmap; } @@ -247,8 +244,8 @@ static int cpu_check(struct drm_i915_gem_object *obj, unsigned int max) for (; m < DW_PER_PAGE; m++) { if (map[m] != 0xdeadbeef) { - pr_err("Invalid value at page %d, offset %d: found %x expected %x\n", - n, m, map[m], 0xdeadbeef); + pr_err("%pS: Invalid value at object %d page %d, offset %d: found %x expected %x\n", + __builtin_return_address(0), idx, n, m, map[m], 0xdeadbeef); err = -EINVAL; goto out_unmap; } @@ -311,6 +308,10 @@ create_test_object(struct i915_gem_context *ctx, return ERR_PTR(err); } + err = i915_gem_object_set_to_gtt_domain(obj, false); + if (err) + return ERR_PTR(err); + list_add_tail(&obj->st_link, objects); return obj; } @@ -326,12 +327,8 @@ static unsigned long max_dwords(struct drm_i915_gem_object *obj) static int igt_ctx_exec(void *arg) { struct drm_i915_private *i915 = arg; - struct drm_i915_gem_object *obj = NULL; - struct drm_file *file; - IGT_TIMEOUT(end_time); - LIST_HEAD(objects); - unsigned long ncontexts, ndwords, dw; - bool first_shared_gtt = true; + struct intel_engine_cs *engine; + enum intel_engine_id id; int err = -ENODEV; /* Create a few different contexts (with different mm) and write @@ -339,37 +336,160 @@ static int igt_ctx_exec(void *arg) * up in the expected pages of our obj. 
*/ - file = mock_file(i915); - if (IS_ERR(file)) - return PTR_ERR(file); + for_each_engine(engine, i915, id) { + struct drm_i915_gem_object *obj = NULL; + struct drm_file *file; + IGT_TIMEOUT(end_time); + LIST_HEAD(objects); + unsigned long ncontexts, ndwords, dw; + bool first_shared_gtt = true; + + if (!intel_engine_can_store_dword(engine)) + continue; - mutex_lock(&i915->drm.struct_mutex); + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); - ncontexts = 0; - ndwords = 0; - dw = 0; - while (!time_after(jiffies, end_time)) { - struct intel_engine_cs *engine; - struct i915_gem_context *ctx; - unsigned int id; + mutex_lock(&i915->drm.struct_mutex); - if (first_shared_gtt) { - ctx = __create_hw_context(i915, file->driver_priv); - first_shared_gtt = false; - } else { - ctx = i915_gem_create_context(i915, file->driver_priv); + + ncontexts = 0; + ndwords = 0; + dw = 0; + while (!time_after(jiffies, end_time)) { + struct i915_gem_context *ctx; + + if (first_shared_gtt) { + ctx = __create_hw_context(i915, file->driver_priv); + first_shared_gtt = false; + } else { + ctx = i915_gem_create_context(i915, + file->driver_priv, + CREATE_VM); + } + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out_unlock; + } + + if (!obj) { + obj = create_test_object(ctx, file, &objects); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_unlock; + } + } + + intel_runtime_pm_get(i915); + err = gpu_fill(obj, ctx, engine, dw); + intel_runtime_pm_put(i915); + if (err) { + pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? 
%s], err=%d\n", + ndwords, dw, max_dwords(obj), + engine->name, ctx->hw_id, + yesno(!!ctx->ppgtt), err); + goto out_unlock; + } + + if (++dw == max_dwords(obj)) { + obj = NULL; + dw = 0; + } + + ndwords++; + ncontexts++; + } + + pr_info("Submitted %lu contexts to %s, filling %lu dwords\n", + ncontexts, engine->name, ndwords); + + ncontexts = dw = 0; + list_for_each_entry(obj, &objects, st_link) { + unsigned int rem = + min_t(unsigned int, ndwords - dw, max_dwords(obj)); + + err = cpu_check(obj, ncontexts++, rem); + if (err) + break; + + dw += rem; } - if (IS_ERR(ctx)) { - err = PTR_ERR(ctx); + +out_unlock: + i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED); + mutex_unlock(&i915->drm.struct_mutex); + + mock_file_free(i915, file); + if (err) + return err; + } + + return 0; +} + +static int igt_shared_ctx_exec(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = -ENODEV; + + /* + * Create a few different contexts with the same mm and write + * through each ctx using the GPU making sure those writes end + * up in the expected pages of our obj. 
+ */ + + for_each_engine(engine, i915, id) { + IGT_TIMEOUT(end_time); + LIST_HEAD(objects); + unsigned long ncontexts, ndwords, dw; + struct drm_i915_gem_object *obj = NULL; + struct drm_file *file; + struct i915_gem_context *parent; + + if (!intel_engine_can_store_dword(engine)) + continue; + + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + mutex_lock(&i915->drm.struct_mutex); + + parent = i915_gem_create_context(i915, file->driver_priv, CREATE_VM); + if (IS_ERR(parent)) { + err = PTR_ERR(parent); + goto out_unlock; + } + + if (!parent->ppgtt) { + err = 0; goto out_unlock; } - for_each_engine(engine, i915, id) { - if (!intel_engine_can_store_dword(engine)) - continue; + + ncontexts = 0; + ndwords = 0; + dw = 0; + while (!time_after(jiffies, end_time)) { + struct i915_gem_context *ctx; + + ctx = i915_gem_create_context(i915, + file->driver_priv, + 0); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out_unlock; + } + + i915_ppgtt_get(parent->ppgtt); + ctx->ppgtt = parent->ppgtt; + ctx->desc_template = parent->desc_template; if (!obj) { - obj = create_test_object(ctx, file, &objects); + obj = create_test_object(parent, file, &objects); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto out_unlock; @@ -391,30 +511,35 @@ static int igt_ctx_exec(void *arg) obj = NULL; dw = 0; } + ndwords++; + ncontexts++; } - ncontexts++; - } - pr_info("Submitted %lu contexts (across %u engines), filling %lu dwords\n", - ncontexts, INTEL_INFO(i915)->num_rings, ndwords); + pr_info("Submitted %lu contexts to %s, filling %lu dwords\n", + ncontexts, engine->name, ndwords); - dw = 0; - list_for_each_entry(obj, &objects, st_link) { - unsigned int rem = - min_t(unsigned int, ndwords - dw, max_dwords(obj)); + ncontexts = dw = 0; + list_for_each_entry(obj, &objects, st_link) { + unsigned int rem = + min_t(unsigned int, ndwords - dw, max_dwords(obj)); - err = cpu_check(obj, rem); - if (err) - break; + err = cpu_check(obj, ncontexts++, rem); + if (err) + break; - dw += rem; - } 
+ dw += rem; + } out_unlock: - mutex_unlock(&i915->drm.struct_mutex); + i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED); + mutex_unlock(&i915->drm.struct_mutex); - mock_file_free(i915, file); - return err; + mock_file_free(i915, file); + if (err) + return err; + } + + return 0; } static int fake_aliasing_ppgtt_enable(struct drm_i915_private *i915) @@ -448,6 +573,7 @@ int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv) { static const struct i915_subtest tests[] = { SUBTEST(igt_ctx_exec), + SUBTEST(igt_shared_ctx_exec), }; bool fake_alias = false; int err; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 4a28d713a7d8..49e38c7ac76d 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -914,7 +914,6 @@ static int exercise_ppgtt(struct drm_i915_private *dev_priv, err = func(dev_priv, &ppgtt->base, 0, ppgtt->base.total, end_time); - i915_ppgtt_close(&ppgtt->base); i915_ppgtt_put(ppgtt); out_unlock: mutex_unlock(&dev_priv->drm.struct_mutex); diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c index bbf80d42e793..1209e41ebedb 100644 --- a/drivers/gpu/drm/i915/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/selftests/mock_context.c @@ -90,5 +90,5 @@ live_context(struct drm_i915_private *i915, struct drm_file *file) { lockdep_assert_held(&i915->drm.struct_mutex); - return i915_gem_create_context(i915, file->driver_priv); + return i915_gem_create_context(i915, file->driver_priv, CREATE_VM); } diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 536ee4febd74..3f7c9702a913 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -365,7 +365,7 @@ typedef struct _drm_i915_sarea { #define DRM_IOCTL_I915_SET_SPRITE_COLORKEY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_SET_SPRITE_COLORKEY, struct drm_intel_sprite_colorkey) #define 
DRM_IOCTL_I915_GET_SPRITE_COLORKEY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GET_SPRITE_COLORKEY, struct drm_intel_sprite_colorkey) #define DRM_IOCTL_I915_GEM_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_WAIT, struct drm_i915_gem_wait) -#define DRM_IOCTL_I915_GEM_CONTEXT_CREATE DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create) +#define DRM_IOCTL_I915_GEM_CONTEXT_CREATE DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create_v2) #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy) #define DRM_IOCTL_I915_REG_READ DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read) #define DRM_IOCTL_I915_GET_RESET_STATS DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats) @@ -1383,6 +1383,15 @@ struct drm_i915_gem_context_create { __u32 pad; }; +struct drm_i915_gem_context_create_v2 { + /* output: id of new context*/ + __u32 ctx_id; + __u32 flags; +#define I915_GEM_CONTEXT_SHARE_GTT 0x1 + __u32 share_ctx; + __u32 pad; +}; + struct drm_i915_gem_context_destroy { __u32 ctx_id; __u32 pad; -- 2.15.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx