Re: [PATCH 01/11] drm/i915/gem: Make context persistence optional

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Oct 24, 2019 at 6:40 AM Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> wrote:
Our existing behaviour is to allow contexts and their GPU requests to
persist past the point of closure until the requests are complete. This
allows clients to operate in a 'fire-and-forget' manner where they can
setup a rendering pipeline and hand it over to the display server and
immediately exiting. As the rendering pipeline is kept alive until
completion, the display server (or other consumer) can use the results
in the future and present them to the user.

However, not all clients want this persistent behaviour and would prefer
that the contexts are cleaned up immediately upon closure. This ensures
that when clients are run without hangchecking, any GPU hang is
terminated with the process and does not continue to hog resources.

By defining a context property to allow clients to control persistence
explicitly, we can remove the blanket advice to disable hangchecking
that seems to be far too prevalent.

Just to be clear, when you say "disable hangchecking" do you mean disabling it for all processes via a kernel parameter at boot time or a sysfs entry or similar?  Or is there some mechanism whereby a context can request no hang checking?
 
The default behaviour for new controls is the legacy persistence mode.
New clients will have to opt out for immediate cleanup on context
closure. If the hangchecking modparam is disabled, so is persistent
context support -- all contexts will be terminated on closure.

What happens to fences when the context is cancelled?  Is it the same behavior as we have today for when a GPU hang is detected and a context is banned?

--Jason

 
Testcase: igt/gem_ctx_persistence
Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx>
Cc: Michał Winiarski <michal.winiarski@xxxxxxxxx>
Cc: Jon Bloomfield <jon.bloomfield@xxxxxxxxx>
Reviewed-by: Jon Bloomfield <jon.bloomfield@xxxxxxxxx>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 50 ++++++++++++++++++-
 drivers/gpu/drm/i915/gem/i915_gem_context.h   | 15 ++++++
 .../gpu/drm/i915/gem/i915_gem_context_types.h |  1 +
 .../gpu/drm/i915/gem/selftests/mock_context.c |  2 +
 include/uapi/drm/i915_drm.h                   | 15 ++++++
 5 files changed, 82 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 55f1f93c0925..0f1bbf5d1a11 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -437,12 +437,39 @@ static void context_close(struct i915_gem_context *ctx)
         * case we opt to forcibly kill off all remaining requests on
         * context close.
         */
-       if (!i915_modparams.enable_hangcheck)
+       if (!i915_gem_context_is_persistent(ctx) ||
+           !i915_modparams.enable_hangcheck)
                kill_context(ctx);

        i915_gem_context_put(ctx);
 }

+static int __context_set_persistence(struct i915_gem_context *ctx, bool state)
+{
+       if (i915_gem_context_is_persistent(ctx) == state)
+               return 0;
+
+       if (state) {
+               /*
+                * Only contexts that are short-lived [that will expire or be
+                * reset] are allowed to survive past termination. We require
+                * hangcheck to ensure that the persistent requests are healthy.
+                */
+               if (!i915_modparams.enable_hangcheck)
+                       return -EINVAL;
+
+               i915_gem_context_set_persistence(ctx);
+       } else {
+               /* To cancel a context we use "preempt-to-idle" */
+               if (!(ctx->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
+                       return -ENODEV;
+
+               i915_gem_context_clear_persistence(ctx);
+       }
+
+       return 0;
+}
+
 static struct i915_gem_context *
 __create_context(struct drm_i915_private *i915)
 {
@@ -477,6 +504,7 @@ __create_context(struct drm_i915_private *i915)

        i915_gem_context_set_bannable(ctx);
        i915_gem_context_set_recoverable(ctx);
+       __context_set_persistence(ctx, true /* cgroup hook? */);

        for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
                ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
@@ -633,6 +661,7 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
                return ctx;

        i915_gem_context_clear_bannable(ctx);
+       i915_gem_context_set_persistence(ctx);
        ctx->sched.priority = I915_USER_PRIORITY(prio);

        GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));
@@ -1743,6 +1772,16 @@ get_engines(struct i915_gem_context *ctx,
        return err;
 }

+static int
+set_persistence(struct i915_gem_context *ctx,
+               const struct drm_i915_gem_context_param *args)
+{
+       if (args->size)
+               return -EINVAL;
+
+       return __context_set_persistence(ctx, args->value);
+}
+
 static int ctx_setparam(struct drm_i915_file_private *fpriv,
                        struct i915_gem_context *ctx,
                        struct drm_i915_gem_context_param *args)
@@ -1820,6 +1859,10 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv,
                ret = set_engines(ctx, args);
                break;

+       case I915_CONTEXT_PARAM_PERSISTENCE:
+               ret = set_persistence(ctx, args);
+               break;
+
        case I915_CONTEXT_PARAM_BAN_PERIOD:
        default:
                ret = -EINVAL;
@@ -2272,6 +2315,11 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
                ret = get_engines(ctx, args);
                break;

+       case I915_CONTEXT_PARAM_PERSISTENCE:
+               args->size = 0;
+               args->value = i915_gem_context_is_persistent(ctx);
+               break;
+
        case I915_CONTEXT_PARAM_BAN_PERIOD:
        default:
                ret = -EINVAL;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
index cfe80590f0ed..18e50a769a6e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -76,6 +76,21 @@ static inline void i915_gem_context_clear_recoverable(struct i915_gem_context *c
        clear_bit(UCONTEXT_RECOVERABLE, &ctx->user_flags);
 }

+static inline bool i915_gem_context_is_persistent(const struct i915_gem_context *ctx)
+{
+       return test_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags);
+}
+
+static inline void i915_gem_context_set_persistence(struct i915_gem_context *ctx)
+{
+       set_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags);
+}
+
+static inline void i915_gem_context_clear_persistence(struct i915_gem_context *ctx)
+{
+       clear_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags);
+}
+
 static inline bool i915_gem_context_is_banned(const struct i915_gem_context *ctx)
 {
        return test_bit(CONTEXT_BANNED, &ctx->flags);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index fe97b8ba4fda..861d7d92fe9f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -137,6 +137,7 @@ struct i915_gem_context {
 #define UCONTEXT_NO_ERROR_CAPTURE      1
 #define UCONTEXT_BANNABLE              2
 #define UCONTEXT_RECOVERABLE           3
+#define UCONTEXT_PERSISTENCE           4

        /**
         * @flags: small set of booleans
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
index 74ddd682c9cd..29b8984f0e47 100644
--- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
@@ -22,6 +22,8 @@ mock_context(struct drm_i915_private *i915,
        INIT_LIST_HEAD(&ctx->link);
        ctx->i915 = i915;

+       i915_gem_context_set_persistence(ctx);
+
        mutex_init(&ctx->engines_mutex);
        e = default_engines(ctx);
        if (IS_ERR(e))
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 63d40cba97e0..b5fd5e4bd16e 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1572,6 +1572,21 @@ struct drm_i915_gem_context_param {
  *   i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND)
  */
 #define I915_CONTEXT_PARAM_ENGINES     0xa
+
+/*
+ * I915_CONTEXT_PARAM_PERSISTENCE:
+ *
+ * Allow the context and active rendering to survive the process until
+ * completion. Persistence allows fire-and-forget clients to queue up a
+ * bunch of work, hand the output over to a display server and the quit.
+ * If the context is not marked as persistent, upon closing (either via
+ * an explicit DRM_I915_GEM_CONTEXT_DESTROY or implicitly from file closure
+ * or process termination), the context and any outstanding requests will be
+ * cancelled (and exported fences for cancelled requests marked as -EIO).
+ *
+ * By default, new contexts allow persistence.
+ */
+#define I915_CONTEXT_PARAM_PERSISTENCE 0xb
 /* Must be kept compact -- no holes and well documented */

        __u64 value;
--
2.24.0.rc0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Index of Archives]     [AMD Graphics]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux