When we allow ourselves to sleep before a GPU reset after disabling submission, even for a few milliseconds, gives an innocent context the opportunity to clear the GPU before the reset occurs. However, how long to sleep depends on the typical non-preemptible duration (a similar problem to determining the ideal preempt-reset timeout or even the heartbeat interval). As this seems of a hard policy decision, punt it to userspace. The timeout can be adjusted using /sys/class/drm/card?/engine/*/stop_timeout_ms Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx> Cc: Jon Bloomfield <jon.bloomfield@xxxxxxxxx> Reviewed-by: Steve Carbonari <steven.carbonari@xxxxxxxxx> Tested-by: Steve Carbonari <steven.carbonari@xxxxxxxxx> --- drivers/gpu/drm/i915/Kconfig.profile | 3 ++ drivers/gpu/drm/i915/gt/sysfs_engines.c | 40 +++++++++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile index 9ee3b59685b9..5f4ec3aec1d2 100644 --- a/drivers/gpu/drm/i915/Kconfig.profile +++ b/drivers/gpu/drm/i915/Kconfig.profile @@ -63,6 +63,9 @@ config DRM_I915_STOP_TIMEOUT that the reset itself may take longer and so be more disruptive to interactive or low latency workloads. + This is adjustable via + /sys/class/drm/card?/engine/*/stop_timeout_ms + config DRM_I915_TIMESLICE_DURATION int "Scheduling quantum for userspace batches (ms, jiffy granularity)" default 1 # milliseconds diff --git a/drivers/gpu/drm/i915/gt/sysfs_engines.c b/drivers/gpu/drm/i915/gt/sysfs_engines.c index 061facb41602..93ab6e880fc3 100644 --- a/drivers/gpu/drm/i915/gt/sysfs_engines.c +++ b/drivers/gpu/drm/i915/gt/sysfs_engines.c @@ -232,6 +232,45 @@ timeslice_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) static struct kobj_attribute timeslice_duration_attr = __ATTR(timeslice_duration_ms, 0644, timeslice_show, timeslice_store); +static ssize_t +stop_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct intel_engine_cs *engine = kobj_to_engine(kobj); + unsigned long long duration; + int err; + + /* + * When we allow ourselves to sleep before a GPU reset after disabling + * submission, even for a few milliseconds, gives an innocent context + * the opportunity to clear the GPU before the reset occurs. However, + * how long to sleep depends on the typical non-preemptible duration + * (a similar problem to determining the ideal preempt-reset timeout + * or even the heartbeat interval). + */ + + err = kstrtoull(buf, 0, &duration); + if (err) + return err; + + if (duration > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT)) + return -EINVAL; + + WRITE_ONCE(engine->props.stop_timeout_ms, duration); + return count; +} + +static ssize_t +stop_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + struct intel_engine_cs *engine = kobj_to_engine(kobj); + + return sprintf(buf, "%lu\n", engine->props.stop_timeout_ms); +} + +static struct kobj_attribute stop_timeout_attr = +__ATTR(stop_timeout_ms, 0644, stop_show, stop_store); + static void kobj_engine_release(struct kobject *kobj) { kfree(kobj); @@ -273,6 +312,7 @@ void intel_engines_add_sysfs(struct drm_i915_private *i915) &caps_attr.attr, &all_caps_attr.attr, &max_spin_attr.attr, + &stop_timeout_attr.attr, NULL }; -- 2.25.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx