If the preempted context takes too long to relinquish control, e.g. it is stuck inside a shader with arbitration disabled, evict that context with an engine reset. This ensures that preemptions are reasonably responsive, providing a tighter QoS for the more important context at the cost of flagging unresponsive contexts more frequently (i.e. instead of using an ~10s hangcheck, we now evict at ~100ms). The challenge of lies in picking a timeout that can be reasonably serviced by HW for typical workloads, balancing the existing clients against the needs for responsiveness. Note that coupled with timeslicing, this will lead to rapid GPU "hang" detection with multiple active contexts vying for GPU time. The preempt timeout can be adjusted per-engine using, /sys/class/drm/card0/engine/*/preempt_timeout_ms v2: Couple in sysfs control of preemption timeout Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx> Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> Reviewed-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/Kconfig.profile | 15 ++++ drivers/gpu/drm/i915/gt/intel_engine_cs.c | 2 + drivers/gpu/drm/i915/gt/intel_engine_sysfs.c | 32 +++++++ drivers/gpu/drm/i915/gt/intel_engine_types.h | 9 ++ drivers/gpu/drm/i915/gt/intel_lrc.c | 90 ++++++++++++++++++-- drivers/gpu/drm/i915/i915_params.h | 2 +- 6 files changed, 141 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile index 48df8889a88a..3db4ca5492a9 100644 --- a/drivers/gpu/drm/i915/Kconfig.profile +++ b/drivers/gpu/drm/i915/Kconfig.profile @@ -25,3 +25,18 @@ config DRM_I915_SPIN_REQUEST May be 0 to disable the initial spin. In practice, we estimate the cost of enabling the interrupt (if currently disabled) to be a few microseconds. + +config DRM_I915_PREEMPT_TIMEOUT + int "Preempt timeout (ms)" + default 100 # milliseconds + help + How long to wait (in milliseconds) for a preemption event to occur + when submitting a new context via execlists. If the current context + does not hit an arbitration point and yield to HW before the timer + expires, the HW will be reset to allow the more important context + to execute. + + This is adjustable via + /sys/class/drm/card0/engine/*/preempt_timeout_ms + + May be 0 to disable the timeout. diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 80fd072ac719..dfdffaf35678 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -304,6 +304,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) engine->instance = info->instance; __sprint_engine_name(engine); + engine->props.preempt_timeout = CONFIG_DRM_I915_PREEMPT_TIMEOUT; + /* * To be overridden by the backend on setup. However to facilitate * cleanup on error during setup, we always provide the destroy vfunc. diff --git a/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c b/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c index cbe9ec59beeb..aac26097c916 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c @@ -45,10 +45,37 @@ mmio_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) return sprintf(buf, "0x%x\n", kobj_to_engine(kobj)->mmio_base); } +static ssize_t +preempt_timeout_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct intel_engine_cs *engine = kobj_to_engine(kobj); + + return sprintf(buf, "%lu\n", engine->props.preempt_timeout); +} + +static ssize_t +preempt_timeout_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct intel_engine_cs *engine = kobj_to_engine(kobj); + unsigned long timeout; + int err; + + err = kstrtoul(buf, 0, &timeout); + if (err) + return err; + + engine->props.preempt_timeout = timeout; + return count; +} + static struct kobj_attribute name_attr = __ATTR(name, 0444, name_show, NULL); static struct kobj_attribute class_attr = __ATTR(class, 0444, class_show, NULL); static struct kobj_attribute inst_attr = __ATTR(instance, 0444, inst_show, NULL); static struct kobj_attribute mmio_attr = __ATTR(mmio_base, 0444, mmio_show, NULL); +static struct kobj_attribute preempt_timeout_attr = +__ATTR(preempt_timeout_ms, 0600, preempt_timeout_show, preempt_timeout_store); static void kobj_engine_release(struct kobject *kobj) { @@ -109,6 +136,11 @@ void intel_engines_add_sysfs(struct drm_i915_private *i915) if (sysfs_create_files(kobj, files)) goto err_engine; + if (CONFIG_DRM_I915_PREEMPT_TIMEOUT && + intel_engine_has_preemption(engine) && + sysfs_create_file(kobj, &preempt_timeout_attr.attr)) + goto err_engine; + if (0) { err_engine: dev_err(kdev, "Failed to add sysfs engine '%s'\n", diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 943f0663837e..61d271668948 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -173,6 +173,11 @@ struct intel_engine_execlists { */ struct timer_list timer; + /** + * @preempt: reset the current context if it fails to give way + */ + struct timer_list preempt; + /** * @default_priolist: priority list for I915_PRIORITY_NORMAL */ @@ -539,6 +544,10 @@ struct intel_engine_cs { */ ktime_t total; } stats; + + struct { + unsigned long preempt_timeout; + } props; }; static inline bool diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 431d3b8c3371..7741d78139ab 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1394,6 +1394,29 @@ static void record_preemption(struct intel_engine_execlists *execlists) (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++); } +static void set_preempt_timeout(struct intel_engine_cs *engine) +{ + unsigned long timeout; + + if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT) + return; + + timeout = READ_ONCE(engine->props.preempt_timeout); + if (!timeout) + return; + + timeout = msecs_to_jiffies_timeout(timeout); + /* + * Paranoia to make sure the compiler computes the timeout before + * loading 'jiffies' as jiffies is volatile and may be updated in + * the background by a timer tick. All to reduce the complexity + * of the addition and reduce the risk of losing a jiffie. + */ + barrier(); + + mod_timer(&engine->execlists.preempt, jiffies + timeout); +} + static void execlists_dequeue(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; @@ -1743,6 +1766,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) execlists->switch_priority_hint = switch_prio(engine, *execlists->pending); execlists_submit_ports(engine); + set_preempt_timeout(engine); } else { ring_set_paused(engine, 0); } @@ -1970,6 +1994,38 @@ static void __execlists_submission_tasklet(struct intel_engine_cs *const engine) execlists_dequeue(engine); } +static noinline void preempt_reset(struct intel_engine_cs *engine) +{ + const unsigned int bit = I915_RESET_ENGINE + engine->id; + unsigned long *lock = &engine->gt->reset.flags; + + if (i915_modparams.reset < 3) + return; + + if (test_and_set_bit(bit, lock)) + return; + + /* Mark this tasklet as disabled to avoid waiting for it to complete */ + tasklet_disable_nosync(&engine->execlists.tasklet); + + intel_engine_reset(engine, "preemption time out"); + + tasklet_enable(&engine->execlists.tasklet); + clear_and_wake_up_bit(bit, lock); +} + +static bool preempt_timeout(struct intel_engine_cs *const engine) +{ + if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT) + return false; + + if (!intel_engine_has_preemption(engine)) + return false; + + return !timer_pending(&engine->execlists.preempt) && + READ_ONCE(engine->execlists.pending[0]); +} + /* * Check the unread Context Status Buffers and manage the submission of new * contexts to the ELSP accordingly. @@ -1977,23 +2033,39 @@ static void __execlists_submission_tasklet(struct intel_engine_cs *const engine) static void execlists_submission_tasklet(unsigned long data) { struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; - unsigned long flags; + bool timeout = preempt_timeout(engine); process_csb(engine); - if (!READ_ONCE(engine->execlists.pending[0])) { + if (!READ_ONCE(engine->execlists.pending[0]) || timeout) { + unsigned long flags; + spin_lock_irqsave(&engine->active.lock, flags); __execlists_submission_tasklet(engine); spin_unlock_irqrestore(&engine->active.lock, flags); + + /* Recheck after serialising with direct-submission */ + if (timeout && preempt_timeout(engine)) + preempt_reset(engine); } } -static void execlists_submission_timer(struct timer_list *timer) +static void __execlists_kick(struct intel_engine_execlists *execlists) { - struct intel_engine_cs *engine = - from_timer(engine, timer, execlists.timer); - /* Kick the tasklet for some interrupt coalescing and reset handling */ - tasklet_hi_schedule(&engine->execlists.tasklet); + tasklet_hi_schedule(&execlists->tasklet); +} + +#define execlists_kick(t, member) \ + __execlists_kick(container_of(t, struct intel_engine_execlists, member)) + +static void execlists_timeslice(struct timer_list *timer) +{ + execlists_kick(timer, timer); +} + +static void execlists_preempt(struct timer_list *timer) +{ + execlists_kick(timer, preempt); } static void queue_request(struct intel_engine_cs *engine, @@ -3417,6 +3489,7 @@ gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) static void execlists_park(struct intel_engine_cs *engine) { del_timer(&engine->execlists.timer); + del_timer(&engine->execlists.preempt); } void intel_execlists_set_default_submission(struct intel_engine_cs *engine) @@ -3534,7 +3607,8 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) { tasklet_init(&engine->execlists.tasklet, execlists_submission_tasklet, (unsigned long)engine); - timer_setup(&engine->execlists.timer, execlists_submission_timer, 0); + timer_setup(&engine->execlists.timer, execlists_timeslice, 0); + timer_setup(&engine->execlists.preempt, execlists_preempt, 0); logical_ring_default_vfuncs(engine); logical_ring_default_irqs(engine); diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index d29ade3b7de6..56058978bb27 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -61,7 +61,7 @@ struct drm_printer; param(char *, dmc_firmware_path, NULL) \ param(int, mmio_debug, -IS_ENABLED(CONFIG_DRM_I915_DEBUG_MMIO)) \ param(int, edp_vswing, 0) \ - param(int, reset, 2) \ + param(int, reset, 3) \ param(unsigned int, inject_load_failure, 0) \ param(int, fastboot, -1) \ param(int, enable_dpcd_backlight, 0) \ -- 2.23.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx