Quoting jeff.mcgee@xxxxxxxxx (2018-03-16 18:31:05) > From: Jeff McGee <jeff.mcgee@xxxxxxxxx> > > The hardware can complete the requested preemption at only certain points > in execution. Thus an uncooperative request that avoids those points can > block a preemption indefinitely. Our only option to bound the preemption > latency is to trigger reset and recovery just as we would if a request had > hung the hardware. This is so-called forced preemption. This change adds > that capability as an option for systems with extremely strict scheduling > latency requirements for its high priority requests. This option must be > used with great care. The force-preempted requests will be discarded at > the point of reset, resulting in various degrees of disruption to the > owning application up to and including crash. > > The option is enabled by specifying a non-zero value for new i915 module > parameter fpreempt_timeout. This value becomes the time in milliseconds > after initiation of preemption at which the reset is triggered if the > preemption has not completed normally. > > Test: Run IGT gem_exec_fpreempt. > Change-Id: Iafd3609012621c57fa9e490dfeeac46ae541b5c2 > Signed-off-by: Jeff McGee <jeff.mcgee@xxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_drv.c | 14 ++++++++- > drivers/gpu/drm/i915/i915_drv.h | 2 ++ > drivers/gpu/drm/i915/i915_gem.c | 37 +++++++++++++++++++++-- > drivers/gpu/drm/i915/i915_params.c | 3 ++ > drivers/gpu/drm/i915/i915_params.h | 1 + > drivers/gpu/drm/i915/intel_engine_cs.c | 53 +++++++++++++++++++++++++++++++++ > drivers/gpu/drm/i915/intel_lrc.c | 24 ++++++++++++++- > drivers/gpu/drm/i915/intel_ringbuffer.h | 6 ++++ > 8 files changed, 136 insertions(+), 4 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c > index b627370d5a9c..3f2394d61ea2 100644 > --- a/drivers/gpu/drm/i915/i915_drv.c > +++ b/drivers/gpu/drm/i915/i915_drv.c > @@ -811,8 +811,16 @@ static int i915_workqueues_init(struct drm_i915_private *dev_priv) > if (dev_priv->hotplug.dp_wq == NULL) > goto out_free_wq; > > + if (INTEL_INFO(dev_priv)->has_logical_ring_preemption) { > + dev_priv->fpreempt_wq = alloc_ordered_workqueue("i915-fpreempt", > + WQ_HIGHPRI); > + if (dev_priv->fpreempt_wq == NULL) > + goto out_free_dp_wq; Doesn't require ordering, the resets are if either required to fall back to a full reset. > + } > return 0; > > +out_free_dp_wq: > + destroy_workqueue(dev_priv->hotplug.dp_wq); > out_free_wq: > destroy_workqueue(dev_priv->wq); > out_err: > @@ -832,6 +840,8 @@ static void i915_engines_cleanup(struct drm_i915_private *i915) > > static void i915_workqueues_cleanup(struct drm_i915_private *dev_priv) > { > + if (INTEL_INFO(dev_priv)->has_logical_ring_preemption) > + destroy_workqueue(dev_priv->fpreempt_wq); > destroy_workqueue(dev_priv->hotplug.dp_wq); > destroy_workqueue(dev_priv->wq); > } > @@ -2007,7 +2017,9 @@ int i915_reset_engine(struct intel_engine_cs *engine, unsigned int flags) > > if (!(flags & I915_RESET_QUIET)) { > dev_notice(engine->i915->drm.dev, > - "Resetting %s after gpu hang\n", engine->name); > + "Resetting %s %s\n", engine->name, > + engine->fpreempt_active ? > + "for force preemption" : "after gpu hang"); > } > error->reset_engine_count[engine->id]++; > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index ade09f97be5c..514e640d8406 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -2290,6 +2290,8 @@ struct drm_i915_private { > */ > struct workqueue_struct *wq; > > + struct workqueue_struct *fpreempt_wq; > + > /* Display functions */ > struct drm_i915_display_funcs display; > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 9780d9026ce6..d556743c578a 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -2811,9 +2811,21 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) > return request; > } > > -static bool engine_stalled(struct intel_engine_cs *engine) > +static bool engine_stalled(struct intel_engine_cs *engine, > + struct drm_i915_gem_request *request) > { > if (!intel_vgpu_active(engine->i915)) { > + if (engine->fpreempt_active) { > + /* Pardon the request if it managed to complete or > + * preempt prior to the reset. > + */ > + if (i915_gem_request_completed(request) || > + intel_engine_preempt_finished(engine)) > + return false; > + No, once you pull the trigger just go through with it. You should never pull the trigger if you are worried about the consequences. > + return true; > + } > + > if (!engine->hangcheck.stalled) > return false; > > @@ -2858,6 +2870,13 @@ i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) > tasklet_kill(&engine->execlists.irq_tasklet); > tasklet_disable(&engine->execlists.irq_tasklet); > > + /* There may be a force preemption timer active on this engine but > + * not yet expired, i.e. not the reason we are about to reset this > + * engine. Cancel it. If force preemption timeout is the reason we > + * are resetting the engine, this call will have no efffect. > + */ Done by just clearing execlists->active. > + intel_engine_cancel_fpreempt(engine); > + > if (engine->irq_seqno_barrier) > engine->irq_seqno_barrier(engine); > > @@ -2958,7 +2977,7 @@ i915_gem_reset_request(struct intel_engine_cs *engine, > * subsequent hangs. > */ > > - if (engine_stalled(engine)) { > + if (engine_stalled(engine, request)) { > i915_gem_context_mark_guilty(request->ctx); > skip_request(request); > > @@ -2966,6 +2985,13 @@ i915_gem_reset_request(struct intel_engine_cs *engine, > if (i915_gem_context_is_banned(request->ctx)) > engine_skip_context(request); > } else { > + /* If the request that we just pardoned was the target of a > + * force preemption there is no possibility of the next > + * request in line having started. > + */ > + if (engine->fpreempt_active) > + return NULL; > + > /* > * Since this is not the hung engine, it may have advanced > * since the hang declaration. Double check by refinding > @@ -3035,6 +3061,13 @@ void i915_gem_reset(struct drm_i915_private *dev_priv) > > void i915_gem_reset_finish_engine(struct intel_engine_cs *engine) > { > + /* Mark any active force preemption as complete then kick > + * the tasklet. > + */ > + engine->fpreempt_active = false; > + if (engine->execlists.first) > + tasklet_schedule(&engine->execlists.irq_tasklet); Stray chunk. > + > tasklet_enable(&engine->execlists.irq_tasklet); > kthread_unpark(engine->breadcrumbs.signaler); > } > diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c > index 63751a1de74a..9a0deb6e3920 100644 > --- a/drivers/gpu/drm/i915/i915_params.c > +++ b/drivers/gpu/drm/i915/i915_params.c > @@ -247,3 +247,6 @@ MODULE_PARM_DESC(enable_conformance_check, "To toggle the GVT guest conformance > > module_param_named(disable_gvt_fw_loading, i915_modparams.disable_gvt_fw_loading, bool, 0400); > MODULE_PARM_DESC(disable_gvt_fw_loading, "Disable GVT-g fw loading."); > + > +i915_param_named(fpreempt_timeout, uint, 0600, > + "Wait time in msecs before forcing a preemption with reset (0:never force [default])"); > diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h > index e8c2ba4cb1e6..65fbffa6333c 100644 > --- a/drivers/gpu/drm/i915/i915_params.h > +++ b/drivers/gpu/drm/i915/i915_params.h > @@ -54,6 +54,7 @@ > func(int, edp_vswing); \ > func(int, reset); \ > func(unsigned int, inject_load_failure); \ > + func(unsigned int, fpreempt_timeout); \ > /* leave bools at the end to not create holes */ \ > func(bool, alpha_support); \ > func(bool, enable_cmd_parser); \ > diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c > index c9bf2347f7a4..af6cc2d0f7e9 100644 > --- a/drivers/gpu/drm/i915/intel_engine_cs.c > +++ b/drivers/gpu/drm/i915/intel_engine_cs.c > @@ -411,6 +411,58 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine) > execlists->first = NULL; > } > > +void intel_engine_queue_fpreempt(struct intel_engine_cs *engine) > +{ > + unsigned int timeout = i915_modparams.fpreempt_timeout; > + > + if (!timeout) > + return; > + > + GEM_BUG_ON(engine->fpreempt_active); > + hrtimer_start(&engine->fpreempt_timer, > + ms_to_ktime(timeout), HRTIMER_MODE_REL); > +} > + > +bool intel_engine_cancel_fpreempt(struct intel_engine_cs *engine) > +{ > + hrtimer_cancel(&engine->fpreempt_timer); > + > + return !engine->fpreempt_active; > +} > + > +static enum hrtimer_restart > +intel_engine_fpreempt_timer(struct hrtimer *hrtimer) > +{ > + struct intel_engine_cs *engine = > + container_of(hrtimer, struct intel_engine_cs, > + fpreempt_timer); > + > + engine->fpreempt_active = true; > + queue_work(engine->i915->fpreempt_wq, &engine->fpreempt_work); Ah I see you didn't check. > + > + return HRTIMER_NORESTART; > +} > + > +static void intel_engine_fpreempt_work(struct work_struct *work) > +{ > + struct intel_engine_cs *engine = > + container_of(work, struct intel_engine_cs, > + fpreempt_work); > + > + i915_handle_reset(engine->i915, intel_engine_flag(engine)); > +} > + > +static void intel_engine_init_fpreempt(struct intel_engine_cs *engine) > +{ > + if (!INTEL_INFO(engine->i915)->has_logical_ring_preemption) > + return; > + > + hrtimer_init(&engine->fpreempt_timer, > + CLOCK_MONOTONIC, HRTIMER_MODE_REL); > + engine->fpreempt_timer.function = intel_engine_fpreempt_timer; > + INIT_WORK(&engine->fpreempt_work, intel_engine_fpreempt_work); Just set the few pointers. It's not like we are saving anything. > +} > + > /** > * intel_engines_setup_common - setup engine state not requiring hw access > * @engine: Engine to setup. > @@ -426,6 +478,7 @@ void intel_engine_setup_common(struct intel_engine_cs *engine) > > intel_engine_init_timeline(engine); > intel_engine_init_hangcheck(engine); > + intel_engine_init_fpreempt(engine); > i915_gem_batch_pool_init(engine, &engine->batch_pool); > > intel_engine_init_cmd_parser(engine); > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c > index 581483886153..17487f8e8b4c 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -628,6 +628,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) > inject_preempt_context(engine); > execlists_set_active(execlists, > EXECLISTS_ACTIVE_PREEMPT); > + intel_engine_queue_fpreempt(engine); > goto unlock; > } else { > /* > @@ -846,6 +847,7 @@ static void intel_lrc_irq_handler(unsigned long data) > const u32 *buf = > &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; > unsigned int head, tail; > + bool defer = false; > > /* However GVT emulation depends upon intercepting CSB mmio */ > if (unlikely(execlists->csb_use_mmio)) { > @@ -919,6 +921,21 @@ static void intel_lrc_irq_handler(unsigned long data) > > if (status & GEN8_CTX_STATUS_ACTIVE_IDLE && > buf[2*head + 1] == PREEMPT_ID) { > + /* Try to cancel any pending force preemption. > + * If we are too late, hold off on processing > + * the completed preemption until reset has > + * run its course. It should recognize that > + * the engine has preempted to idle then abort > + * the reset. Then we can resume processing > + * at this CSB head. > + */ This can be much simpler than implied here. -Chris _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx