On Thu, 18 Mar 2021 at 17:04, Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxxxxxxxx> wrote: > > From: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > > Currently, we cancel outstanding requests within a context when the > context is closed. We may also want to cancel individual requests using > the same graceful preemption mechanism. > > v2 (Tvrtko): > * Cancel waiters carefully considering no timeline lock and RCU. > * Fixed selftests. > > v3 (Tvrtko): > * Remove error propagation to waiters for now. > > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> > --- > .../gpu/drm/i915/gt/intel_engine_heartbeat.c | 1 + > .../drm/i915/gt/intel_execlists_submission.c | 9 +- > drivers/gpu/drm/i915/i915_request.c | 52 ++++- > drivers/gpu/drm/i915/i915_request.h | 4 +- > drivers/gpu/drm/i915/selftests/i915_request.c | 201 ++++++++++++++++++ > 5 files changed, 261 insertions(+), 6 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c > index 0b062fad1837..e2fb3ae2aaf3 100644 > --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c > +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c > @@ -314,6 +314,7 @@ int intel_engine_pulse(struct intel_engine_cs *engine) > mutex_unlock(&ce->timeline->mutex); > } > > + intel_engine_flush_scheduler(engine); > intel_engine_pm_put(engine); > return err; > } > diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > index 85ff5fe861b4..4c2acb5a6c0a 100644 > --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > @@ -421,6 +421,11 @@ static void reset_active(struct i915_request *rq, > ce->lrc.lrca = lrc_update_regs(ce, engine, head); > } > > +static bool bad_request(const struct i915_request *rq) > +{ > + return rq->fence.error && i915_request_started(rq); > +} > + > static struct intel_engine_cs * > __execlists_schedule_in(struct i915_request *rq) > { > @@ -433,7 +438,7 @@ __execlists_schedule_in(struct i915_request *rq) > !intel_engine_has_heartbeat(engine))) > intel_context_set_banned(ce); > > - if (unlikely(intel_context_is_banned(ce))) > + if (unlikely(intel_context_is_banned(ce) || bad_request(rq))) > reset_active(rq, engine); > > if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) > @@ -1112,7 +1117,7 @@ static unsigned long active_preempt_timeout(struct intel_engine_cs *engine, > return 0; > > /* Force a fast reset for terminated contexts (ignoring sysfs!) */ > - if (unlikely(intel_context_is_banned(rq->context))) > + if (unlikely(intel_context_is_banned(rq->context) || bad_request(rq))) > return 1; > > return READ_ONCE(engine->props.preempt_timeout_ms); > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c > index e7b4c4bc41a6..b4511ac05e9a 100644 > --- a/drivers/gpu/drm/i915/i915_request.c > +++ b/drivers/gpu/drm/i915/i915_request.c > @@ -33,7 +33,10 @@ > #include "gem/i915_gem_context.h" > #include "gt/intel_breadcrumbs.h" > #include "gt/intel_context.h" > +#include "gt/intel_engine.h" > +#include "gt/intel_engine_heartbeat.h" > #include "gt/intel_gpu_commands.h" > +#include "gt/intel_reset.h" > #include "gt/intel_ring.h" > #include "gt/intel_rps.h" > > @@ -429,20 +432,22 @@ void __i915_request_skip(struct i915_request *rq) > rq->infix = rq->postfix; > } > > -void i915_request_set_error_once(struct i915_request *rq, int error) > +bool i915_request_set_error_once(struct i915_request *rq, int error) > { > int old; > > GEM_BUG_ON(!IS_ERR_VALUE((long)error)); > > if (i915_request_signaled(rq)) > - return; > + return false; > > old = READ_ONCE(rq->fence.error); > do { > if (fatal_error(old)) > - return; > + return false; > } while (!try_cmpxchg(&rq->fence.error, &old, error)); > + > + return true; > } > > struct i915_request *i915_request_mark_eio(struct i915_request *rq) > @@ -609,6 +614,47 @@ void i915_request_unsubmit(struct i915_request *request) > spin_unlock_irqrestore(&se->lock, flags); > } > > +static struct intel_engine_cs *active_engine(struct i915_request *rq) > +{ > + struct intel_engine_cs *engine, *locked; > + > + locked = READ_ONCE(rq->engine); > + spin_lock_irq(&locked->sched.lock); > + while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) { > + spin_unlock(&locked->sched.lock); > + locked = engine; > + spin_lock(&locked->sched.lock); > + } > + > + engine = NULL; > + if (i915_request_is_active(rq) && !__i915_request_is_complete(rq)) > + engine = locked; > + > + spin_unlock_irq(&locked->sched.lock); > + > + return engine; Bad idea to reuse __active_engine() somehow? Reviewed-by: Matthew Auld <matthew.auld@xxxxxxxxx> _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx