On 22/03/2021 15:38, Matthew Auld wrote:
On Thu, 18 Mar 2021 at 17:04, Tvrtko Ursulin
<tvrtko.ursulin@xxxxxxxxxxxxxxx> wrote:
From: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Currently, we cancel outstanding requests within a context when the
context is closed. We may also want to cancel individual requests using
the same graceful preemption mechanism.
v2 (Tvrtko):
* Cancel waiters carefully considering no timeline lock and RCU.
* Fixed selftests.
v3 (Tvrtko):
* Remove error propagation to waiters for now.
Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
---
.../gpu/drm/i915/gt/intel_engine_heartbeat.c | 1 +
.../drm/i915/gt/intel_execlists_submission.c | 9 +-
drivers/gpu/drm/i915/i915_request.c | 52 ++++-
drivers/gpu/drm/i915/i915_request.h | 4 +-
drivers/gpu/drm/i915/selftests/i915_request.c | 201 ++++++++++++++++++
5 files changed, 261 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index 0b062fad1837..e2fb3ae2aaf3 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -314,6 +314,7 @@ int intel_engine_pulse(struct intel_engine_cs *engine)
mutex_unlock(&ce->timeline->mutex);
}
+ intel_engine_flush_scheduler(engine);
intel_engine_pm_put(engine);
return err;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 85ff5fe861b4..4c2acb5a6c0a 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -421,6 +421,11 @@ static void reset_active(struct i915_request *rq,
ce->lrc.lrca = lrc_update_regs(ce, engine, head);
}
+static bool bad_request(const struct i915_request *rq)
+{
+ return rq->fence.error && i915_request_started(rq);
+}
+
static struct intel_engine_cs *
__execlists_schedule_in(struct i915_request *rq)
{
@@ -433,7 +438,7 @@ __execlists_schedule_in(struct i915_request *rq)
!intel_engine_has_heartbeat(engine)))
intel_context_set_banned(ce);
- if (unlikely(intel_context_is_banned(ce)))
+ if (unlikely(intel_context_is_banned(ce) || bad_request(rq)))
reset_active(rq, engine);
if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
@@ -1112,7 +1117,7 @@ static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
return 0;
/* Force a fast reset for terminated contexts (ignoring sysfs!) */
- if (unlikely(intel_context_is_banned(rq->context)))
+ if (unlikely(intel_context_is_banned(rq->context) || bad_request(rq)))
return 1;
return READ_ONCE(engine->props.preempt_timeout_ms);
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index e7b4c4bc41a6..b4511ac05e9a 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -33,7 +33,10 @@
#include "gem/i915_gem_context.h"
#include "gt/intel_breadcrumbs.h"
#include "gt/intel_context.h"
+#include "gt/intel_engine.h"
+#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_gpu_commands.h"
+#include "gt/intel_reset.h"
#include "gt/intel_ring.h"
#include "gt/intel_rps.h"
@@ -429,20 +432,22 @@ void __i915_request_skip(struct i915_request *rq)
rq->infix = rq->postfix;
}
-void i915_request_set_error_once(struct i915_request *rq, int error)
+bool i915_request_set_error_once(struct i915_request *rq, int error)
{
int old;
GEM_BUG_ON(!IS_ERR_VALUE((long)error));
if (i915_request_signaled(rq))
- return;
+ return false;
old = READ_ONCE(rq->fence.error);
do {
if (fatal_error(old))
- return;
+ return false;
} while (!try_cmpxchg(&rq->fence.error, &old, error));
+
+ return true;
}
struct i915_request *i915_request_mark_eio(struct i915_request *rq)
@@ -609,6 +614,47 @@ void i915_request_unsubmit(struct i915_request *request)
spin_unlock_irqrestore(&se->lock, flags);
}
+static struct intel_engine_cs *active_engine(struct i915_request *rq)
+{
+ struct intel_engine_cs *engine, *locked;
+
+ locked = READ_ONCE(rq->engine);
+ spin_lock_irq(&locked->sched.lock);
+ while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) {
+ spin_unlock(&locked->sched.lock);
+ locked = engine;
+ spin_lock(&locked->sched.lock);
+ }
+
+ engine = NULL;
+ if (i915_request_is_active(rq) && !__i915_request_is_complete(rq))
+ engine = locked;
+
+ spin_unlock_irq(&locked->sched.lock);
+
+ return engine;
Bad idea to reuse __active_engine() somehow?
I can try and see how it ends up looking.
Reviewed-by: Matthew Auld <matthew.auld@xxxxxxxxx>
Thanks,
Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx