Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> writes: > Inject a fault into the engine reset and check that the outstanding > requests are completed despite the failed reset. > > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > --- > drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 133 +++++++++++++++++++ > 1 file changed, 133 insertions(+) > > diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c > index ffc6eabb6404..875633cc0a75 100644 > --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c > +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c > @@ -540,6 +540,138 @@ static int igt_reset_nop_engine(void *arg) > return 0; > } > > +static void force_reset_timeout(struct intel_engine_cs *engine) > +{ > + engine->reset_timeout.probability = 999; > + atomic_set(&engine->reset_timeout.times, -1); > +} > + > +static void cancel_reset_timeout(struct intel_engine_cs *engine) > +{ > + memset(&engine->reset_timeout, 0, sizeof(engine->reset_timeout)); > +} > + > +static int igt_reset_fail_engine(void *arg) > +{ > + struct intel_gt *gt = arg; > + struct intel_engine_cs *engine; > + enum intel_engine_id id; > + > + /* Check that we can engine-reset during non-user portions */ > + > + if (!intel_has_reset_engine(gt)) > + return 0; > + > + for_each_engine(engine, gt, id) { > + unsigned int count; > + struct intel_context *ce; > + IGT_TIMEOUT(end_time); > + int err; > + > + ce = intel_context_create(engine); > + if (IS_ERR(ce)) > + return PTR_ERR(ce); > + > + st_engine_heartbeat_disable(engine); > + set_bit(I915_RESET_ENGINE + id, &gt->reset.flags); > + count = 0; > + do { > + struct i915_request *last = NULL; > + int i; > + > + if (!wait_for_idle(engine)) { > + pr_err("%s failed to idle before reset\n", > + engine->name); > + err = -EIO; > + break; > + } > + > + for (i = 0; i < 16; i++) { > + struct i915_request *rq; > + > + rq = intel_context_create_request(ce); > + if (IS_ERR(rq)) { > + struct drm_printer p = > + 
drm_info_printer(gt->i915->drm.dev); > + intel_engine_dump(engine, &p, > + "%s(%s): failed to submit request\n", > + __func__, > + engine->name); > + > + GEM_TRACE("%s(%s): failed to submit request\n", > + __func__, > + engine->name); > + GEM_TRACE_DUMP(); > + > + intel_gt_set_wedged(gt); > + if (last) > + i915_request_put(last); > + > + err = PTR_ERR(rq); > + goto out; > + } > + > + if (last) > + i915_request_put(last); > + last = i915_request_get(rq); > + i915_request_add(rq); > + } > + > + if (count & 1) { > + err = intel_engine_reset(engine, NULL); > + if (err) { > + GEM_TRACE_ERR("intel_engine_reset(%s) failed, err:%d\n", > + engine->name, err); > + GEM_TRACE_DUMP(); > + break; > + } > + } else { > + force_reset_timeout(engine); > + err = intel_engine_reset(engine, NULL); We don't promote to a global reset here if the engine reset fails? If not, what mechanism then guarantees the request completion? -Mika > + cancel_reset_timeout(engine); > + if (err != -ETIMEDOUT) { > + pr_err("intel_engine_reset(%s) did not fail, err:%d\n", > + engine->name, err); > + break; > + } > + } > + > + err = 0; > + if (i915_request_wait(last, 0, HZ /2) < 0) { > + struct drm_printer p = > + drm_info_printer(gt->i915->drm.dev); > + > + intel_engine_dump(engine, &p, > + "%s(%s): failed to complete request\n", > + __func__, > + engine->name); > + > + GEM_TRACE("%s(%s): failed to complete request\n", > + __func__, > + engine->name); > + GEM_TRACE_DUMP(); > + > + err = -EIO; > + } > + i915_request_put(last); > + count++; > + } while (err == 0 && time_before(jiffies, end_time)); > +out: > + clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags); > + st_engine_heartbeat_enable(engine); > + > + pr_info("%s(%s): %d resets\n", __func__, engine->name, count); > + > + intel_context_put(ce); > + if (igt_flush_test(gt->i915)) > + err = -EIO; > + if (err) > + return err; > + } > + > + return 0; > +} > + > static int __igt_reset_engine(struct intel_gt *gt, bool active) > { > struct i915_gpu_error *global = 
&gt->i915->gpu_error; > @@ -1694,6 +1826,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) > SUBTEST(igt_reset_nop_engine), > SUBTEST(igt_reset_idle_engine), > SUBTEST(igt_reset_active_engine), > + SUBTEST(igt_reset_fail_engine), > SUBTEST(igt_reset_engines), > SUBTEST(igt_reset_engines_atomic), > SUBTEST(igt_reset_queue), > -- > 2.20.1 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > https://lists.freedesktop.org/mailman/listinfo/intel-gfx _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx