On Fri, Jul 16, 2021 at 01:17:18PM -0700, Matthew Brost wrote: > From: Rahul Kumar Singh <rahul.kumar.singh@xxxxxxxxx> > > When GuC submission is enabled, the GuC controls engine resets. Rather > than explicitly triggering a reset, the driver must submit a hanging > context to GuC and wait for the reset to occur. > > Signed-off-by: Rahul Kumar Singh <rahul.kumar.singh@xxxxxxxxx> > Signed-off-by: John Harrison <John.C.Harrison@xxxxxxxxx> > Signed-off-by: Matthew Brost <matthew.brost@xxxxxxxxx> > Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@xxxxxxxxx> > Cc: Matthew Brost <matthew.brost@xxxxxxxxx> Reviewed-by: Matthew Brost <matthew.brost@xxxxxxxxx> > --- > drivers/gpu/drm/i915/Makefile | 1 + > .../gpu/drm/i915/gt/selftest_workarounds.c | 130 +++++++++++++----- > .../i915/selftests/intel_scheduler_helpers.c | 76 ++++++++++ > .../i915/selftests/intel_scheduler_helpers.h | 28 ++++ > 4 files changed, 201 insertions(+), 34 deletions(-) > create mode 100644 drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c > create mode 100644 drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.h > > diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile > index 10b3bb6207ba..ab7679957623 100644 > --- a/drivers/gpu/drm/i915/Makefile > +++ b/drivers/gpu/drm/i915/Makefile > @@ -280,6 +280,7 @@ i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o > i915-$(CONFIG_DRM_I915_SELFTEST) += \ > gem/selftests/i915_gem_client_blt.o \ > gem/selftests/igt_gem_utils.o \ > + selftests/intel_scheduler_helpers.o \ > selftests/i915_random.o \ > selftests/i915_selftest.o \ > selftests/igt_atomic.o \ > diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c > index 7ebc4edb8ecf..7727bc531ea9 100644 > --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c > +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c > @@ -12,6 +12,7 @@ > #include "selftests/igt_flush_test.h" > #include "selftests/igt_reset.h" > #include "selftests/igt_spinner.h" > +#include "selftests/intel_scheduler_helpers.h" > #include "selftests/mock_drm.h" > > #include "gem/selftests/igt_gem_utils.h" > @@ -261,28 +262,34 @@ static int do_engine_reset(struct intel_engine_cs *engine) > return intel_engine_reset(engine, "live_workarounds"); > } > > +static int do_guc_reset(struct intel_engine_cs *engine) > +{ > + /* Currently a no-op as the reset is handled by GuC */ > + return 0; > +} > + > static int > switch_to_scratch_context(struct intel_engine_cs *engine, > - struct igt_spinner *spin) > + struct igt_spinner *spin, > + struct i915_request **rq) > { > struct intel_context *ce; > - struct i915_request *rq; > int err = 0; > > ce = intel_context_create(engine); > if (IS_ERR(ce)) > return PTR_ERR(ce); > > - rq = igt_spinner_create_request(spin, ce, MI_NOOP); > + *rq = igt_spinner_create_request(spin, ce, MI_NOOP); > intel_context_put(ce); > > - if (IS_ERR(rq)) { > + if (IS_ERR(*rq)) { > spin = NULL; > - err = PTR_ERR(rq); > + err = PTR_ERR(*rq); > goto err; > } > > - err = request_add_spin(rq, spin); > + err = request_add_spin(*rq, spin); > err: > if (err && spin) > igt_spinner_end(spin); > @@ -296,6 +303,7 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine, > { > struct intel_context *ce, *tmp; > struct igt_spinner spin; > + struct i915_request *rq; > intel_wakeref_t wakeref; > int err; > > @@ -316,13 +324,24 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine, > goto out_spin; > } > > - err = switch_to_scratch_context(engine, &spin); > + err = switch_to_scratch_context(engine, &spin, &rq); > if (err) > goto out_spin; > > + /* Ensure the spinner hasn't aborted */ > + if (i915_request_completed(rq)) { > + pr_err("%s spinner failed to start\n", name); > + err = -ETIMEDOUT; > + goto out_spin; > + } > + > with_intel_runtime_pm(engine->uncore->rpm, wakeref) > err = reset(engine); > > + /* Ensure the reset happens and kills the engine */ > + if (err == 0) > + err = intel_selftest_wait_for_rq(rq); > + > igt_spinner_end(&spin); > > if (err) { > @@ -787,9 +806,26 @@ static int live_reset_whitelist(void *arg) > continue; > > if (intel_has_reset_engine(gt)) { > - err = check_whitelist_across_reset(engine, > - do_engine_reset, > - "engine"); > + if (intel_engine_uses_guc(engine)) { > + struct intel_selftest_saved_policy saved; > + int err2; > + > + err = intel_selftest_modify_policy(engine, &saved); > + if(err) > + goto out; > + > + err = check_whitelist_across_reset(engine, > + do_guc_reset, > + "guc"); > + > + err2 = intel_selftest_restore_policy(engine, &saved); > + if (err == 0) > + err = err2; > + } else > + err = check_whitelist_across_reset(engine, > + do_engine_reset, > + "engine"); > + > if (err) > goto out; > } > @@ -1226,31 +1262,41 @@ live_engine_reset_workarounds(void *arg) > reference_lists_init(gt, &lists); > > for_each_engine(engine, gt, id) { > + struct intel_selftest_saved_policy saved; > + bool using_guc = intel_engine_uses_guc(engine); > bool ok; > + int ret2; > > pr_info("Verifying after %s reset...\n", engine->name); > + ret = intel_selftest_modify_policy(engine, &saved); > + if (ret) > + break; > + > + > ce = intel_context_create(engine); > if (IS_ERR(ce)) { > ret = PTR_ERR(ce); > - break; > + goto restore; > } > > - ok = verify_wa_lists(gt, &lists, "before reset"); > - if (!ok) { > - ret = -ESRCH; > - goto err; > - } > + if (!using_guc) { > + ok = verify_wa_lists(gt, &lists, "before reset"); > + if (!ok) { > + ret = -ESRCH; > + goto err; > + } > > - ret = intel_engine_reset(engine, "live_workarounds:idle"); > - if (ret) { > - pr_err("%s: Reset failed while idle\n", engine->name); > - goto err; > - } > + ret = intel_engine_reset(engine, "live_workarounds:idle"); > + if (ret) { > + pr_err("%s: Reset failed while idle\n", engine->name); > + goto err; > + } > > - ok = verify_wa_lists(gt, &lists, "after idle reset"); > - if (!ok) { > - ret = -ESRCH; > - goto err; > + ok = verify_wa_lists(gt, &lists, "after idle reset"); > + if (!ok) { > + ret = -ESRCH; > + goto err; > + } > } > > ret = igt_spinner_init(&spin, engine->gt); > @@ -1271,25 +1317,41 @@ live_engine_reset_workarounds(void *arg) > goto err; > } > > - ret = intel_engine_reset(engine, "live_workarounds:active"); > - if (ret) { > - pr_err("%s: Reset failed on an active spinner\n", > - engine->name); > - igt_spinner_fini(&spin); > - goto err; > + /* Ensure the spinner hasn't aborted */ > + if (i915_request_completed(rq)) { > + ret = -ETIMEDOUT; > + goto skip; > + } > + > + if (!using_guc) { > + ret = intel_engine_reset(engine, "live_workarounds:active"); > + if (ret) { > + pr_err("%s: Reset failed on an active spinner\n", > + engine->name); > + igt_spinner_fini(&spin); > + goto err; > + } > } > > + /* Ensure the reset happens and kills the engine */ > + if (ret == 0) > + ret = intel_selftest_wait_for_rq(rq); > + > +skip: > igt_spinner_end(&spin); > igt_spinner_fini(&spin); > > ok = verify_wa_lists(gt, &lists, "after busy reset"); > - if (!ok) { > + if (!ok) > ret = -ESRCH; > - goto err; > - } > > err: > intel_context_put(ce); > + > +restore: > + ret2 = intel_selftest_restore_policy(engine, &saved); > + if (ret == 0) > + ret = ret2; > if (ret) > break; > } > diff --git a/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c > new file mode 100644 > index 000000000000..91ecd8a1bd21 > --- /dev/null > +++ b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c > @@ -0,0 +1,76 @@ > +/* > + * SPDX-License-Identifier: MIT > + * > + * Copyright © 2018 Intel Corporation > + */ > + > +//#include "gt/intel_engine_user.h" > +#include "gt/intel_gt.h" > +#include "i915_drv.h" > +#include "i915_selftest.h" > + > +#include "selftests/intel_scheduler_helpers.h" > + > +#define REDUCED_TIMESLICE 5 > +#define REDUCED_PREEMPT 10 > +#define WAIT_FOR_RESET_TIME 1000 > + > +int intel_selftest_modify_policy(struct intel_engine_cs *engine, > + struct intel_selftest_saved_policy *saved) > + > +{ > + int err; > + > + saved->reset = engine->i915->params.reset; > + saved->flags = engine->flags; > + saved->timeslice = engine->props.timeslice_duration_ms; > + saved->preempt_timeout = engine->props.preempt_timeout_ms; > + > + /* > + * Enable force pre-emption on time slice expiration > + * together with engine reset on pre-emption timeout. > + * This is required to make the GuC notice and reset > + * the single hanging context. > + * Also, reduce the preemption timeout to something > + * small to speed the test up. > + */ > + engine->i915->params.reset = 2; > + engine->flags |= I915_ENGINE_WANT_FORCED_PREEMPTION; > + engine->props.timeslice_duration_ms = REDUCED_TIMESLICE; > + engine->props.preempt_timeout_ms = REDUCED_PREEMPT; > + > + if (!intel_engine_uses_guc(engine)) > + return 0; > + > + err = intel_guc_global_policies_update(&engine->gt->uc.guc); > + if (err) > + intel_selftest_restore_policy(engine, saved); > + > + return err; > +} > + > +int intel_selftest_restore_policy(struct intel_engine_cs *engine, > + struct intel_selftest_saved_policy *saved) > +{ > + /* Restore the original policies */ > + engine->i915->params.reset = saved->reset; > + engine->flags = saved->flags; > + engine->props.timeslice_duration_ms = saved->timeslice; > + engine->props.preempt_timeout_ms = saved->preempt_timeout; > + > + if (!intel_engine_uses_guc(engine)) > + return 0; > + > + return intel_guc_global_policies_update(&engine->gt->uc.guc); > +} > + > +int intel_selftest_wait_for_rq(struct i915_request *rq) > +{ > + long ret; > + > + ret = i915_request_wait(rq, 0, WAIT_FOR_RESET_TIME); > + if (ret < 0) > + return ret; > + > + return 0; > +} > diff --git a/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.h b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.h > new file mode 100644 > index 000000000000..f30e96f0ba95 > --- /dev/null > +++ b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.h > @@ -0,0 +1,28 @@ > +/* SPDX-License-Identifier: MIT */ > +/* > + * Copyright © 2014-2019 Intel Corporation > + */ > + > +#ifndef _INTEL_SELFTEST_SCHEDULER_HELPERS_H_ > +#define _INTEL_SELFTEST_SCHEDULER_HELPERS_H_ > + > +#include <linux/types.h> > + > +struct i915_request; > +struct intel_engine_cs; > + > +struct intel_selftest_saved_policy > +{ > + u32 flags; > + u32 reset; > + u64 timeslice; > + u64 preempt_timeout; > +}; > + > +int intel_selftest_modify_policy(struct intel_engine_cs *engine, > + struct intel_selftest_saved_policy *saved); > +int intel_selftest_restore_policy(struct intel_engine_cs *engine, > + struct intel_selftest_saved_policy *saved); > +int intel_selftest_wait_for_rq( struct i915_request *rq); > + > +#endif > -- > 2.28.0 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > https://lists.freedesktop.org/mailman/listinfo/intel-gfx _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx