> -----Original Message----- > From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxxxxxxxx> > Sent: Wednesday, April 5, 2023 2:01 PM > To: Upadhyay, Tejas <tejas.upadhyay@xxxxxxxxx>; Intel- > GFX@xxxxxxxxxxxxxxxxxxxxx > Subject: Re: [PATCH] drm/i915/gt: Consider multi-gt at all places > > > On 05/04/2023 07:56, Upadhyay, Tejas wrote: > >>> -int igt_live_test_end(struct igt_live_test *t) > >>> +int igt_live_test_end(struct igt_live_test *t, struct intel_gt *gt) > >>> { > >>> - struct drm_i915_private *i915 = t->i915; > >>> + struct drm_i915_private *i915 = gt->i915; > >>> struct intel_engine_cs *engine; > >>> enum intel_engine_id id; > >>> > >>> @@ -57,7 +57,7 @@ int igt_live_test_end(struct igt_live_test *t) > >>> return -EIO; > >>> } > >>> > >>> - for_each_engine(engine, to_gt(i915), id) { > >>> + for_each_engine(engine, gt, id) { > >>> if (t->reset_engine[id] == > >>> i915_reset_engine_count(&i915->gpu_error, engine)) > >>> continue; > >>> diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.h > >>> b/drivers/gpu/drm/i915/selftests/igt_live_test.h > >>> index 36ed42736c52..209b0548c603 100644 > >>> --- a/drivers/gpu/drm/i915/selftests/igt_live_test.h > >>> +++ b/drivers/gpu/drm/i915/selftests/igt_live_test.h > >>> @@ -27,9 +27,9 @@ struct igt_live_test { > >>> * e.g. if the GPU was reset. > >>> */ > >>> int igt_live_test_begin(struct igt_live_test *t, > >>> - struct drm_i915_private *i915, > >>> + struct intel_gt *gt, > >>> const char *func, > >>> const char *name); > >>> -int igt_live_test_end(struct igt_live_test *t); > >>> +int igt_live_test_end(struct igt_live_test *t, struct intel_gt > >>> +*gt); > >> > >> Back in the day the plan was that live selftests are device focused > >> and then we also have intel_gt_live_subtests, which are obviously GT > >> focused. So in that sense adding a single GT parameter to > >> igt_live_test_begin isn't something I immediately understand. > >> > >> Could you explain in one or two practical examples what is not > >> working properly and how is this patch fixing it? > > > > For example you are running test "live_all_engines(void *arg)", > > > > -- Below test begin, will reset counters for primary GT - Tile0 -- err > > = igt_live_test_begin(&t, to_gt(i915), __func__, ""); > > if (err) > > goto out_free; > > > > --- Now we loop for all engines, note here for MTL vcs, vecs engines are not > on primary GT or tile 0, > > So counters did not reset on test begin does not cover them. --- > > > > In test_begin, below will not reset count for vcs, vecs engines on MTL, > > for_each_engine(engine, gt, id) > > t->reset_engine[id] = > > i915_reset_engine_count(&i915->gpu_error, > > engine); > > > > --- Then below will end test, again for primary GT where above > > mentioned engines are not there --- err = igt_live_test_end(&t, > > to_gt(i915)); > > > > In short to me it looks like igt_live_test for device needs attention when we > have different engines on different GTs like MTL. > > If you either add for_each_gt to that for_each_engine in igt_live_test_begin > and igt_live_test_end, or alternatively for_each_uabi_engine (maybe misses > some internal engines?), everything works? That would be much smaller > patch and wouldn't falsely associate live tests with a single gt. > Below would work, if you agree I will rearrange and send patch. --- a/drivers/gpu/drm/i915/selftests/igt_live_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c @@ -16,28 +16,31 @@ int igt_live_test_begin(struct igt_live_test *t, const char *func, const char *name) { - struct intel_gt *gt = to_gt(i915); + struct intel_gt *gt; struct intel_engine_cs *engine; enum intel_engine_id id; int err; + unsigned int i; - t->i915 = i915; - t->func = func; - t->name = name; + for_each_gt(gt, i915, i) { + t->i915 = i915; + t->func = func; + t->name = name; - err = intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT); - if (err) { - pr_err("%s(%s): failed to idle before, with err=%d!", - func, name, err); - return err; - } + err = intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT); + if (err) { + pr_err("%s(%s): failed to idle before, with err=%d!", + func, name, err); + return err; + } - t->reset_global = i915_reset_count(&i915->gpu_error); + t->reset_global = i915_reset_count(&i915->gpu_error); - for_each_engine(engine, gt, id) - t->reset_engine[id] = + for_each_engine(engine, gt, id) + t->reset_engine[id] = i915_reset_engine_count(&i915->gpu_error, engine); + } return 0; } @@ -46,6 +49,7 @@ int igt_live_test_end(struct igt_live_test *t) struct drm_i915_private *i915 = t->i915; struct intel_engine_cs *engine; enum intel_engine_id id; + unsigned int i; if (igt_flush_test(i915)) return -EIO; @@ -57,17 +61,18 @@ int igt_live_test_end(struct igt_live_test *t) return -EIO; } - for_each_engine(engine, to_gt(i915), id) { - if (t->reset_engine[id] == - i915_reset_engine_count(&i915->gpu_error, engine)) - continue; + for_each_gt(gt, i915, i) { + for_each_engine(engine, gt, id) { + if (t->reset_engine[id] == + i915_reset_engine_count(&i915->gpu_error, engine)) + continue; - pr_err("%s(%s): engine '%s' was reset %d times!\n", - t->func, t->name, engine->name, - i915_reset_engine_count(&i915->gpu_error, engine) - - t->reset_engine[id]); - return -EIO; + pr_err("%s(%s): engine '%s' was reset %d times!\n", + t->func, t->name, engine->name, + i915_reset_engine_count(&i915->gpu_error, engine) - + t->reset_engine[id]); + return -EIO; + } } Regards, Tejas > Regards, > > Tvrtko