Re: [PATCH] drm/i915/gt: Consider multi-gt at all places

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 17/04/2023 19:13, Upadhyay, Tejas wrote:


-----Original Message-----
From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxxxxxxxx>
Sent: Wednesday, April 5, 2023 2:01 PM
To: Upadhyay, Tejas <tejas.upadhyay@xxxxxxxxx>; Intel-
GFX@xxxxxxxxxxxxxxxxxxxxx
Subject: Re:  [PATCH] drm/i915/gt: Consider multi-gt at all places


On 05/04/2023 07:56, Upadhyay, Tejas wrote:
-int igt_live_test_end(struct igt_live_test *t)
+int igt_live_test_end(struct igt_live_test *t, struct intel_gt *gt)
    {
-	struct drm_i915_private *i915 = t->i915;
+	struct drm_i915_private *i915 = gt->i915;
    	struct intel_engine_cs *engine;
    	enum intel_engine_id id;

@@ -57,7 +57,7 @@ int igt_live_test_end(struct igt_live_test *t)
    		return -EIO;
    	}

-	for_each_engine(engine, to_gt(i915), id) {
+	for_each_engine(engine, gt, id) {
    		if (t->reset_engine[id] ==
    		    i915_reset_engine_count(&i915->gpu_error, engine))
    			continue;
diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.h
b/drivers/gpu/drm/i915/selftests/igt_live_test.h
index 36ed42736c52..209b0548c603 100644
--- a/drivers/gpu/drm/i915/selftests/igt_live_test.h
+++ b/drivers/gpu/drm/i915/selftests/igt_live_test.h
@@ -27,9 +27,9 @@ struct igt_live_test {
     * e.g. if the GPU was reset.
     */
    int igt_live_test_begin(struct igt_live_test *t,
-			struct drm_i915_private *i915,
+			struct intel_gt *gt,
    			const char *func,
    			const char *name);
-int igt_live_test_end(struct igt_live_test *t);
+int igt_live_test_end(struct igt_live_test *t, struct intel_gt
+*gt);

Back in the day the plan was that live selftests are device focused
and then we also have intel_gt_live_subtests, which are obviously GT
focused. So in that sense adding a single GT parameter to
igt_live_test_begin isn't something I immediately understand.

Could you explain in one or two practical examples what is not
working properly and how is this patch fixing it?

For example you are running test "live_all_engines(void *arg)",

-- Below test begin, will reset counters for primary GT - Tile0 -- err
= igt_live_test_begin(&t, to_gt(i915), __func__, "");
          if (err)
                  goto out_free;

--- Now we loop for all engines, note here for MTL vcs, vecs engines are not
on primary GT or tile 0,
       So counters did not reset on test begin does not cover them. ---

        In test_begin, below will not reset count for vcs, vecs engines on MTL,
	for_each_engine(engine, gt, id)
                  t->reset_engine[id] =
                          i915_reset_engine_count(&i915->gpu_error,
engine);

--- Then below will end test, again for primary GT where above
mentioned engines are not there --- err = igt_live_test_end(&t,
to_gt(i915));

In short to me it looks like igt_live_test for device needs attention when we
have different engines on different GTs like MTL.

If you either add for_each_gt to that for_each_engine in igt_live_test_begin
and igt_live_test_end, or alternatively for_each_uabi_engine (maybe misses
some internal engines?), everything works? That would be much smaller
patch and wouldn't falsely associate live tests with a single gt.


Below would work, if you agree I will rearrange and send patch.

--- a/drivers/gpu/drm/i915/selftests/igt_live_test.c
+++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c
@@ -16,28 +16,31 @@ int igt_live_test_begin(struct igt_live_test *t,
                         const char *func,
                         const char *name)
  {
-       struct intel_gt *gt = to_gt(i915);
+       struct intel_gt *gt;
         struct intel_engine_cs *engine;
         enum intel_engine_id id;
         int err;
+       unsigned int i;

-       t->i915 = i915;
-       t->func = func;
-       t->name = name;
+       for_each_gt(gt, i915, i) {
+               t->i915 = i915;
+               t->func = func;
+               t->name = name;

These three assignments should stay outside the outer loop.


-       err = intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT);
-       if (err) {
-               pr_err("%s(%s): failed to idle before, with err=%d!",
-                      func, name, err);
-               return err;
-       }
+               err = intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT);
+               if (err) {
+                       pr_err("%s(%s): failed to idle before, with err=%d!",
+                                       func, name, err);
+                       return err;
+               }

-       t->reset_global = i915_reset_count(&i915->gpu_error);
+               t->reset_global = i915_reset_count(&i915->gpu_error);

Ditto.

Rest looks good to me.

Regards,

Tvrtko


-       for_each_engine(engine, gt, id)
-               t->reset_engine[id] =
+               for_each_engine(engine, gt, id)
+                       t->reset_engine[id] =
                         i915_reset_engine_count(&i915->gpu_error, engine);

+       }
         return 0;
  }

@@ -46,6 +49,7 @@ int igt_live_test_end(struct igt_live_test *t)
         struct drm_i915_private *i915 = t->i915;
         struct intel_engine_cs *engine;
         enum intel_engine_id id;
+       unsigned int i;

         if (igt_flush_test(i915))
                 return -EIO;
@@ -57,17 +61,18 @@ int igt_live_test_end(struct igt_live_test *t)
                 return -EIO;
         }

-       for_each_engine(engine, to_gt(i915), id) {
-               if (t->reset_engine[id] ==
-                   i915_reset_engine_count(&i915->gpu_error, engine))
-                       continue;
+       for_each_gt(gt, i915, i) {
+               for_each_engine(engine, gt, id) {
+                       if (t->reset_engine[id] ==
+                                       i915_reset_engine_count(&i915->gpu_error, engine))
+                               continue;

-               pr_err("%s(%s): engine '%s' was reset %d times!\n",
-                      t->func, t->name, engine->name,
-                      i915_reset_engine_count(&i915->gpu_error, engine) -
-                      t->reset_engine[id]);
-               return -EIO;
+                       pr_err("%s(%s): engine '%s' was reset %d times!\n",
+                                       t->func, t->name, engine->name,
+                                       i915_reset_engine_count(&i915->gpu_error, engine) -
+                                       t->reset_engine[id]);
+                       return -EIO;
+               }
         }
Regards,
Tejas
Regards,

Tvrtko



[Index of Archives]     [AMD Graphics]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux