On Thu, 2020-10-15 at 09:40 +0200, Marcin Bernatowicz wrote: > On Wed, 2020-10-14 at 18:55 +0200, Janusz Krzysztofik wrote: > > The test was designed to keep track of open device file descriptors > > for safe driver unbind on recovery from a failed subtest. In that > > context, fences introduced by commit 1fbd127bd4e1 ("core_hotplug: > > Teach the healthcheck how to check execution status") can affect > > device > > recovery as much as an open device file if not closed before unbind. > > > > Moreover, forced GPU reset which used to be applied on recovery from > > a > > failed i915 GPU health check is no longer reachable since a GPU hang > > hopefully detected by the new health check algorithm can now break > > the > > whole recovery procedure prematurely. > > > > Refactor local_i915_healthcheck() so it takes care of closing fences > > and returns a result to its caller instead of long jumping on > > failures > > believed to be recoverable. While avoiding use of igt_assert() and > > friends, report actual source and error code of failures via > > igt_warn_on_f(). > > > > Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@xxxxxxxxxxxxxxx > > Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > > --- > > tests/core_hotunplug.c | 46 ++++++++++++++++++++++++++++++++------ > > ---- > > 1 file changed, 35 insertions(+), 11 deletions(-) > > > > diff --git a/tests/core_hotunplug.c b/tests/core_hotunplug.c > > index 70669c590..d6db02bad 100644 > > --- a/tests/core_hotunplug.c > > +++ b/tests/core_hotunplug.c > > @@ -233,9 +233,9 @@ static int merge_fences(int old, int new) > > return new; > > > > merge = sync_fence_merge(old, new); > > - igt_assert(merge != -1); > > - close(old); > > - close(new); > > + /* Assume fence close errors don't affect device close status > > */ > > + igt_ignore_warn(local_close(old, "old fence close failed")); > > + igt_ignore_warn(local_close(new, "new fence close failed")); > > > > return merge; > > } > > @@ -249,29 +249,53 @@ static int local_i915_healthcheck(int i915, > > const char *prefix) > > .buffer_count = 1, > > }; > > const struct intel_execution_engine2 *engine; > > - int fence = -1; > > + int fence = -1, err = 0, status = 1; > > > > local_debug("%s%s\n", prefix, "running i915 GPU healthcheck"); > > - if (local_i915_is_wedged(i915)) > > + if (igt_warn_on_f(local_i915_is_wedged(i915), "GPU found > > wedged\n")) > > return -EIO; > > > > + /* Assume gem_create()/gem_write() failures are unrecoverable > > */ > > obj.handle = gem_create(i915, 4096); > > gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe)); > > > > + /* As soon as a fence is open, don't fail before closing it */ > > __for_each_physical_engine(i915, engine) { > > execbuf.flags = engine->flags | I915_EXEC_FENCE_OUT; > > - gem_execbuf_wr(i915, &execbuf); > > + err = __gem_execbuf_wr(i915, &execbuf); > > + if (igt_warn_on_f(err < 0, "__gem_execbuf_wr() returned > > %d\n", > > + err)) > > + break; > > > > fence = merge_fences(fence, execbuf.rsvd2 >> 32); > > + if (igt_warn_on_f(fence < 0, "merge_fences() returned > > %d\n", > > + fence)) { > > + err = fence; > > + break; > > + } > > + } > > + if (fence >= 0) { > > + status = sync_fence_wait(fence, -1); > > + if (igt_warn_on_f(status < 0, "sync_fence_wait() > > returned %d\n", > > + status)) > > + err = status; > > + if (!err) > > + status = sync_fence_status(fence); > > + > > + /* Assume fence close errors don't affect device close > > status */ > > + igt_ignore_warn(local_close(fence, "fence close > > failed")); > > } > > - igt_assert(fence != -1); > > + > > + /* Assume gem_close() failure is unrecoverable */ > > gem_close(i915, obj.handle); > > > > - igt_assert_eq(sync_fence_wait(fence, -1), 0); > > - igt_assert_eq(sync_fence_status(fence), 1); > > - close(fence); > > + if (err < 0) > > + return err; > > + if (igt_warn_on_f(status != 1, "sync_fence_status() returned > > %d\n", > > + status)) > > + return -1; > > > > - if (local_i915_is_wedged(i915)) > > + if (igt_warn_on_f(local_i915_is_wedged(i915), "GPU turned > > wedged\n")) > > return -EIO; > LGTM, > Reviewed-by: Marcin Bernatowicz <marcin.bernatowicz@xxxxxxxxxxxxxxx> Thanks Marcin, pushed. Unfortunately I forgot to include the R-b and pushed without it so it will exist only in the list archives, sorry. Thanks, Janusz > > > > > return 0; _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx