Subtests now forcibly call or request igt_abort on failures in order to avoid silently leaving an exercised device in an unusable state. However, a failure inside a subtest doesn't always mean the device is no longer working correctly and reboot is needed. On the other hand, if a subtest just fails without aborting, that doesn't mean in turn the device is healthy. We should still perform a device health check in that case before deciding on next steps. Reuse the 'failure' structure field as a mark which is set when a subtest enters a chunk of critical steps which must be followed by a successful health check in order to avoid aborting the test execution. Then, move health checks from inside each subtest body to its individual follow-up igt_fixture section from where device file descriptors potentially left open are closed and the health check is run if theigt_subtest section has been exited with the marker set. Also, precede health check operations with a driver rebind or bus rescan attempt as needed. v2: Start each recovery phase from unconditionally closing file descriptors potentially left open by a subtest before it entered its critical section, - replace igt_require() with 'if() return;' construct in recover() to reduce noise, - replace "subtest failure" message used as a request for healthcheck with a more appropriate "need healthcheck" for clarity, - rebase on current upstream master. Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@xxxxxxxxxxxxxxx> Reviewed-by: Michał Winiarski <michal.winiarski@xxxxxxxxx> # v1 --- tests/core_hotunplug.c | 108 +++++++++++++++++++++++++++++------------ 1 file changed, 76 insertions(+), 32 deletions(-) diff --git a/tests/core_hotunplug.c b/tests/core_hotunplug.c index b9982b330..313c44784 100644 --- a/tests/core_hotunplug.c +++ b/tests/core_hotunplug.c @@ -52,6 +52,9 @@ struct hotunplug { static int local_close(int fd) { + if (fd < 0) /* not open - return current status */ + return fd; + errno = 0; if (close(fd)) /* close failure - return -errno (never -1) */ return -errno; @@ -91,11 +94,9 @@ static void driver_unbind(struct hotunplug *priv, const char *prefix) { igt_debug("%sunbinding the driver from the device\n", prefix); - priv->failure = "Driver unbind timeout!"; - igt_set_timeout(60, priv->failure); + igt_set_timeout(60, "Driver unbind timeout!"); igt_sysfs_set(priv->fd.sysfs_drv, "unbind", priv->dev_bus_addr); igt_reset_timeout(); - priv->failure = NULL; } /* Re-bind the driver to the device */ @@ -103,11 +104,9 @@ static void driver_bind(struct hotunplug *priv) { igt_debug("rebinding the driver to the device\n"); - priv->failure = "Driver re-bind timeout!"; - igt_set_timeout(60, priv->failure); + igt_set_timeout(60, "Driver re-bind timeout!"); igt_sysfs_set(priv->fd.sysfs_drv, "bind", priv->dev_bus_addr); igt_reset_timeout(); - priv->failure = NULL; } /* Remove (virtually unplug) the device from its bus */ @@ -122,11 +121,9 @@ static void device_unplug(struct hotunplug *priv, const char *prefix) igt_debug("%sunplugging the device\n", prefix); - priv->failure = "Device unplug timeout!"; - igt_set_timeout(60, priv->failure); + igt_set_timeout(60, "Device unplug timeout!"); igt_sysfs_set(priv->fd.sysfs_dev, "remove", "1"); igt_reset_timeout(); - priv->failure = NULL; priv->fd.sysfs_dev = local_close(priv->fd.sysfs_dev); igt_warn_on_f(priv->fd.sysfs_dev != -1, @@ -138,11 +135,15 @@ static void bus_rescan(struct hotunplug *priv) { igt_debug("recovering the device\n"); - priv->failure = "Bus rescan timeout!"; - igt_set_timeout(60, priv->failure); + igt_set_timeout(60, "Bus rescan timeout!"); igt_sysfs_set(priv->fd.sysfs_bus, "../rescan", "1"); igt_reset_timeout(); - priv->failure = NULL; +} + +static void cleanup(struct hotunplug *priv) +{ + priv->fd.drm = local_close(priv->fd.drm); + priv->fd.sysfs_dev = local_close(priv->fd.sysfs_dev); } static void healthcheck(struct hotunplug *priv) @@ -150,6 +151,18 @@ static void healthcheck(struct hotunplug *priv) /* preserve error code potentially stored before in priv->fd.drm */ int fd_drm; + if (faccessat(priv->fd.sysfs_bus, priv->dev_bus_addr, F_OK, 0)) { + priv->failure = "Bus rescan failed!"; + bus_rescan(priv); + priv->failure = NULL; + } + + if (faccessat(priv->fd.sysfs_drv, priv->dev_bus_addr, F_OK, 0)) { + priv->failure = "Driver re-bind failed!"; + driver_bind(priv); + priv->failure = NULL; + } + /* device name may have changed, rebuild IGT device list */ igt_devices_scan(true); @@ -169,6 +182,17 @@ static void healthcheck(struct hotunplug *priv) igt_assert_f(fd_drm == -1, "Device close failed\n"); } +static void recover(struct hotunplug *priv) +{ + cleanup(priv); + + if (!priv->failure) + return; + priv->failure = NULL; + + healthcheck(priv); +} + static void post_healthcheck(struct hotunplug *priv) { igt_abort_on_f(priv->failure, "%s\n", priv->failure); @@ -195,20 +219,20 @@ static void set_filter_from_device(int fd) static void unbind_rebind(struct hotunplug *priv) { + priv->failure = "need healthcheck"; + driver_unbind(priv, ""); driver_bind(priv); - - healthcheck(priv); } static void unplug_rescan(struct hotunplug *priv) { + priv->failure = "need healthcheck"; + device_unplug(priv, ""); bus_rescan(priv); - - healthcheck(priv); } static void hotunbind_lateclose(struct hotunplug *priv) @@ -217,6 +241,8 @@ static void hotunbind_lateclose(struct hotunplug *priv) priv->fd.drm = __drm_open_driver(DRIVER_ANY); igt_assert_fd(priv->fd.drm); + priv->failure = "need healthcheck"; + driver_unbind(priv, "hot "); driver_bind(priv); @@ -224,8 +250,6 @@ static void hotunbind_lateclose(struct hotunplug *priv) igt_debug("late closing the unbound device instance\n"); priv->fd.drm = local_close(priv->fd.drm); igt_warn_on_f(priv->fd.drm != -1, "Device close failed\n"); - - healthcheck(priv); } static void hotunplug_lateclose(struct hotunplug *priv) @@ -234,6 +258,8 @@ static void hotunplug_lateclose(struct hotunplug *priv) priv->fd.drm = __drm_open_driver(DRIVER_ANY); igt_assert_fd(priv->fd.drm); + priv->failure = "need healthcheck"; + device_unplug(priv, "hot "); bus_rescan(priv); @@ -241,8 +267,6 @@ static void hotunplug_lateclose(struct hotunplug *priv) igt_debug("late closing the removed device instance\n"); priv->fd.drm = local_close(priv->fd.drm); igt_warn_on_f(priv->fd.drm != -1, "Device close failed\n"); - - healthcheck(priv); } /* Main */ @@ -276,30 +300,50 @@ igt_main prepare(&priv); } - igt_describe("Check if the driver can be cleanly unbound from a device believed to be closed"); - igt_subtest("unbind-rebind") - unbind_rebind(&priv); + igt_subtest_group { + igt_describe("Check if the driver can be cleanly unbound from a device believed to be closed"); + igt_subtest("unbind-rebind") + unbind_rebind(&priv); + + igt_fixture + recover(&priv); + } igt_fixture post_healthcheck(&priv); - igt_describe("Check if a device believed to be closed can be cleanly unplugged"); - igt_subtest("unplug-rescan") - unplug_rescan(&priv); + igt_subtest_group { + igt_describe("Check if a device believed to be closed can be cleanly unplugged"); + igt_subtest("unplug-rescan") + unplug_rescan(&priv); + + igt_fixture + recover(&priv); + } igt_fixture post_healthcheck(&priv); - igt_describe("Check if the driver can be cleanly unbound from a still open device, then released"); - igt_subtest("hotunbind-lateclose") - hotunbind_lateclose(&priv); + igt_subtest_group { + igt_describe("Check if the driver can be cleanly unbound from a still open device, then released"); + igt_subtest("hotunbind-lateclose") + hotunbind_lateclose(&priv); + + igt_fixture + recover(&priv); + } igt_fixture post_healthcheck(&priv); - igt_describe("Check if a still open device can be cleanly unplugged, then released"); - igt_subtest("hotunplug-lateclose") - hotunplug_lateclose(&priv); + igt_subtest_group { + igt_describe("Check if a still open device can be cleanly unplugged, then released"); + igt_subtest("hotunplug-lateclose") + hotunplug_lateclose(&priv); + + igt_fixture + recover(&priv); + } igt_fixture { post_healthcheck(&priv); -- 2.21.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx