Libvirt has a fixed 15-second timeout for qemu to exit. If qemu is writing to a slow USB key, it can hang (in D state) for much longer than this - usually many minutes. The solution is to check specifically for the libvirt EBUSY error (VIR_ERR_SYSTEM_ERROR with errno EBUSY) when this happens, and to retry the virDomainDestroyFlags operation indefinitely. See also the description here: https://www.redhat.com/archives/libvir-list/2016-January/msg00767.html This is similar to the following OpenStack Nova commit: http://git.openstack.org/cgit/openstack/nova/commit/?id=3907867 Thanks: Kashyap Chamarthy and Daniel Berrange. --- src/launch-libvirt.c | 45 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 35 insertions(+), 10 deletions(-) diff --git a/src/launch-libvirt.c b/src/launch-libvirt.c index 8215e02..90b6c49 100644 --- a/src/launch-libvirt.c +++ b/src/launch-libvirt.c @@ -25,6 +25,7 @@ #include <unistd.h> #include <fcntl.h> #include <grp.h> +#include <errno.h> #include <sys/types.h> #include <sys/stat.h> #include <assert.h> @@ -2015,6 +2016,8 @@ ignore_errors (void *ignore, virErrorPtr ignore2) /* empty */ } +static int destroy_domain (guestfs_h *g, virDomainPtr dom, int check_for_errors); + static int shutdown_libvirt (guestfs_h *g, void *datav, int check_for_errors) { @@ -2023,23 +2026,14 @@ shutdown_libvirt (guestfs_h *g, void *datav, int check_for_errors) virDomainPtr dom = data->dom; size_t i; int ret = 0; - int flags; /* Note that we can be called back very early in launch (specifically * from launch_libvirt itself), when conn and dom might be NULL. */ - if (dom != NULL) { - flags = check_for_errors ? VIR_DOMAIN_DESTROY_GRACEFUL : 0; - debug (g, "calling virDomainDestroy \"%s\" flags=%s", - data->name, check_for_errors ? 
"VIR_DOMAIN_DESTROY_GRACEFUL" : "0"); - if (virDomainDestroyFlags (dom, flags) == -1) { - libvirt_error (g, _("could not destroy libvirt domain")); - ret = -1; - } + ret = destroy_domain (g, dom, check_for_errors); virDomainFree (dom); } - if (conn != NULL) virConnectClose (conn); @@ -2068,6 +2062,37 @@ shutdown_libvirt (guestfs_h *g, void *datav, int check_for_errors) return ret; } +/* Wrapper around virDomainDestroy which handles errors and retries.. */ +static int +destroy_domain (guestfs_h *g, virDomainPtr dom, int check_for_errors) +{ + const int flags = check_for_errors ? VIR_DOMAIN_DESTROY_GRACEFUL : 0; + virErrorPtr err; + + again: + debug (g, "calling virDomainDestroy flags=%s", + check_for_errors ? "VIR_DOMAIN_DESTROY_GRACEFUL" : "0"); + if (virDomainDestroyFlags (dom, flags) == -1) { + err = virGetLastError (); + + /* Second chance if we're just waiting for qemu to shut down. See: + * https://www.redhat.com/archives/libvir-list/2016-January/msg00767.html + */ + if ((flags & VIR_DOMAIN_DESTROY_GRACEFUL) && + err && err->code == VIR_ERR_SYSTEM_ERROR && err->int1 == EBUSY) + goto again; + + /* "Domain not found" is not treated as an error. */ + if (err && err->code == VIR_ERR_NO_DOMAIN) + return 0; + + libvirt_error (g, _("could not destroy libvirt domain")); + return -1; + } + + return 0; +} + /* Wrapper around error() which produces better errors for * libvirt functions. */ -- 2.5.0 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list