On Thu, Jun 14, 2018 at 12:16:21PM +0200, Michael Trapp wrote: > vhostmd has no signal handler for SIGPIPE and a restart of libvirtd results in a > stopped vhostmd. The root cause seems to be a UDS socket between vhostmd and > libvirtd which is closed by a libvirtd restart. In addition to the signal handler > the connection to libvirtd has to be opened again otherwise vhostmd can't read > any data from libvirtd and doesn't update the metrics. I don't know the vhostmd code, but the recommended approach to dealing with lost connections is to use virConnectRegisterCloseCallback() to get a callback notification when the connection is lost. This requires that you have an event loop implementation provided to libvirt first though. > > --- > vhostmd/vhostmd.c | 2 ++ > vhostmd/virt-util.c | 45 +++++++++++++++++++++++++++++++++++++++------ > 2 files changed, 41 insertions(+), 6 deletions(-) > > diff --git a/vhostmd/vhostmd.c b/vhostmd/vhostmd.c > index 7f04705..4cf4630 100644 > --- a/vhostmd/vhostmd.c > +++ b/vhostmd/vhostmd.c > @@ -117,6 +117,7 @@ static void sig_handler(int sig, siginfo_t *siginfo ATTRIBUTE_UNUSED, > case SIGQUIT: > down = 1; > break; > + case SIGPIPE: > default: > break; > } > @@ -1053,6 +1054,7 @@ int main(int argc, char *argv[]) > sigaction(SIGINT, &sig_action, NULL); > sigaction(SIGQUIT, &sig_action, NULL); > sigaction(SIGTERM, &sig_action, NULL); > + sigaction(SIGPIPE, &sig_action, NULL); > > xmlInitParser(); > > diff --git a/vhostmd/virt-util.c b/vhostmd/virt-util.c > index 1c31305..c76e224 100644 > --- a/vhostmd/virt-util.c > +++ b/vhostmd/virt-util.c > @@ -44,17 +44,47 @@ do_connect (void) > return 0; > } > > +static int > +do_reconnect(void) > +{ > + if (conn != NULL) > + virConnectClose (conn); > + > + conn = virConnectOpenReadOnly (libvirt_uri); > + if (conn == NULL) { > + vu_log (VHOSTMD_ERR, "Unable to open libvirt connection to %s", > + libvirt_uri ? 
libvirt_uri : "default hypervisor"); > + return -1; > + } > + return 0; > +} If you're going to blindly reconnect on every possible error reported by libvirt, at least put in a check to see if the connection is genuinely broken or not. e.g. if (virConnectIsAlive(conn)) return -1; so that the error is still treated as fatal if this is not a connection-lost scenario. > + > int vu_num_vms(void) > { > + int rc; > + > if (do_connect () == -1) return -1; > + > + rc = virConnectNumOfDomains(conn); > + if (rc < 0) { > + if (do_reconnect()) return -1; > return virConnectNumOfDomains(conn); > + } > + return rc; > } > > int vu_get_vms(int *ids, int max_ids) > { > - if (do_connect () == -1) return -1; > + int rc; > + > + if (do_connect() == -1) return -1; > > - return (virConnectListDomains(conn, ids, max_ids)); > + rc = virConnectListDomains(conn, ids, max_ids); > + if (rc < 0) { > + if (do_reconnect()) return -1; > + return virConnectListDomains(conn, ids, max_ids); > + } > + return rc; > } > > vu_vm *vu_get_vm(int id) > @@ -74,8 +104,11 @@ vu_vm *vu_get_vm(int id) > > dom = virDomainLookupByID(conn, id); > if (dom == NULL) { > - vu_log(VHOSTMD_ERR, "Failed to lookup domain for id %d", id); > - goto error; > + if (do_reconnect() || > + (dom = virDomainLookupByID(conn, id)) == NULL) { > + vu_log(VHOSTMD_ERR, "Failed to lookup domain for id %d", id); > + goto error; > + } > } > > uuid[0] = '\0'; > @@ -98,8 +131,8 @@ vu_vm *vu_get_vm(int id) > void vu_vm_free(vu_vm *vm) > { > if (vm) { > - free(vm->name); > - free(vm->uuid); > + if (vm->name) free(vm->name); > + if (vm->uuid) free(vm->uuid); > free(vm); > } > } > -- > 2.12.3 > > _______________________________________________ > virt-tools-list mailing list > virt-tools-list@xxxxxxxxxx > https://www.redhat.com/mailman/listinfo/virt-tools-list Regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- 
https://www.instagram.com/dberrange :| _______________________________________________ virt-tools-list mailing list virt-tools-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/virt-tools-list