This solves the problem detailed in: https://bugzilla.redhat.com/show_bug.cgi?id=816465 and further detailed in https://www.redhat.com/archives/libvir-list/2012-May/msg00202.html A short explanation is included in the comments of the patch itself. Even with ACK, I will wait to push this until I have verification that it does not break lldpad<-->libvirtd communication (if it does, I may need to use the nl_handle allocated during virNetlinkStartup() for virNetlinkEventServiceStart()). --- daemon/libvirtd.c | 6 +++++ src/libvirt_private.syms | 2 ++ src/util/virnetlink.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++ src/util/virnetlink.h | 5 +++- 4 files changed, 77 insertions(+), 1 deletion(-) diff --git a/daemon/libvirtd.c b/daemon/libvirtd.c index b098f6a..5d57b50 100644 --- a/daemon/libvirtd.c +++ b/daemon/libvirtd.c @@ -1007,6 +1007,11 @@ int main(int argc, char **argv) { goto cleanup; } + if (virNetlinkStartup() < 0) { + ret = VIR_DAEMON_ERR_INIT; + goto cleanup; + } + if (!(srv = virNetServerNew(config->min_workers, config->max_workers, config->prio_workers, @@ -1143,6 +1148,7 @@ cleanup: virNetServerProgramFree(qemuProgram); virNetServerClose(srv); virNetServerFree(srv); + virNetlinkShutdown(); if (statuswrite != -1) { if (ret != 0) { /* Tell parent of daemon what failed */ diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 88f8a21..9b20dd4 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1333,6 +1333,8 @@ virNetlinkEventRemoveClient; virNetlinkEventServiceIsRunning; virNetlinkEventServiceStop; virNetlinkEventServiceStart; +virNetlinkShutdown; +virNetlinkStartup; # virnetmessage.h diff --git a/src/util/virnetlink.c b/src/util/virnetlink.c index 2cbf32a..1b64033 100644 --- a/src/util/virnetlink.c +++ b/src/util/virnetlink.c @@ -98,10 +98,63 @@ static int nextWatch = 1; # define NETLINK_EVENT_ALLOC_EXTENT 10 static virNetlinkEventSrvPrivatePtr server = NULL; +static struct nl_handle *placeholder_nlhandle = NULL; /* 
Function definitions */ /** + * virNetlinkStartup: + * + * Perform any initialization that needs to take place before the + * program starts up worker threads. This is currently used to assure + * that an nl_handle is allocated prior to any attempts to bind a + * netlink socket. For a discussion of why this is necessary, please + * see the following email message: + * + * https://www.redhat.com/archives/libvir-list/2012-May/msg00202.html + * + * The short version is that, without this placeholder allocation of + * an nl_handle that is never used, it is possible for nl_connect() in + * one thread to collide with a direct bind() of a netlink socket in + * another thread, leading to failure of the operation (which could + * lead to failure of libvirtd to start). Since getaddrinfo() (used by + * libvirtd in virSocketAddrParse, which is called quite frequently + * during startup) directly calls bind() on a netlink socket, this is + * actually a very common occurrence (15-20% failure rate on some + * hardware). + * + * Returns 0 on success, -1 on failure. + */ +int +virNetlinkStartup(void) +{ + if (placeholder_nlhandle) + return 0; + placeholder_nlhandle = nl_handle_alloc(); + if (!placeholder_nlhandle) { + virReportSystemError(errno, "%s", + _("cannot allocate placeholder nlhandle for netlink")); + return -1; + } + return 0; +} + +/** + * virNetlinkShutdown: + * + * Undo any initialization done by virNetlinkStartup. This currently + * destroys the placeholder nl_handle. 
+ */ +void +virNetlinkShutdown(void) +{ + if (placeholder_nlhandle) { + nl_handle_destroy(placeholder_nlhandle); + placeholder_nlhandle = NULL; + } +} + +/** * virNetlinkCommand: * @nlmsg: pointer to netlink message * @respbuf: pointer to pointer where response buffer will be allocated @@ -546,6 +599,18 @@ static const char *unsupported = N_("libnl was not available at build time"); static const char *unsupported = N_("not supported on non-linux platforms"); # endif +int +virNetlinkStartup(void) +{ + return 0; +} + +void +virNetlinkShutdown(void) +{ + return; +} + int virNetlinkCommand(struct nl_msg *nl_msg ATTRIBUTE_UNUSED, unsigned char **respbuf ATTRIBUTE_UNUSED, unsigned int *respbuflen ATTRIBUTE_UNUSED, diff --git a/src/util/virnetlink.h b/src/util/virnetlink.h index a72612e..93df59a 100644 --- a/src/util/virnetlink.h +++ b/src/util/virnetlink.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010-2011 Red Hat, Inc. + * Copyright (C) 2010-2012 Red Hat, Inc. * Copyright (C) 2010-2012 IBM Corporation * * This library is free software; you can redistribute it and/or @@ -35,6 +35,9 @@ struct nlattr; # endif /* __linux__ */ +int virNetlinkStartup(void); +void virNetlinkShutdown(void); + int virNetlinkCommand(struct nl_msg *nl_msg, unsigned char **respbuf, unsigned int *respbuflen, int nl_pid); -- 1.7.10 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list