On Thu, May 03, 2012 at 11:10:49AM -0400, Laine Stump wrote: > This solves the problem detailed in: > > https://bugzilla.redhat.com/show_bug.cgi?id=816465 > > and further detailed in > > https://www.redhat.com/archives/libvir-list/2012-May/msg00202.htm > > A short explanation is included in the comments of the patch itself. > > Even with ACK, I will wait to push this until I have verification that > it does not break lldpad<-->libvirtd communication (if it does, I may > need to use the nl_handle allocated during virNetlinkStartup() for > virNetlinkEventServiceStart()). > --- > daemon/libvirtd.c | 6 +++++ > src/libvirt_private.syms | 2 ++ > src/util/virnetlink.c | 67 +++++++++++++++++++++++++++++++++++++++++++++- > src/util/virnetlink.h | 5 +++- > 4 files changed, 78 insertions(+), 2 deletions(-) > > diff --git a/daemon/libvirtd.c b/daemon/libvirtd.c > index b098f6a..5d57b50 100644 > --- a/daemon/libvirtd.c > +++ b/daemon/libvirtd.c > @@ -1007,6 +1007,11 @@ int main(int argc, char **argv) { > goto cleanup; > } > > + if (virNetlinkStartup() < 0) { > + ret = VIR_DAEMON_ERR_INIT; > + goto cleanup; > + } > + > if (!(srv = virNetServerNew(config->min_workers, > config->max_workers, > config->prio_workers, > @@ -1143,6 +1148,7 @@ cleanup: > virNetServerProgramFree(qemuProgram); > virNetServerClose(srv); > virNetServerFree(srv); > + virNetlinkShutdown(); > if (statuswrite != -1) { > if (ret != 0) { > /* Tell parent of daemon what failed */ > diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms > index d4038b2..e911774 100644 > --- a/src/libvirt_private.syms > +++ b/src/libvirt_private.syms > @@ -1330,6 +1330,8 @@ virNetlinkEventRemoveClient; > virNetlinkEventServiceIsRunning; > virNetlinkEventServiceStop; > virNetlinkEventServiceStart; > +virNetlinkShutdown; > +virNetlinkStartup; > > > # virnetmessage.h > diff --git a/src/util/virnetlink.c b/src/util/virnetlink.c > index b2e9d51..a249e94 100644 > --- a/src/util/virnetlink.c > +++ b/src/util/virnetlink.c > @@ -1,5 +1,5 @@ > /* > - * Copyright (C) 2010-2011 Red Hat, Inc. > + * Copyright (C) 2010-2012 Red Hat, Inc. > * Copyright (C) 2010-2012 IBM Corporation > * > * This library is free software; you can redistribute it and/or > @@ -88,10 +88,63 @@ static int nextWatch = 1; > # define NETLINK_EVENT_ALLOC_EXTENT 10 > > static virNetlinkEventSrvPrivatePtr server = NULL; > +static struct nl_handle *placeholder_nlhandle = NULL; > > /* Function definitions */ > > /** > + * virNetlinkStartup: > + * > + * Perform any initialization that needs to take place before the > + * program starts up worker threads. This is currently used to assure > + * that an nl_handle is allocated prior to any attempts to bind a > + * netlink socket. For a discussion of why this is necessary, please > + * see the following email message: > + * > + * https://www.redhat.com/archives/libvir-list/2012-May/msg00202.html > + * > + * The short version is that, without this placeholder allocation of > + * an nl_handle that is never used, it is possible for nl_connect() in > + * one thread to collide with a direct bind() of a netlink socket in > + * another thread, leading to failure of the operation (which could > + * lead to failure of libvirtd to start). Since getaddrinfo() (used by > + * libvirtd in virSocketAddrParse, which is called quite frequently > + * during startup) directly calls bind() on a netlink socket, this is > + * actually a very common occurence (15-20% failure rate on some > + * hardware). > + * > + * Returns 0 on success, -1 on failure. > + */ > +int > +virNetlinkStartup(void) > +{ > + if (placeholder_nlhandle) > + return 0; > + placeholder_nlhandle = nl_handle_alloc(); > + if (!placeholder_nlhandle) { > + virReportSystemError(errno, "%s", > + _("cannot allocate placeholder nlhandle for netlink")); > + return -1; > + } > + return 0; > +} > + > +/** > + * virNetlinkShutdown: > + * > + * Undo any initialization done by virNetlinkStartup. This currently > + * destroys the placeholder nl_handle. > + */ > +void > +virNetlinkShutdown(void) > +{ > + if (placeholder_nlhandle) { > + nl_handle_destroy(placeholder_nlhandle); > + placeholder_nlhandle = NULL; > + } > +} > + > +/** > * virNetlinkCommand: > * @nlmsg: pointer to netlink message > * @respbuf: pointer to pointer where response buffer will be allocated > @@ -535,6 +588,18 @@ static const char *unsupported = N_("libnl was not available at build time"); > static const char *unsupported = N_("not supported on non-linux platforms"); > # endif > > +int > +virNetlinkStartup(void) > +{ > + return 0; > +} > + > +void > +virNetlinkShutdown(void) > +{ > + return; > +} > + > int virNetlinkCommand(struct nl_msg *nl_msg ATTRIBUTE_UNUSED, > unsigned char **respbuf ATTRIBUTE_UNUSED, > unsigned int *respbuflen ATTRIBUTE_UNUSED, > diff --git a/src/util/virnetlink.h b/src/util/virnetlink.h > index a72612e..93df59a 100644 > --- a/src/util/virnetlink.h > +++ b/src/util/virnetlink.h > @@ -1,5 +1,5 @@ > /* > - * Copyright (C) 2010-2011 Red Hat, Inc. > + * Copyright (C) 2010-2012 Red Hat, Inc. > * Copyright (C) 2010-2012 IBM Corporation > * > * This library is free software; you can redistribute it and/or > @@ -35,6 +35,9 @@ struct nlattr; > > # endif /* __linux__ */ > > +int virNetlinkStartup(void); > +void virNetlinkShutdown(void); > + > int virNetlinkCommand(struct nl_msg *nl_msg, > unsigned char **respbuf, unsigned int *respbuflen, > int nl_pid); ACK, assuming real world testing confirms it fixes the problm Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :| -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list