Am Fri, 13 Nov 2015 14:36:54 -0700 schrieb Jim Fehlig <jfehlig@xxxxxxxx>: Hi, i have tested the patches provide by Jim Fehlig. The setup consist of xen 4.6.0 with libvirt 1.2.19 and the patches. As i describe in my posting in the libvirt-user list (https://www.redhat.com/archives/libvir-list/2015-November/msg00130.html), the system resume 20 VM´s in parallel, attach a block-device and then attach a network-device, run 60 seconds, destroy each vm and starts again. until now the test-system has done 1820 cycles i observe , until now, no errors, no crashes and also the system doesn´t freeze. Everything looks good. so thank´s a lot for your time, work and help. all the best max > Prior to this patch, libxl events were delivered to libvirt via > the libxlDomainEventHandler callback registered with libxl. > Documenation in $xensrc/tools/libxl/libxl_event.h states that the > callback "may occur on any thread in which the application calls > libxl". This can result in deadlock since many of the libvirt > callees of libxl hold a lock on the virDomainObj they are working > on. When the callback is invoked, it attempts to find a virDomainObj > corresponding to the domain ID provided by libxl. Searching the > domain obj list results in locking each obj before checking if it is > active, and its ID equals the requested ID. Deadlock is possible > when attempting to lock an obj that is already locked further up > the call stack. Indeed, Max Ustermann recently reported an instance > of this deadlock > > https://www.redhat.com/archives/libvir-list/2015-November/msg00130.html > > This patch moves processing of libxl events to a thread, where > libxl_event_wait() is used to collect events. This allows processing > libxl events asynchronously in libvirt, avoiding the deadlock. > > Reported-by: max ustermann <ustermann78@xxxxxx> > Signed-off-by: Jim Fehlig <jfehlig@xxxxxxxx> > --- > > The only reservations I have about this patch come from comments > about libxl_event_wait in libxl_event.h > > Like libxl_event_check but blocks if no suitable events are > available, until some are. Uses libxl_osevent_beforepoll/ > _afterpoll so may be inefficient if very many domains are being > handled by a single program. > > libvirt does handle "very many domains" :-). But thus far, I haven't > noticed any problems in my testing. The reporter expressed interest > in testing the patch. Positive results from that testing would improve > my confidence, as would an ACK from Ian Jackson. > > src/libxl/libxl_domain.c | 130 > ++++++++++++++++++++++------------------------- > src/libxl/libxl_domain.h | 16 +----- src/libxl/libxl_driver.c | 13 > ++--- 3 files changed, 66 insertions(+), 93 deletions(-) > > diff --git a/src/libxl/libxl_domain.c b/src/libxl/libxl_domain.c > index 40dcea1..0b8c292 100644 > --- a/src/libxl/libxl_domain.c > +++ b/src/libxl/libxl_domain.c > @@ -380,27 +380,14 @@ virDomainDefParserConfig > libxlDomainDefParserConfig = { .domainPostParseCallback = > libxlDomainDefPostParse, }; > > - > -struct libxlShutdownThreadInfo > -{ > - libxlDriverPrivatePtr driver; > - virDomainObjPtr vm; > - libxl_event *event; > -}; > - > - > static void > -libxlDomainShutdownThread(void *opaque) > +libxlDomainShutdownEventHandler(libxlDriverPrivatePtr driver, > + virDomainObjPtr vm, > + libxl_event *ev) > { > - struct libxlShutdownThreadInfo *shutdown_info = opaque; > - virDomainObjPtr vm = shutdown_info->vm; > - libxl_event *ev = shutdown_info->event; > - libxlDriverPrivatePtr driver = shutdown_info->driver; > virObjectEventPtr dom_event = NULL; > libxl_shutdown_reason xl_reason = > ev->u.domain_shutdown.shutdown_reason; > - libxlDriverConfigPtr cfg; > - > - cfg = libxlDriverConfigGet(driver); > + libxlDriverConfigPtr cfg = libxlDriverConfigGet(driver); > > if (libxlDomainObjBeginJob(driver, vm, LIBXL_JOB_MODIFY) < 0) > goto cleanup; > @@ -501,74 +488,77 @@ libxlDomainShutdownThread(void *opaque) > virObjectUnlock(vm); > if (dom_event) > libxlDomainEventQueue(driver, dom_event); > - libxl_event_free(cfg->ctx, ev); > - VIR_FREE(shutdown_info); > virObjectUnref(cfg); > } > > +static int > +libxlDomainXEventsPredicate(const libxl_event *event, > + ATTRIBUTE_UNUSED void *data) > +{ > + /* > + * Currently we only handle shutdown event > + */ > + if (event->type == LIBXL_EVENT_TYPE_DOMAIN_SHUTDOWN) > + return 1; > + > + return 0; > +} > + > /* > - * Handle previously registered domain event notification from > libxenlight. > + * Process events from libxl using libxl_event_wait. > */ > -void > -libxlDomainEventHandler(void *data, VIR_LIBXL_EVENT_CONST > libxl_event *event) +static void > +libxlDomainXEventsProcess(void *opaque) > { > - libxlDriverPrivatePtr driver = data; > + libxlDriverPrivatePtr driver = opaque; > + libxl_event *event; > virDomainObjPtr vm = NULL; > - libxl_shutdown_reason xl_reason = > event->u.domain_shutdown.shutdown_reason; > - struct libxlShutdownThreadInfo *shutdown_info = NULL; > - virThread thread; > - libxlDriverConfigPtr cfg; > + libxl_shutdown_reason xl_reason; > + libxlDriverConfigPtr cfg = libxlDriverConfigGet(driver); > > - if (event->type != LIBXL_EVENT_TYPE_DOMAIN_SHUTDOWN) { > - VIR_INFO("Unhandled event type %d", event->type); > - goto error; > - } > + while (1) { > + event = NULL; > + libxl_event_wait(cfg->ctx, &event, LIBXL_EVENTMASK_ALL, > + libxlDomainXEventsPredicate, NULL); > > - /* > - * Similar to the xl implementation, ignore SUSPEND. Any > actions needed > - * after calling libxl_domain_suspend() are handled by its > callers. > - */ > - if (xl_reason == LIBXL_SHUTDOWN_REASON_SUSPEND) > - goto error; > + if (event == NULL) { > + VIR_INFO("libxl_event_wait returned a NULL event"); > + continue; > + } > > - vm = virDomainObjListFindByID(driver->domains, event->domid); > - if (!vm) { > - VIR_INFO("Received event for unknown domain ID %d", > event->domid); > - goto error; > - } > - > - /* > - * Start a thread to handle shutdown. We don't want to be tying > up > - * libxl's event machinery by doing a potentially lengthy > shutdown. > - */ > - if (VIR_ALLOC(shutdown_info) < 0) > - goto error; > - > - shutdown_info->driver = driver; > - shutdown_info->vm = vm; > - shutdown_info->event = (libxl_event *)event; > - if (virThreadCreate(&thread, false, libxlDomainShutdownThread, > - shutdown_info) < 0) { > + xl_reason = event->u.domain_shutdown.shutdown_reason; > /* > - * Not much we can do on error here except log it. > + * Similar to the xl implementation, ignore SUSPEND. Any > actions needed > + * after calling libxl_domain_suspend() are handled by its > callers. */ > - VIR_ERROR(_("Failed to create thread to handle domain > shutdown")); > - goto error; > + if (xl_reason == LIBXL_SHUTDOWN_REASON_SUSPEND) { > + libxl_event_free(cfg->ctx, event); > + continue; > + } > + > + vm = virDomainObjListFindByID(driver->domains, event->domid); > + if (!vm) { > + VIR_INFO("Received event for unknown domain ID %d", > event->domid); > + libxl_event_free(cfg->ctx, event); > + continue; > + } > + > + libxlDomainShutdownEventHandler(driver, vm, event); > + libxl_event_free(cfg->ctx, event); > } > > - /* > - * VM is unlocked and libxl_event freed in shutdown thread > - */ > - return; > - > - error: > - cfg = libxlDriverConfigGet(driver); > - /* Cast away any const */ > - libxl_event_free(cfg->ctx, (libxl_event *)event); > virObjectUnref(cfg); > - if (vm) > - virObjectUnlock(vm); > - VIR_FREE(shutdown_info); > +} > + > +int > +libxlDomainXEventsInit(libxlDriverPrivatePtr driver) > +{ > + virThread thread; > + > + if (virThreadCreate(&thread, false, libxlDomainXEventsProcess, > driver) < 0) > + return -1; > + > + return 0; > } > > void > diff --git a/src/libxl/libxl_domain.h b/src/libxl/libxl_domain.h > index 44b3e0b..18a9ddc 100644 > --- a/src/libxl/libxl_domain.h > +++ b/src/libxl/libxl_domain.h > @@ -112,20 +112,8 @@ void > libxlDomainCleanup(libxlDriverPrivatePtr driver, > virDomainObjPtr vm); > > -/* > - * Note: Xen 4.3 removed the const from the event handler signature. > - * Detect which signature to use based on > - * LIBXL_HAVE_NONCONST_EVENT_OCCURS_EVENT_ARG. > - */ > -# ifdef LIBXL_HAVE_NONCONST_EVENT_OCCURS_EVENT_ARG > -# define VIR_LIBXL_EVENT_CONST /* empty */ > -# else > -# define VIR_LIBXL_EVENT_CONST const > -# endif > - > -void > -libxlDomainEventHandler(void *data, > - VIR_LIBXL_EVENT_CONST libxl_event *event); > +int > +libxlDomainXEventsInit(libxlDriverPrivatePtr driver); > > int > libxlDomainAutoCoreDump(libxlDriverPrivatePtr driver, > diff --git a/src/libxl/libxl_driver.c b/src/libxl/libxl_driver.c > index fcdcbdb..050ed0f 100644 > --- a/src/libxl/libxl_driver.c > +++ b/src/libxl/libxl_driver.c > @@ -503,12 +503,6 @@ static const libxl_childproc_hooks > libxl_child_hooks = { #endif > }; > > -const struct libxl_event_hooks ev_hooks = { > - .event_occurs_mask = LIBXL_EVENTMASK_ALL, > - .event_occurs = libxlDomainEventHandler, > - .disaster = NULL, > -}; > - > static int > libxlAddDom0(libxlDriverPrivatePtr driver) > { > @@ -626,9 +620,6 @@ libxlStateInitialize(bool privileged, > /* Setup child process handling. See > $xen-src/tools/libxl/libxl_event.h */ > libxl_childproc_setmode(cfg->ctx, &libxl_child_hooks, cfg->ctx); > - /* Register callback to handle domain events */ > - libxl_event_register_callbacks(cfg->ctx, &ev_hooks, > libxl_driver); - > libxl_driver->config = cfg; > if (virFileMakePath(cfg->stateDir) < 0) { > virReportError(VIR_ERR_INTERNAL_ERROR, > @@ -683,6 +674,10 @@ libxlStateInitialize(bool privileged, > if (!(libxl_driver->xmlopt = libxlCreateXMLConf())) > goto error; > > + /* Start processing events from libxl */ > + if (libxlDomainXEventsInit(libxl_driver) < 0) > + goto error; > + > /* Add Domain-0 */ > if (libxlAddDom0(libxl_driver) < 0) > goto error; -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list