On Wed, Nov 17, 2010 at 03:45:28PM +0800, Hu Tao wrote: > `managed' watchdog action lets libvirtd decide what to do if a guest > watchdog fires. It has a subaction argument which is the action libvirtd > will take. Currently only `dump' subaction is defined, it tells libvirtd > to dump the guest on a watchdog event. > > `managed' watchdog action is mapped to `none' qemu watchdog action, thus > qemu will not get in the way of libvirtd's handling of watchdog events. Won't that mean that the guest will continue to execute ? I think we'd want to map it to 'pause', so the guest stops executing while we dump. > diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h > index 790ce98..49584c8 100644 > --- a/src/qemu/qemu_conf.h > +++ b/src/qemu/qemu_conf.h > @@ -106,6 +106,12 @@ enum qemud_cmd_flags { > struct qemud_driver { > virMutex lock; > > + virMutex workerPoolLock; > + int poolRef; > + struct virWorkerPool *workerPool; > + int watchdogEventCallbackID; > + virConnectPtr conn; We shouldn't be storing a virConnectPtr in here since that object for the public APIs - we should directly use the internal APIs. > @@ -4425,6 +4424,82 @@ retry: > } > } > > +struct watchdogEvent > +{ > + virDomain dom; > + int action; > + void *actionArg; > +}; > + > +static void processWatchdogEvent(void *data) > +{ > + struct watchdogEvent *wdEvent = data; > + > + switch (wdEvent->action) { > + case VIR_DOMAIN_WATCHDOG_SUBACTION_DUMP: > + { > + char *path = wdEvent->actionArg; > + char dumpfilePath[4096]; > + int i; > + > + i = snprintf(dumpfilePath, 4096, "%s/%s-%u", path, wdEvent->dom.name, (unsigned int)time(NULL)); > + dumpfilePath[i] = '\0'; It is preferrable to use 'virAsprintf()' here instead of a fixed buffer size. > + virDomainCoreDump(&wdEvent->dom, dumpfilePath, 0); Calling out to public APIs isn't allowed from the internal code. The functionality you want is mostly in the method qemudDomainCoreDump(), but it takes a virDomainPtr and you instead need one that has a virDomainObjPtr. So I'd pull the code out of qemudDomainCoreDump() into a static function you can call from here and qemudDomainCoreDump(). > + } > + break; > + } > + > + free(wdEvent->dom.name); > + free(wdEvent); > +} VIR_FREE() is required here. > + > +static int onWatchdogEvent(virConnectPtr conn, > + virDomainPtr dom, > + int action, > + void *opaque ATTRIBUTE_UNUSED) > +{ > + struct qemud_driver *driver = conn->privateData; > + struct watchdogEvent *wdEvent; > + virDomainObjPtr vm; > + > + wdEvent = malloc(sizeof(*wdEvent)); > + if (!wdEvent) > + return -1; malloc() isn't allowed in libvirt code - use VIR_ALLOC() and checkout the 'HACKING' file for guidelines. 'make syntax-check' should warn you about coding style issues like this. > + > + vm = virDomainFindByID(&driver->domains, dom->id); > + if (!vm || !vm->def->watchdog) { > + free(wdEvent); > + return -1; > + } > + > + if (vm->def->watchdog->action != VIR_DOMAIN_WATCHDOG_ACTION_MANAGED) { > + virDomainObjUnlock(vm); > + free(wdEvent); > + return 0; > + } > + > + if (action != 0) > + return 0; > + > + wdEvent->dom = *dom; > + wdEvent->dom.name = malloc(strlen(dom->name)); > + if (!wdEvent->dom.name) { > + virDomainObjUnlock(vm); > + free(wdEvent); > + return -1; > + } > + strcpy(wdEvent->dom.name, dom->name); > + > + wdEvent->action = vm->def->watchdog->subaction; > + wdEvent->actionArg = vm->def->watchdog->subactionArg; > + > + virWorkerPoolSendJob(driver->workerPool, wdEvent); > + virDomainObjUnlock(vm); > + > + return 0; > +} > + > + > static virDrvOpenStatus qemudOpen(virConnectPtr conn, > virConnectAuthPtr auth ATTRIBUTE_UNUSED, > int flags ATTRIBUTE_UNUSED) { > @@ -4481,6 +4556,30 @@ static virDrvOpenStatus qemudOpen(virConnectPtr conn, > } > } > } > + > + virMutexLock(&qemu_driver->workerPoolLock); > + if (!qemu_driver->workerPool) { > + qemu_driver->workerPool = virWorkerPoolNew(0, 1, processWatchdogEvent); > + if (qemu_driver->workerPool) { > + qemu_driver->watchdogEventCallbackID = virDomainEventCallbackListAddID(conn, > + qemu_driver->domainEventCallbacks, > + NULL, > + VIR_DOMAIN_EVENT_ID_WATCHDOG, > + VIR_DOMAIN_EVENT_CALLBACK(onWatchdogEvent), > + NULL, > + NULL); > + if (qemu_driver->watchdogEventCallbackID == -1) { > + virWorkerPoolFree(qemu_driver->workerPool); > + qemu_driver->workerPool = NULL; > + } else { > + qemu_driver->poolRef = 1; > + conn->refs--; > + } > + } > + } else > + qemu_driver->poolRef++; > + virMutexUnlock(&qemu_driver->workerPoolLock); This is where most of the problems arise. 'qemudOpen' is only run when a client app connects to libvirt. We obviously want automatic core dumps to run any time a guest crashes, not only when a client app is connected to libvirt. Instead of using the virDomainEvent... APIs, you want to directly add code to the internal method qemuHandleDomainWatchdog() which is where the watchdog events originate. The worker pool can be created right at the start of QEMU driver startup in qemudStartup() avoiding the need for any ref counting on it. > + > conn->privateData = qemu_driver; > > return VIR_DRV_OPEN_SUCCESS; > @@ -4489,6 +4588,15 @@ static virDrvOpenStatus qemudOpen(virConnectPtr conn, > static int qemudClose(virConnectPtr conn) { > struct qemud_driver *driver = conn->privateData; > > + virMutexLock(&driver->workerPoolLock); > + if (--driver->poolRef == 0) { > + conn->refs--; > + virDomainEventCallbackListRemoveID(conn, driver->domainEventCallbacks, driver->watchdogEventCallbackID); > + virWorkerPoolFree(driver->workerPool); > + driver->workerPool = NULL; > + } > + virMutexUnlock(&driver->workerPoolLock); This chunk won't be needed at all if everything is done from the qemuHandleDomainWatchdog() method. Also a separate 'workerPoolLock' is probably overkill - just protect it with the regular qemu driver lock Regards, Daniel -- |: Red Hat, Engineering, London -o- http://people.redhat.com/berrange/ :| |: http://libvirt.org -o- http://virt-manager.org -o- http://deltacloud.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :| -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list