On Sat, Aug 20, 2011 at 01:06:10PM +0200, Michal Privoznik wrote:
> This patch introduces support for destroying a domain via the 'quit'
> monitor command, which gives qemu time to flush caches and therefore
> prevents disk corruption. However, qemu can be unresponsive, so to
> avoid waiting indefinitely we execute the command in a separate thread
> and wait a reasonable time (QEMU_QUIT_WAIT_SECONDS) on a condition. If
> we hit the timeout, qemu is qemuProcessKill'ed, which closes the
> monitor and therefore also makes the thread terminable.
>
> The synchronization between the qemu driver and the monitor-quit
> thread is done through a mutex and a condition. However, this alone is
> not enough: if a condition is signaled with nobody listening, the
> signal is lost. To prevent this, a boolean variable is used that is
> set either when the condition would be signaled with nobody listening,
> or when the driver is waiting on the condition.
> ---
>  include/libvirt/libvirt.h.in |   11 ++-
>  src/qemu/qemu_driver.c       |  132 +++++++++++++++++++++++++++++++++++++++--
>  src/qemu/qemu_monitor.c      |   13 ++++
>  src/qemu/qemu_monitor.h      |    2 +
>  src/qemu/qemu_monitor_json.c |   22 +++++++
>  src/qemu/qemu_monitor_json.h |    1 +
>  src/qemu/qemu_monitor_text.c |   20 ++++++
>  src/qemu/qemu_monitor_text.h |    1 +
>  tools/virsh.c                |    8 ++-
>  9 files changed, 198 insertions(+), 12 deletions(-)
>
> diff --git a/include/libvirt/libvirt.h.in b/include/libvirt/libvirt.h.in
> index aa29fb6..fa98b8d 100644
> --- a/include/libvirt/libvirt.h.in
> +++ b/include/libvirt/libvirt.h.in
> @@ -919,10 +919,13 @@ virConnectPtr virDomainGetConnect (virDomainPtr domain);
>   * Domain creation and destruction
>   */
>
> -/*
> - * typedef enum {
> - * } virDomainDestroyFlagsValues;
> - */
> +
> +typedef enum {
> +    VIR_DOMAIN_DESTROY_MONITOR = 1 << 0, /* In hypervisors supporting this,
> +                                            try to send 'quit' command prior
> +                                            to killing hypervisor */
> +} virDomainDestroyFlagsValues;

Rather than describing the flag in terms of the QEmu command, I would
describe its intended effects, i.e.

    VIR_DOMAIN_DESTROY_FLUSH_CACHE = 1 << 0, /* In hypervisors supporting this,
                                                try to get cache flushed prior
                                                to destroying the domain */

>  virDomainPtr            virDomainCreateXML      (virConnectPtr conn,
>                                                   const char *xmlDesc,
>                                                   unsigned int flags);
> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
> index 421a98e..6585cb4 100644
> --- a/src/qemu/qemu_driver.c
> +++ b/src/qemu/qemu_driver.c
> @@ -1564,6 +1564,42 @@ cleanup:
>      return ret;
>  }
>
> +struct qemuDomainDestroyHelperData {
> +    struct qemud_driver *driver;
> +    virDomainObjPtr vm;
> +    qemuDomainObjPrivatePtr priv;
> +    virMutex lock;
> +    virCond cond;
> +    bool guard;
> +};
> +
> +static void
> +qemuDomainDestroyHelper(void *opaque)
> +{
> +    struct qemuDomainDestroyHelperData *data = opaque;
> +    struct qemud_driver *driver = data->driver;
> +    virDomainObjPtr vm = data->vm;
> +    qemuDomainObjPrivatePtr priv = data->priv;
> +
> +    /* Job was already obtained by caller */
> +    qemuDomainObjEnterMonitorWithDriver(driver, vm);
> +    qemuMonitorQuit(priv->mon);
> +    qemuDomainObjExitMonitorWithDriver(driver, vm);
> +
> +    /* To avoid losing the signal, we need to remember
> +     * that we tried to send one but nobody was waiting
> +     * for it.
> +     */
> +    virMutexLock(&data->lock);
> +    if (data->guard) {
> +        virCondSignal(&data->cond);
> +    } else {
> +        data->guard = true;
> +    }
> +    virMutexUnlock(&data->lock);
> +}
> +
> +#define QEMU_QUIT_WAIT_SECONDS 5
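
The guard trick above is worth spelling out for readers who have not
hit the lost-wakeup problem before: a condition variable signaled with
no waiter is a no-op, so the event has to be latched in shared state.
A minimal standalone sketch of the same idea, using plain pthreads
rather than the virCond/virMutex wrappers (names here are illustrative,
not part of the patch):

    #include <pthread.h>
    #include <stdbool.h>

    /* One-shot event; 'fired' plays the role of data.guard on the
     * signaling side, so a signal sent before anyone waits is not
     * lost. Initialize with PTHREAD_MUTEX_INITIALIZER /
     * PTHREAD_COND_INITIALIZER and fired = false. */
    struct one_shot_event {
        pthread_mutex_t lock;
        pthread_cond_t cond;
        bool fired;
    };

    static void event_signal(struct one_shot_event *ev)
    {
        pthread_mutex_lock(&ev->lock);
        ev->fired = true;                /* latch the event ...     */
        pthread_cond_signal(&ev->cond);  /* ... and wake any waiter */
        pthread_mutex_unlock(&ev->lock);
    }

    static void event_wait(struct one_shot_event *ev)
    {
        pthread_mutex_lock(&ev->lock);
        while (!ev->fired)               /* loop also guards against
                                          * spurious wakeups */
            pthread_cond_wait(&ev->cond, &ev->lock);
        pthread_mutex_unlock(&ev->lock);
    }

Note the patch overloads a single boolean for both directions (waiter
present vs. event fired) instead of latching unconditionally as above;
that works, but it is exactly the kind of subtlety that deserves a
comment and debug output.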
>
>  static int
>  qemuDomainDestroyFlags(virDomainPtr dom,
> @@ -1574,8 +1610,13 @@
>      int ret = -1;
>      virDomainEventPtr event = NULL;
>      qemuDomainObjPrivatePtr priv;
> +    bool use_thread = false;
> +    bool use_kill = false;
> +    virThread destroy_thread;
> +    struct qemuDomainDestroyHelperData data;
> +    unsigned long long then;
>
> -    virCheckFlags(0, -1);
> +    virCheckFlags(VIR_DOMAIN_DESTROY_MONITOR, -1);
>
>      qemuDriverLock(driver);
>      vm = virDomainFindByUUID(&driver->domains, dom->uuid);
> @@ -1590,12 +1631,14 @@
>      priv = vm->privateData;
>      priv->fakeReboot = false;
>
> -    /* Although qemuProcessStop does this already, there may
> -     * be an outstanding job active. We want to make sure we
> -     * can kill the process even if a job is active. Killing
> -     * it now means the job will be released
> -     */
> -    qemuProcessKill(vm);
> +    if (!flags) {

please test the given flag here instead of assuming the bit we know
about now.

> +        /* Although qemuProcessStop does this already, there may
> +         * be an outstanding job active. We want to make sure we
> +         * can kill the process even if a job is active. Killing
> +         * it now means the job will be released
> +         */
> +        qemuProcessKill(vm);
> +    }
>
>      if (qemuDomainObjBeginJobWithDriver(driver, vm, QEMU_JOB_DESTROY) < 0)
>          goto cleanup;
> @@ -1606,6 +1649,70 @@
>          goto endjob;
>      }
>
> +    if (flags & VIR_DOMAIN_DESTROY_MONITOR) {
> +        /* Try to shut down the domain gracefully.
> +         * Send 'quit' to qemu. However, qemu can be unresponsive.
> +         * Therefore create a separate thread in which we execute
> +         * that monitor command. Wait at most QEMU_QUIT_WAIT_SECONDS.
> +         */
> +        if (virMutexInit(&data.lock) < 0) {
> +            virReportOOMError();
> +            goto endjob;
> +        }
> +
> +        if (virCondInit(&data.cond) < 0) {
> +            qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> +                            _("cannot initialize thread condition"));
> +            virMutexDestroy(&data.lock);
> +            goto endjob;
> +        }
> +
> +        data.driver = driver;
> +        data.vm = vm;
> +        data.priv = priv;
> +        data.guard = false;
> +
> +        if (virThreadCreate(&destroy_thread, true,
> +                            qemuDomainDestroyHelper, &data) < 0) {
> +            virReportSystemError(errno, "%s",
> +                                 _("unable to create destroy thread"));
> +            ignore_value(virCondDestroy(&data.cond));
> +            virMutexDestroy(&data.lock);
> +            goto endjob;
> +        }
> +        use_thread = true;
> +
> +        if (virTimeMs(&then) < 0)
> +            goto endjob;
> +
> +        then += QEMU_QUIT_WAIT_SECONDS * 1000;
> +
> +        /* How synchronization with the destroy thread works:
> +         * basically, wait on data.cond. But because conditions
> +         * do not 'remember' that they have been signaled,
> +         * data.guard is used. In practice, data.guard tells the
> +         * destroy thread whether we are waiting on the condition,
> +         * and tells us whether we should wait at all.
> +         */
> +        virMutexLock(&data.lock);
> +        if (!data.guard) {
> +            data.guard = true;
> +            if (virCondWaitUntil(&data.cond, &data.lock, then) < 0) {
> +                if (errno == ETIMEDOUT) {
> +                    use_kill = true;
> +                    data.guard = false;
> +                } else {
> +                    virMutexUnlock(&data.lock);
> +                    goto endjob;
> +                }
> +            }
> +        }
> +        virMutexUnlock(&data.lock);
> +
> +        if (use_kill)
> +            qemuProcessKill(vm);
> +    }
> +
>      qemuProcessStop(driver, vm, 0, VIR_DOMAIN_SHUTOFF_DESTROYED);
>      event = virDomainEventNewFromObj(vm,
>                                       VIR_DOMAIN_EVENT_STOPPED,
> @@ -1626,6 +1733,17 @@ endjob:
>          vm = NULL;
>
>  cleanup:
> +    if (use_thread) {
> +        virMutexLock(&data.lock);
> +        if (!data.guard) {
> +            data.guard = true;
> +            ignore_value(virCondWait(&data.cond, &data.lock));
> +        }
> +        virMutexUnlock(&data.lock);
> +        ignore_value(virCondDestroy(&data.cond));
> +        virMutexDestroy(&data.lock);
> +        virThreadJoin(&destroy_thread);
> +    }
>      if (vm)
>          virDomainObjUnlock(vm);
>      if (event)
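
To be concrete about my remark above on testing the flag: check the bit
we know about rather than 'any flag at all', e.g. (sketch only):

    if (!(flags & VIR_DOMAIN_DESTROY_MONITOR)) {
        /* no graceful attempt requested, kill up front as before */
        qemuProcessKill(vm);
    }

otherwise the first new flag someone adds will silently skip the
up-front kill.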
> diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c
> index db6107c..41b9c5c 100644
> --- a/src/qemu/qemu_monitor.c
> +++ b/src/qemu/qemu_monitor.c
> @@ -2475,3 +2475,16 @@ int qemuMonitorBlockJob(qemuMonitorPtr mon,
>          ret = qemuMonitorTextBlockJob(mon, device, bandwidth, info, mode);
>      return ret;
>  }
> +
> +int qemuMonitorQuit(qemuMonitorPtr mon)
> +{
> +    int ret;
> +
> +    VIR_DEBUG("mon=%p", mon);
> +
> +    if (mon->json)
> +        ret = qemuMonitorJSONQuit(mon);
> +    else
> +        ret = qemuMonitorTextQuit(mon);
> +    return ret;
> +}
> diff --git a/src/qemu/qemu_monitor.h b/src/qemu/qemu_monitor.h
> index f241c9e..3fe6bb9 100644
> --- a/src/qemu/qemu_monitor.h
> +++ b/src/qemu/qemu_monitor.h
> @@ -475,6 +475,8 @@ int qemuMonitorBlockJob(qemuMonitorPtr mon,
>                          virDomainBlockJobInfoPtr info,
>                          int mode);
>
> +int qemuMonitorQuit(qemuMonitorPtr mon);
> +
>  /**
>   * When running two dd process and using <> redirection, we need a
>   * shell that will not truncate files.  These two strings serve that
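
As an aside, the wire traffic behind the new qemuMonitorQuit is tiny;
in JSON mode the exchange should look roughly like this (the reply
shown is illustrative):

    -> { "execute": "quit" }
    <- { "return": {} }

and in text mode it is just the bare 'quit' HMP command.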
> diff --git a/src/qemu/qemu_monitor_json.c b/src/qemu/qemu_monitor_json.c
> index 7adfb26..1f078cd 100644
> --- a/src/qemu/qemu_monitor_json.c
> +++ b/src/qemu/qemu_monitor_json.c
> @@ -2956,3 +2956,25 @@ int qemuMonitorJSONBlockJob(qemuMonitorPtr mon,
>      virJSONValueFree(reply);
>      return ret;
>  }
> +
> +
> +int qemuMonitorJSONQuit(qemuMonitorPtr mon)
> +{
> +    int ret = -1;
> +    virJSONValuePtr cmd = NULL;
> +    virJSONValuePtr reply = NULL;
> +
> +    cmd = qemuMonitorJSONMakeCommand("quit", NULL);
> +
> +    if (!cmd)
> +        return -1;
> +
> +    ret = qemuMonitorJSONCommand(mon, cmd, &reply);
> +
> +    if (ret == 0)
> +        ret = qemuMonitorJSONCheckError(cmd, reply);
> +
> +    virJSONValueFree(cmd);
> +    virJSONValueFree(reply);
> +    return ret;
> +}
> diff --git a/src/qemu/qemu_monitor_json.h b/src/qemu/qemu_monitor_json.h
> index 9512793..2a7df76 100644
> --- a/src/qemu/qemu_monitor_json.h
> +++ b/src/qemu/qemu_monitor_json.h
> @@ -231,4 +231,5 @@ int qemuMonitorJSONBlockJob(qemuMonitorPtr mon,
>                              virDomainBlockJobInfoPtr info,
>                              int mode);
>
> +int qemuMonitorJSONQuit(qemuMonitorPtr mon);
>  #endif /* QEMU_MONITOR_JSON_H */
> diff --git a/src/qemu/qemu_monitor_text.c b/src/qemu/qemu_monitor_text.c
> index 335e39e..19c2690 100644
> --- a/src/qemu/qemu_monitor_text.c
> +++ b/src/qemu/qemu_monitor_text.c
> @@ -3046,3 +3046,23 @@ cleanup:
>      VIR_FREE(reply);
>      return ret;
>  }
> +
> +
> +int qemuMonitorTextQuit(qemuMonitorPtr mon)
> +{
> +    const char *cmd = "quit";
> +    char *reply = NULL;
> +    int ret = -1;
> +
> +    if (qemuMonitorHMPCommand(mon, cmd, &reply) < 0) {
> +        qemuReportError(VIR_ERR_INTERNAL_ERROR,
> +                        "%s", _("cannot run monitor command"));
> +        goto cleanup;
> +    }
> +
> +    ret = 0;
> +
> +cleanup:
> +    VIR_FREE(reply);
> +    return ret;
> +}
> diff --git a/src/qemu/qemu_monitor_text.h b/src/qemu/qemu_monitor_text.h
> index b250738..9ade938 100644
> --- a/src/qemu/qemu_monitor_text.h
> +++ b/src/qemu/qemu_monitor_text.h
> @@ -224,4 +224,5 @@ int qemuMonitorTextBlockJob(qemuMonitorPtr mon,
>                              virDomainBlockJobInfoPtr info,
>                              int mode);
>
> +int qemuMonitorTextQuit(qemuMonitorPtr mon);
>  #endif /* QEMU_MONITOR_TEXT_H */
> diff --git a/tools/virsh.c b/tools/virsh.c
> index f1eb4ca..0c18d8b 100644
> --- a/tools/virsh.c
> +++ b/tools/virsh.c
> @@ -2565,6 +2565,7 @@ static const vshCmdInfo info_destroy[] = {
>
>  static const vshCmdOptDef opts_destroy[] = {
>      {"domain", VSH_OT_DATA, VSH_OFLAG_REQ, N_("domain name, id or uuid")},
> +    {"monitor", VSH_OT_BOOL, 0, N_("try graceful destroy via monitor first")},
>      {NULL, 0, 0, NULL}
>  };
>
> @@ -2574,6 +2575,7 @@ cmdDestroy(vshControl *ctl, const vshCmd *cmd)
>      virDomainPtr dom;
>      bool ret = true;
>      const char *name;
>      int flags = 0;
>
>      if (!vshConnectionUsability(ctl, ctl->conn))
>          return false;
>
> @@ -2581,7 +2583,11 @@ cmdDestroy(vshControl *ctl, const vshCmd *cmd)
>      if (!(dom = vshCommandOptDomain(ctl, cmd, &name)))
>          return false;
>
> -    if (virDomainDestroy(dom) == 0) {
> +    if (vshCommandOptBool(cmd, "monitor"))
> +        flags |= VIR_DOMAIN_DESTROY_MONITOR;
> +
> +    if ((!flags && virDomainDestroy(dom) == 0) ||
> +        virDomainDestroyFlags(dom, flags) == 0) {
>          vshPrint(ctl, _("Domain %s destroyed\n"), name);
>      } else {
>          vshError(ctl, _("Failed to destroy domain %s"), name);
> --
> 1.7.3.4

This sounds right, but the devil lies in the details. I would add far
more debugging in the thread and around the synchronization, as this is
information we would need if something goes really wrong.
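
Concretely, something along these lines in the helper would already
help a lot (the exact messages are only a suggestion):

    static void
    qemuDomainDestroyHelper(void *opaque)
    {
        struct qemuDomainDestroyHelperData *data = opaque;
        ...
        VIR_DEBUG("vm=%p sending quit", data->vm);
        qemuDomainObjEnterMonitorWithDriver(data->driver, data->vm);
        qemuMonitorQuit(data->priv->mon);
        qemuDomainObjExitMonitorWithDriver(data->driver, data->vm);
        VIR_DEBUG("vm=%p quit returned", data->vm);

        virMutexLock(&data->lock);
        if (data->guard) {
            VIR_DEBUG("vm=%p driver is waiting, signaling it", data->vm);
            virCondSignal(&data->cond);
        } else {
            VIR_DEBUG("vm=%p nobody waiting, latching the event", data->vm);
            data->guard = true;
        }
        virMutexUnlock(&data->lock);
    }

plus matching messages in qemuDomainDestroyFlags before and after the
virCondWaitUntil, and when the timeout fires.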
Daniel

-- 
Daniel Veillard      | libxml Gnome XML XSLT toolkit  http://xmlsoft.org/
daniel@xxxxxxxxxxxx  | Rpmfind RPM search engine http://rpmfind.net/
http://veillard.com/ | virtualization library  http://libvirt.org/