On 02/07/2013 06:36 AM, Michal Privoznik wrote: > With current implementation, virCommandWait unregister the stdio > callback and finish reading itself. However, the eventloop may > already have been in process of executing the callback in which > case one obtain these obscure error messages, like: > > 2013-02-06 11:41:43.766+0000: 19078: warning : virEventPollRemoveHandle:183 : Ignoring invalid remove watch -1 > 2013-02-06 11:41:43.766+0000: 19078: warning : virFileClose:65 : Tried to close invalid fd 25 > > The solution is to introduce a mutex and condition to virCommand, > and wait in virCommandWait for the eventloop to process all IOs. > This, however, requires all callees to unlock all mutexes held, > otherwise the eventloop may try to lock one of them and experience > deadlock. If that's the case, we will never be woken up to unlock > the problematic mutex. > --- > src/qemu/qemu_driver.c | 58 +++++++++++++++++++++++++--- > src/qemu/qemu_migration.c | 15 +++++++- > src/util/vircommand.c | 98 ++++++++++++++++++++++++++++++++++++----------- > src/util/virfile.c | 4 ++ > 4 files changed, 146 insertions(+), 29 deletions(-) > > diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c > index 979a027..e10fc89 100644 > --- a/src/qemu/qemu_driver.c > +++ b/src/qemu/qemu_driver.c > @@ -2757,6 +2757,7 @@ qemuDomainSaveMemory(virQEMUDriverPtr driver, > int fd = -1; > int directFlag = 0; > virFileWrapperFdPtr wrapperFd = NULL; > + int wrapperFd_ret; > unsigned int wrapperFlags = VIR_FILE_WRAPPER_NON_BLOCKING; > unsigned long long pad; > unsigned long long offset; > @@ -2833,7 +2834,15 @@ qemuDomainSaveMemory(virQEMUDriverPtr driver, > goto cleanup; > } > > - if (virFileWrapperFdClose(wrapperFd) < 0) > + /* virCommandDoAsyncIO requires us to release all locks held */ > + virObjectRef(vm); > + virObjectUnlock(vm); > + qemuDriverUnlock(driver); > + wrapperFd_ret = virFileWrapperFdClose(wrapperFd); > + qemuDriverLock(driver); > + virObjectLock(vm); > + virObjectUnref(vm); > + if (wrapperFd_ret < 0) > goto cleanup; > > if ((fd = qemuOpenFile(driver, path, O_WRONLY, NULL, NULL)) < 0) > @@ -3240,6 +3249,7 @@ doCoreDump(virQEMUDriverPtr driver, > int fd = -1; > int ret = -1; > virFileWrapperFdPtr wrapperFd = NULL; > + int wrapperFd_ret; > int directFlag = 0; > unsigned int flags = VIR_FILE_WRAPPER_NON_BLOCKING; > > @@ -3281,7 +3291,16 @@ doCoreDump(virQEMUDriverPtr driver, > path); > goto cleanup; > } > - if (virFileWrapperFdClose(wrapperFd) < 0) > + > + /* virCommandDoAsyncIO requires us to release all locks held */ > + virObjectRef(vm); > + virObjectUnlock(vm); > + qemuDriverUnlock(driver); > + wrapperFd_ret = virFileWrapperFdClose(wrapperFd); > + qemuDriverLock(driver); > + virObjectLock(vm); > + virObjectUnref(vm); > + if (wrapperFd_ret < 0) > goto cleanup; > > ret = 0; > @@ -4854,6 +4873,7 @@ qemuDomainSaveImageStartVM(virConnectPtr conn, > virDomainEventPtr event; > int intermediatefd = -1; > virCommandPtr cmd = NULL; > + int cmd_ret; > char *errbuf = NULL; > virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver); > > @@ -4901,7 +4921,15 @@ qemuDomainSaveImageStartVM(virConnectPtr conn, > VIR_FORCE_CLOSE(*fd); > } > > - if (virCommandWait(cmd, NULL) < 0) { > + /* virCommandDoAsyncIO requires us to release all locks held */ > + virObjectRef(vm); > + virObjectUnlock(vm); > + qemuDriverUnlock(driver); > + cmd_ret = virCommandWait(cmd, NULL); > + qemuDriverLock(driver); > + virObjectLock(vm); > + virObjectUnref(vm); > + if (cmd_ret < 0) { > qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED, 0); > ret = -1; > } > @@ -4976,6 +5004,7 @@ qemuDomainRestoreFlags(virConnectPtr conn, > int ret = -1; > virQEMUSaveHeader header; > virFileWrapperFdPtr wrapperFd = NULL; > + int wrapperFd_ret; > int state = -1; > > virCheckFlags(VIR_DOMAIN_SAVE_BYPASS_CACHE | > @@ -5009,7 +5038,16 @@ qemuDomainRestoreFlags(virConnectPtr conn, > > ret = qemuDomainSaveImageStartVM(conn, driver, vm, &fd, &header, path, > false); > - if (virFileWrapperFdClose(wrapperFd) < 0) > + > + /* virCommandDoAsyncIO requires us to release all locks held */ > + virObjectRef(vm); > + virObjectUnlock(vm); > + qemuDriverUnlock(driver); > + wrapperFd_ret = virFileWrapperFdClose(wrapperFd); > + qemuDriverLock(driver); > + virObjectLock(vm); > + virObjectUnref(vm); > + if (wrapperFd_ret < 0) > VIR_WARN("Failed to close %s", path); > > if (qemuDomainObjEndJob(driver, vm) == 0) > @@ -5153,6 +5191,7 @@ qemuDomainObjRestore(virConnectPtr conn, > int ret = -1; > virQEMUSaveHeader header; > virFileWrapperFdPtr wrapperFd = NULL; > + int wrapperFd_ret; > > fd = qemuDomainSaveImageOpen(driver, path, &def, &header, > bypass_cache, &wrapperFd, NULL, -1, false, > @@ -5182,7 +5221,16 @@ qemuDomainObjRestore(virConnectPtr conn, > > ret = qemuDomainSaveImageStartVM(conn, driver, vm, &fd, &header, path, > start_paused); > - if (virFileWrapperFdClose(wrapperFd) < 0) > + > + /* virCommandDoAsyncIO requires us to release all locks held */ > + virObjectRef(vm); > + virObjectUnlock(vm); > + qemuDriverUnlock(driver); > + wrapperFd_ret = virFileWrapperFdClose(wrapperFd); > + qemuDriverLock(driver); > + virObjectLock(vm); > + virObjectUnref(vm); > + if (wrapperFd_ret < 0) > VIR_WARN("Failed to close %s", path); > > cleanup: > diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c > index a75fb4e..ddbfd18 100644 > --- a/src/qemu/qemu_migration.c > +++ b/src/qemu/qemu_migration.c > @@ -3610,6 +3610,7 @@ qemuMigrationToFile(virQEMUDriverPtr driver, virDomainObjPtr vm, > int rc; > bool restoreLabel = false; > virCommandPtr cmd = NULL; > + int cmd_ret; > int pipeFD[2] = { -1, -1 }; > unsigned long saveMigBandwidth = priv->migMaxBandwidth; > char *errbuf = NULL; > @@ -3729,8 +3730,18 @@ qemuMigrationToFile(virQEMUDriverPtr driver, virDomainObjPtr vm, > if (rc < 0) > goto cleanup; > > - if (cmd && virCommandWait(cmd, NULL) < 0) > - goto cleanup; > + if (cmd) { > + /* virCommandDoAsyncIO requires us to release all locks held */ > + virObjectRef(vm); > + virObjectUnlock(vm); > + qemuDriverUnlock(driver); > + cmd_ret = virCommandWait(cmd, NULL); > + qemuDriverLock(driver); > + virObjectLock(vm); > + virObjectUnref(vm); > + if (cmd_ret < 0) > + goto cleanup; > + } > > ret = 0; > > diff --git a/src/util/vircommand.c b/src/util/vircommand.c > index db7dbca..97ff135 100644 > --- a/src/util/vircommand.c > +++ b/src/util/vircommand.c > @@ -42,6 +42,7 @@ > #include "virpidfile.h" > #include "virprocess.h" > #include "virbuffer.h" > +#include "virthread.h" > > #define VIR_FROM_THIS VIR_FROM_NONE > > @@ -90,6 +91,9 @@ struct _virCommand { > int outWatch; > int errWatch; > > + virMutex lock; > + virCond cond; > + > bool handshake; > int handshakeWait[2]; > int handshakeNotify[2]; > @@ -788,9 +792,24 @@ virCommandNewArgs(const char *const*args) > cmd->inWatch = cmd->outWatch = cmd->errWatch = -1; > cmd->pid = -1; > > + if (virMutexInit(&cmd->lock) < 0) { > + virReportSystemError(errno, "%s", _("Unable to init mutex")); > + goto error; > + } > + > + if (virCondInit(&cmd->cond) < 0) { > + virReportSystemError(errno, "%s", _("Unable to init cond")); > + virMutexDestroy(&cmd->lock); > + goto error; > + } > + > virCommandAddArgSet(cmd, args); > > return cmd; > + > +error: > + VIR_FREE(cmd); > + return NULL; > } > > /** > @@ -2146,6 +2165,8 @@ virCommandHandleReadWrite(int watch, int fd, int events, void *opaque) > bool eof = false; > int *fdptr = NULL, **fdptrptr = NULL; > > + virMutexLock(&cmd->lock); > + > VIR_DEBUG("watch=%d fd=%d events=%d", watch, fd, events); > errno = 0; > > @@ -2190,14 +2211,21 @@ virCommandHandleReadWrite(int watch, int fd, int events, void *opaque) > bufptr = &cmd->outbuf; > fdptr = &cmd->outfd; > fdptrptr = &cmd->outfdptr; > - } else { > + } else if (watch == cmd->errWatch) { > watchPtr = &cmd->errWatch; > bufptr = &cmd->errbuf; > fdptr = &cmd->errfd; > fdptrptr = &cmd->errfdptr; > + } else { > + /* Suspicious wakeup ... */ > + virCondBroadcast(&cmd->cond); > + virMutexUnlock(&cmd->lock); > + return; > } > > - if (events & VIR_EVENT_HANDLE_READABLE) { > + if (events & (VIR_EVENT_HANDLE_READABLE | > + VIR_EVENT_HANDLE_HANGUP | > + VIR_EVENT_HANDLE_ERROR)) { > if (**bufptr) > len = strlen(**bufptr); > > @@ -2241,7 +2269,9 @@ virCommandHandleReadWrite(int watch, int fd, int events, void *opaque) > *bufptr = NULL; > if (fdptrptr) > *fdptrptr = NULL; > + virCondBroadcast(&cmd->cond); > } > + virMutexUnlock(&cmd->lock); > } > > > @@ -2377,24 +2407,24 @@ virCommandRunAsync(virCommandPtr cmd, pid_t *pid) My gcc complained: util/vircommand.c: In function 'virCommandRunAsync': util/vircommand.c:2473:8: error: 'ret' may be used uninitialized in this function [-Werror=maybe-uninitialized] cc1: all warnings being treated as errors make[3]: *** [libvirt_util_la-vircommand.lo] Error 1 make[3]: Leaving directory `/home/jferlan/libvirt.coverity/src' make[2]: *** [all] Error 2 make[2]: Leaving directory `/home/jferlan/libvirt.coverity/src' make[1]: *** [all-recursive] Error 1 make[1]: Leaving directory `/home/jferlan/libvirt.coverity' make: *** [all] Error 2 Need to have a ret = -1 > virReportError(VIR_ERR_INTERNAL_ERROR, > _("command is already running as pid %lld"), > (long long) cmd->pid); > - return -1; > + goto cleanup; > } > > if (!synchronous && (cmd->flags & VIR_EXEC_DAEMON)) { > virReportError(VIR_ERR_INTERNAL_ERROR, "%s", > _("daemonized command cannot use virCommandRunAsync")); > - return -1; > + goto cleanup; > } > if (cmd->pwd && (cmd->flags & VIR_EXEC_DAEMON)) { > virReportError(VIR_ERR_INTERNAL_ERROR, > _("daemonized command cannot set working directory %s"), > cmd->pwd); > - return -1; > + goto cleanup; > } > if (cmd->pidfile && !(cmd->flags & VIR_EXEC_DAEMON)) { > virReportError(VIR_ERR_INTERNAL_ERROR, "%s", > _("creation of pid file requires daemonized command")); > - return -1; > + goto cleanup; > } > > str = virCommandToString(cmd); > @@ -2439,6 +2469,12 @@ virCommandRunAsync(virCommandPtr cmd, pid_t *pid) > ret = virCommandRegisterEventLoop(cmd); > } > > +cleanup: > + if (ret < 0) { > + VIR_FORCE_CLOSE(infd[0]); > + VIR_FORCE_CLOSE(infd[1]); > + cmd->infd = -1; > + } > return ret; > } > > @@ -2453,13 +2489,16 @@ virCommandRunAsync(virCommandPtr cmd, pid_t *pid) > * completion. Returns 0 if the command > * finished with the exit status set. If @exitstatus is NULL, then the > * child must exit with status 0 for this to succeed. > + * > + * In case virCommandDoAsyncIO() has been requested previously on > + * @cmd, this command may block. So you are required to unlock all > + * locks held prior calling this function. > */ > int > virCommandWait(virCommandPtr cmd, int *exitstatus) > { > int ret; > int status = 0; > - const int events = VIR_EVENT_HANDLE_READABLE | VIR_EVENT_HANDLE_HANGUP; > > if (!cmd ||cmd->has_error == ENOMEM) { > virReportOOMError(); > @@ -2485,22 +2524,25 @@ virCommandWait(virCommandPtr cmd, int *exitstatus) > * and repeat the exitstatus check code ourselves. */ > ret = virProcessWait(cmd->pid, exitstatus ? exitstatus : &status); > > - if (cmd->inWatch != -1) { > - virEventRemoveHandle(cmd->inWatch); > - cmd->inWatch = -1; > - } > - > - if (cmd->outWatch != -1) { > - virEventRemoveHandle(cmd->outWatch); > - virCommandHandleReadWrite(cmd->outWatch, cmd->outfd, events, cmd); > - cmd->outWatch = -1; > - } > - > - if (cmd->errWatch != -1) { > - virEventRemoveHandle(cmd->errWatch); > - virCommandHandleReadWrite(cmd->errWatch, cmd->errfd, events, cmd); > - cmd->errWatch = -1; > + /* Hopefully, all locks has been unlocked. Otherwise, the > + * eventloop might be trying to lock one of them again, which > + * will deadlock (obviously), and we will never be woken up here */ > + virMutexLock(&cmd->lock); > + while (!(cmd->inWatch == -1 && > + cmd->outWatch == -1 && > + cmd->errWatch == -1)) { > + VIR_DEBUG("Waiting on in=%d out=%d err=%d", > + cmd->inWatch, cmd->outWatch, cmd->errWatch); > + > + if (virCondWait(&cmd->cond, &cmd->lock) < 0) { > + virReportSystemError(errno, "%s", > + _("Unable to wait on condition")); > + break; > + } > + VIR_DEBUG("Done waiting on in=%d out=%d err=%d", > + cmd->inWatch, cmd->outWatch, cmd->errWatch); > } > + virMutexUnlock(&cmd->lock); > > if (ret == 0) { > cmd->pid = -1; > @@ -2719,6 +2761,9 @@ virCommandFree(virCommandPtr cmd) > VIR_FORCE_CLOSE(cmd->transfer[i]); > } > > + virMutexDestroy(&cmd->lock); > + ignore_value(virCondDestroy(&cmd->cond)); > + > VIR_FREE(cmd->inbuf); > VIR_FORCE_CLOSE(cmd->outfd); > VIR_FORCE_CLOSE(cmd->errfd); > @@ -2772,6 +2817,7 @@ virCommandFree(virCommandPtr cmd) > * > * ... > * > + * // release all locks held (qemu, virDomainObj, ... ) > * if (virCommandWait(cmd, NULL) < 0) > * goto cleanup; > * > @@ -2789,6 +2835,14 @@ virCommandFree(virCommandPtr cmd) > * of data to be written to @cmd's stdin, don't pass any binary > * data. If you want to re-run command, you need to call this and > * buffer setting functions (virCommandSet.*Buffer) prior each run. > + * > + * Moreover, you are required to unlock all mutexes held prior calling > + * virCommandWait. Due to current implementation, the virCommandWait > + * waits for the event loop to finish all IOs on @cmd. However, if we > + * are holding a locked mutex already and the event loop decides to > + * issue a different callback which tries to lock the mutex it will > + * deadlock and don't call any other callbacks. Not even the one which > + * will unlock us. > */ > void > virCommandDoAsyncIO(virCommandPtr cmd) > diff --git a/src/util/virfile.c b/src/util/virfile.c > index b4765fb..bb33801 100644 > --- a/src/util/virfile.c > +++ b/src/util/virfile.c > @@ -278,6 +278,10 @@ virFileWrapperFdNew(int *fd ATTRIBUTE_UNUSED, > * callers can conditionally create a virFileWrapperFd wrapper but > * unconditionally call the cleanup code. To avoid deadlock, only > * call this after closing the fd resulting from virFileWrapperFdNew(). > + * > + * Since the virFileWrapperFdNew has requested asynchronous IO on > + * underlying virCommand and this uses virCommandWait so we are > + * required to unlock all locks held. > */ > int > virFileWrapperFdClose(virFileWrapperFdPtr wfd) > FYI: commandtest.c fails during valgrind test with lost resources for tests 0, 1, 4, & 18. John -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list