On Fri, Mar 18, 2011 at 11:20:56AM +0000, Daniel P. Berrange wrote: > On Wed, Mar 16, 2011 at 06:30:20PM +0800, Hu Tao wrote: > > --- > > src/qemu/qemu_domain.c | 26 +----------- > > src/qemu/qemu_monitor.c | 106 ++++++++++++++++++++++++++-------------------- > > src/qemu/qemu_monitor.h | 4 +- > > 3 files changed, 64 insertions(+), 72 deletions(-) > > > > diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c > > index 8a2b9cc..e056ef0 100644 > > --- a/src/qemu/qemu_domain.c > > +++ b/src/qemu/qemu_domain.c > > @@ -566,7 +566,6 @@ void qemuDomainObjEnterMonitor(virDomainObjPtr obj) > > qemuDomainObjPrivatePtr priv = obj->privateData; > > > > qemuMonitorLock(priv->mon); > > - qemuMonitorRef(priv->mon); > > virDomainObjUnlock(obj); > > } > > > > @@ -578,19 +577,9 @@ void qemuDomainObjEnterMonitor(virDomainObjPtr obj) > > void qemuDomainObjExitMonitor(virDomainObjPtr obj) > > { > > qemuDomainObjPrivatePtr priv = obj->privateData; > > - int refs; > > - > > - refs = qemuMonitorUnref(priv->mon); > > - > > - if (refs > 0) > > - qemuMonitorUnlock(priv->mon); > > > > + qemuMonitorUnlock(priv->mon); > > virDomainObjLock(obj); > > - > > - if (refs == 0) { > > - virDomainObjUnref(obj); > > - priv->mon = NULL; > > - } > > } > > > > > > @@ -608,7 +597,6 @@ void qemuDomainObjEnterMonitorWithDriver(struct qemud_driver *driver, > > qemuDomainObjPrivatePtr priv = obj->privateData; > > > > qemuMonitorLock(priv->mon); > > - qemuMonitorRef(priv->mon); > > virDomainObjUnlock(obj); > > qemuDriverUnlock(driver); > > } > > @@ -623,20 +611,10 @@ void qemuDomainObjExitMonitorWithDriver(struct qemud_driver *driver, > > virDomainObjPtr obj) > > { > > qemuDomainObjPrivatePtr priv = obj->privateData; > > - int refs; > > - > > - refs = qemuMonitorUnref(priv->mon); > > - > > - if (refs > 0) > > - qemuMonitorUnlock(priv->mon); > > > > + qemuMonitorUnlock(priv->mon); > > qemuDriverLock(driver); > > virDomainObjLock(obj); > > - > > - if (refs == 0) { > > - virDomainObjUnref(obj); > > - priv->mon = NULL; > > - } > > } > > I don't think these changes aren't right. The EnterMonitor() method > is releasing the lock on the virDomainObjPtr. This means that other > threads (specifically the main I/O thread) can use the virDomainObjPtr > and its qemuMonitorPtr instance. Incrementing reference count on the > qemuMonitorPtr is the only thing that prevents it being free()d while > it is unlocked by this thread. > > eg, consider this > > In the main thread making a monitor call > > qemuDomainObjEnterMonitor(obj) > > qemuMonitorSetCPU(mon, 1, 1); > > qemuDomainObjExitMonitor(obj) > > > So, at step 2 there, qemuMonitorSetCPU() is asleep on the condition > variable, waiting for the reply from QEMU. > > > Now QEMU dies for some reason, so in the I/O thread, we have > qemuMonitorIO() called. This sees the EOF on the socket, so > it invokes the callback "(eofNotify)(mon, vm, failed);". > > This callback is qemuMonitorClose(), which calls qemuMonitorFree() > which releases the last reference count, which deletes the condition > variable. The other thread is now waiting on a condition variable > which has been deleted. It'll either hang forever, or crash > > We *must* prevent the monitor being free'd while a thread is running > a command, hence qemuDomainObjEnterMonitor() must call qemuMonitorRef() > and qemuDomainObjExitMonitor() must call qemuMonitorUnref(). You're all right except on when to ref the monitor. The monitor may be freed by another thread before call of qemuDomainObjEnterMonitor(), so call of qemuMonitorRef() at this time doesn't help. I think the monitor must have already been refed before a thread starts operating it. (After checking this patch carefully, I found this patch doesn't do so, and it is buggy) > > > -int qemuMonitorUnref(qemuMonitorPtr mon) > > +static void qemuMonitorFree(qemuMonitorPtr mon) > > { > > - mon->refs--; > > + virObjectUnref(&mon->obj); > > +} > > We should just get rid fo qemuMonitorFree() entirely > and make everyone call qemuMonitorUnref(). Or vica-verca. > There's no point having both methods do the same thing. qemuMonitorFree is paired with qemuMonitorAlloc to prevent people from being confused if they have to call qemuMonitorUnref to free a monitor. But it's OK to get rid of qemuMonitorFree() if well documented. > > > > > - if (mon->refs == 0) { > > - qemuMonitorUnlock(mon); > > - qemuMonitorFree(mon); > > - return 0; > > - } > > +void qemuMonitorRef(qemuMonitorPtr mon) > > +{ > > + virObjectRef(&mon->obj); > > +} > > > > - return mon->refs; > > +void qemuMonitorUnref(qemuMonitorPtr mon) > > +{ > > + virObjectUnref(&mon->obj); > > } > > > > static void > > @@ -238,7 +269,6 @@ qemuMonitorUnwatch(void *monitor) > > { > > qemuMonitorPtr mon = monitor; > > > > - qemuMonitorLock(mon); > > qemuMonitorUnref(mon); > > } > > > > @@ -519,7 +549,6 @@ qemuMonitorIO(int watch, int fd, int events, void *opaque) { > > > > /* lock access to the monitor and protect fd */ > > qemuMonitorLock(mon); > > - qemuMonitorRef(mon); > > #if DEBUG_IO > > VIR_DEBUG("Monitor %p I/O on watch %d fd %d events %d", mon, watch, fd, events); > > #endif > > @@ -587,13 +616,11 @@ qemuMonitorIO(int watch, int fd, int events, void *opaque) { > > virDomainObjPtr vm = mon->vm; > > /* Make sure anyone waiting wakes up now */ > > virCondSignal(&mon->notify); > > - if (qemuMonitorUnref(mon) > 0) > > - qemuMonitorUnlock(mon); > > + qemuMonitorUnlock(mon); > > VIR_DEBUG("Triggering EOF callback error? %d", failed); > > (eofNotify)(mon, vm, failed); > > } else { > > - if (qemuMonitorUnref(mon) > 0) > > - qemuMonitorUnlock(mon); > > + qemuMonitorUnlock(mon); > > } > > } > > > > > > > @@ -612,30 +639,13 @@ qemuMonitorOpen(virDomainObjPtr vm, > > return NULL; > > } > > > > - if (VIR_ALLOC(mon) < 0) { > > - virReportOOMError(); > > + mon = qemuMonitorAlloc(); > > + if (!mon) > > return NULL; > > - } > > > > - if (virMutexInit(&mon->lock) < 0) { > > - qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", > > - _("cannot initialize monitor mutex")); > > - VIR_FREE(mon); > > - return NULL; > > - } > > - if (virCondInit(&mon->notify) < 0) { > > - qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", > > - _("cannot initialize monitor condition")); > > - virMutexDestroy(&mon->lock); > > - VIR_FREE(mon); > > - return NULL; > > - } > > - mon->fd = -1; > > - mon->refs = 1; > > mon->vm = vm; > > mon->json = json; > > mon->cb = cb; > > - qemuMonitorLock(mon); > > > > switch (config->type) { > > case VIR_DOMAIN_CHR_TYPE_UNIX: > > @@ -668,6 +678,12 @@ qemuMonitorOpen(virDomainObjPtr vm, > > } > > > > > > + /* mon will be accessed by qemuMonitorIO which is called in > > + * event thread, so ref it before passing it to the thread. > > + * > > + * Note: mon is unrefed in qemuMonitorUnwatch > > + */ > > + qemuMonitorRef(mon); > > if ((mon->watch = virEventAddHandle(mon->fd, > > VIR_EVENT_HANDLE_HANGUP | > > VIR_EVENT_HANDLE_ERROR | > > @@ -678,10 +694,8 @@ qemuMonitorOpen(virDomainObjPtr vm, > > _("unable to register monitor events")); > > goto cleanup; > > } > > - qemuMonitorRef(mon); > > Hmm, there are quite afew other code paths higher up in this > function which also do 'goto cleanup'. You've added an extra > qemuMonitorFree() call in 'cleanup:', but only some of the > places which jump there actually obtain an extra reference. > Is that really correct ? Sorry lack of a call to qemuMonitorUnref(mon) if virEventAddHandle fails. > > > > > VIR_DEBUG("New mon %p fd =%d watch=%d", mon, mon->fd, mon->watch); > > - qemuMonitorUnlock(mon); > > > > return mon; > > > > @@ -692,8 +706,8 @@ cleanup: > > * so kill the callbacks now. > > */ > > mon->cb = NULL; > > - qemuMonitorUnlock(mon); > > qemuMonitorClose(mon); > > + qemuMonitorFree(mon); > > return NULL; > > } > > > > @@ -713,8 +727,8 @@ void qemuMonitorClose(qemuMonitorPtr mon) > > VIR_FORCE_CLOSE(mon->fd); > > } > > > > - if (qemuMonitorUnref(mon) > 0) > > - qemuMonitorUnlock(mon); > > + qemuMonitorUnlock(mon); > > + qemuMonitorFree(mon); > > } > > Daniel > -- > |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| > |: http://libvirt.org -o- http://virt-manager.org :| > |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| > |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :| -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list