On Wed, Nov 29, 2023 at 07:44:19PM +0800, tugy@xxxxxxxxxxxxxxx wrote: > From: Guoyi Tu <tugy@xxxxxxxxxxxxxxx> > > Currently, libvirt creates a thread pool with only on thread to handle all > qemu monitor events for virtual machines, In the cases that if the thread > gets stuck while handling a monitor EOF event, such as unable to kill the > virtual machine process or release resources, the events of other virtual > machine will be also blocked, which will lead to the abnormal behavior of > other virtual machines. > > For instance, when another virtual machine completes a shutdown operation > and the monitor EOF event has been queued but remains unprocessed, we > immediately destroy and start the virtual machine again, at a later time > when EOF event get processed, the processMonitorEOFEvent() will kill the > virtual machine that just started. > > To address this issue, in the processMonitorEOFEvent(), we check whether > the current virtual machine's monitor object matches the one at the time > the event was generated. If they do not match, we immediately return. > > Signed-off-by: Guoyi Tu <tugy@xxxxxxxxxxxxxxx> > Signed-off-by: dengpengcheng <dengpc12@xxxxxxxxxxxxxxx> > --- > src/qemu/qemu_domain.c | 2 +- > src/qemu/qemu_driver.c | 11 +++++++++-- > src/qemu/qemu_process.c | 2 +- > 3 files changed, 11 insertions(+), 4 deletions(-) > > diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c > index 953808fcfe..435ee621df 100644 > --- a/src/qemu/qemu_domain.c > +++ b/src/qemu/qemu_domain.c > @@ -11470,7 +11470,6 @@ qemuProcessEventFree(struct qemuProcessEvent *event) > case QEMU_PROCESS_EVENT_NETDEV_STREAM_DISCONNECTED: > case QEMU_PROCESS_EVENT_NIC_RX_FILTER_CHANGED: > case QEMU_PROCESS_EVENT_SERIAL_CHANGED: > - case QEMU_PROCESS_EVENT_MONITOR_EOF: > case QEMU_PROCESS_EVENT_GUEST_CRASHLOADED: > g_free(event->data); > break; > @@ -11484,6 +11483,7 @@ qemuProcessEventFree(struct qemuProcessEvent *event) > case QEMU_PROCESS_EVENT_UNATTENDED_MIGRATION: > case QEMU_PROCESS_EVENT_RESET: > case QEMU_PROCESS_EVENT_NBDKIT_EXITED: > + case QEMU_PROCESS_EVENT_MONITOR_EOF: > case QEMU_PROCESS_EVENT_LAST: > break; > } > diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c > index d00d2a27c6..57acafb48b 100644 > --- a/src/qemu/qemu_driver.c > +++ b/src/qemu/qemu_driver.c > @@ -3854,7 +3854,8 @@ processJobStatusChangeEvent(virDomainObj *vm, > > static void > processMonitorEOFEvent(virQEMUDriver *driver, > - virDomainObj *vm) > + virDomainObj *vm, > + qemuMonitor *mon) > { > qemuDomainObjPrivate *priv = vm->privateData; > int eventReason = VIR_DOMAIN_EVENT_STOPPED_SHUTDOWN; > @@ -3863,6 +3864,12 @@ processMonitorEOFEvent(virQEMUDriver *driver, > unsigned int stopFlags = 0; > virObjectEvent *event = NULL; > > + if (priv->mon != mon) { > + VIR_DEBUG("Domain %p '%s' has been shutdown, ignoring EOF", > + vm, vm->def->name); > + return; > + } ...'mon' is a dangling pointer so its memory might have been freed, and then re-allocated the exact same memory address to the new priv->mon. IOW, this comparison is unsafe I believe. IMHO, we should record the start time of the VM in 'priv', and then pass the start time into qemuProcessEventSubmit, so we can compare it here. > + > if (qemuProcessBeginStopJob(vm, VIR_JOB_DESTROY, true) < 0) > return; > > @@ -4082,7 +4089,7 @@ static void qemuProcessEventHandler(void *data, void *opaque) > processJobStatusChangeEvent(vm, processEvent->data); > break; > case QEMU_PROCESS_EVENT_MONITOR_EOF: > - processMonitorEOFEvent(driver, vm); > + processMonitorEOFEvent(driver, vm, processEvent->data); > break; > case QEMU_PROCESS_EVENT_PR_DISCONNECT: > processPRDisconnectEvent(vm); > diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c > index f32e82bbd1..ff54e947fe 100644 > --- a/src/qemu/qemu_process.c > +++ b/src/qemu/qemu_process.c > @@ -316,7 +316,7 @@ qemuProcessHandleMonitorEOF(qemuMonitor *mon, > } > > qemuProcessEventSubmit(vm, QEMU_PROCESS_EVENT_MONITOR_EOF, > - 0, 0, NULL); > + 0, 0, priv->mon); > > /* We don't want this EOF handler to be called over and over while the > * thread is waiting for a job. > -- > 2.17.1 > _______________________________________________ > Devel mailing list -- devel@xxxxxxxxxxxxxxxxx > To unsubscribe send an email to devel-leave@xxxxxxxxxxxxxxxxx With regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :| _______________________________________________ Devel mailing list -- devel@xxxxxxxxxxxxxxxxx To unsubscribe send an email to devel-leave@xxxxxxxxxxxxxxxxx