On 7/19/24 17:44, Boris Fiuczynski wrote: > In cases when a QEMU process takes longer than the time sigterm and > sigkill are issued to kill the process do not simply fail and leave the > VM in state VIR_DOMAIN_SHUTDOWN until the daemon stops. Instead set up > an fd on /proc/$pid and get notified when the QEMU process finally has > terminated to cleanup the VM state. > > Resolves: https://issues.redhat.com/browse/RHEL-28819 > Signed-off-by: Boris Fiuczynski <fiuczy@xxxxxxxxxxxxx> > --- > src/qemu/qemu_domain.c | 8 +++ > src/qemu/qemu_domain.h | 2 + > src/qemu/qemu_driver.c | 18 ++++++ > src/qemu/qemu_process.c | 124 ++++++++++++++++++++++++++++++++++++++-- > src/qemu/qemu_process.h | 1 + > 5 files changed, 148 insertions(+), 5 deletions(-) > > diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c > index 2134b11038..8147ff02fd 100644 > --- a/src/qemu/qemu_domain.c > +++ b/src/qemu/qemu_domain.c > @@ -1889,6 +1889,11 @@ qemuDomainObjPrivateFree(void *data) > > virChrdevFree(priv->devs); > > + if (priv->pidMonitored >= 0) { > + virEventRemoveHandle(priv->pidMonitored); > + priv->pidMonitored = -1; > + } > + > /* This should never be non-NULL if we get here, but just in case... */ > if (priv->mon) { > VIR_ERROR(_("Unexpected QEMU monitor still active during domain deletion")); > @@ -1934,6 +1939,8 @@ qemuDomainObjPrivateAlloc(void *opaque) > priv->blockjobs = virHashNew(virObjectUnref); > priv->fds = virHashNew(g_object_unref); > > + priv->pidMonitored = -1; > + > /* agent commands block by default, user can choose different behavior */ > priv->agentTimeout = VIR_DOMAIN_AGENT_RESPONSE_TIMEOUT_BLOCK; > priv->migMaxBandwidth = QEMU_DOMAIN_MIG_BANDWIDTH_MAX; > @@ -11680,6 +11687,7 @@ qemuProcessEventFree(struct qemuProcessEvent *event) > case QEMU_PROCESS_EVENT_RESET: > case QEMU_PROCESS_EVENT_NBDKIT_EXITED: > case QEMU_PROCESS_EVENT_MONITOR_EOF: > + case QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED: > case QEMU_PROCESS_EVENT_LAST: > break; > } > diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h > index d777559119..a5092dd7f0 100644 > --- a/src/qemu/qemu_domain.h > +++ b/src/qemu/qemu_domain.h > @@ -119,6 +119,7 @@ struct _qemuDomainObjPrivate { > > bool beingDestroyed; > char *pidfile; > + int pidMonitored; > > virDomainPCIAddressSet *pciaddrs; > virDomainUSBAddressSet *usbaddrs; > @@ -469,6 +470,7 @@ typedef enum { > QEMU_PROCESS_EVENT_UNATTENDED_MIGRATION, > QEMU_PROCESS_EVENT_RESET, > QEMU_PROCESS_EVENT_NBDKIT_EXITED, > + QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED, > > QEMU_PROCESS_EVENT_LAST > } qemuProcessEventType; > diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c > index 9f3013e231..6b1e4084f6 100644 > --- a/src/qemu/qemu_driver.c > +++ b/src/qemu/qemu_driver.c > @@ -4041,6 +4041,21 @@ processNbdkitExitedEvent(virDomainObj *vm, > } > > > +static void > +processShutdownCompletedEvent(virQEMUDriver *driver, > + virDomainObj *vm) > +{ > + if (virDomainObjBeginJob(vm, VIR_JOB_MODIFY) < 0) > + return; Shouldn't this be: if (qemuProcessBeginStopJob(vm, VIR_JOB_DESTROY, true) < 0) return; Otherwise looking good. No need to resend, I can fix that before pushing. Michal