On Mon, Jul 22, 2024 at 10:55:05 +0200, Michal Prívozník wrote: > On 7/19/24 17:44, Boris Fiuczynski wrote: > > In cases when a QEMU process takes longer than the time sigterm and > > sigkill are issued to kill the process do not simply fail and leave the > > VM in state VIR_DOMAIN_SHUTDOWN until the daemon stops. Instead set up > > an fd on /proc/$pid and get notified when the QEMU process finally has > > terminated to cleanup the VM state. > > > > Resolves: https://issues.redhat.com/browse/RHEL-28819 > > Signed-off-by: Boris Fiuczynski <fiuczy@xxxxxxxxxxxxx> > > --- > > src/qemu/qemu_domain.c | 8 +++ > > src/qemu/qemu_domain.h | 2 + > > src/qemu/qemu_driver.c | 18 ++++++ > > src/qemu/qemu_process.c | 124 ++++++++++++++++++++++++++++++++++++++-- > > src/qemu/qemu_process.h | 1 + > > 5 files changed, 148 insertions(+), 5 deletions(-) > > > > diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c > > index 2134b11038..8147ff02fd 100644 > > --- a/src/qemu/qemu_domain.c > > +++ b/src/qemu/qemu_domain.c > > @@ -1889,6 +1889,11 @@ qemuDomainObjPrivateFree(void *data) > > > > virChrdevFree(priv->devs); > > > > + if (priv->pidMonitored >= 0) { > > + virEventRemoveHandle(priv->pidMonitored); > > + priv->pidMonitored = -1; > > + } > > + > > /* This should never be non-NULL if we get here, but just in case... 
*/ > > if (priv->mon) { > > VIR_ERROR(_("Unexpected QEMU monitor still active during domain deletion")); > > @@ -1934,6 +1939,8 @@ qemuDomainObjPrivateAlloc(void *opaque) > > priv->blockjobs = virHashNew(virObjectUnref); > > priv->fds = virHashNew(g_object_unref); > > > > + priv->pidMonitored = -1; > > + > > /* agent commands block by default, user can choose different behavior */ > > priv->agentTimeout = VIR_DOMAIN_AGENT_RESPONSE_TIMEOUT_BLOCK; > > priv->migMaxBandwidth = QEMU_DOMAIN_MIG_BANDWIDTH_MAX; > > @@ -11680,6 +11687,7 @@ qemuProcessEventFree(struct qemuProcessEvent *event) > > case QEMU_PROCESS_EVENT_RESET: > > case QEMU_PROCESS_EVENT_NBDKIT_EXITED: > > case QEMU_PROCESS_EVENT_MONITOR_EOF: > > + case QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED: > > case QEMU_PROCESS_EVENT_LAST: > > break; > > } > > diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h > > index d777559119..a5092dd7f0 100644 > > --- a/src/qemu/qemu_domain.h > > +++ b/src/qemu/qemu_domain.h > > @@ -119,6 +119,7 @@ struct _qemuDomainObjPrivate { > > > > bool beingDestroyed; > > char *pidfile; > > + int pidMonitored; > > > > virDomainPCIAddressSet *pciaddrs; > > virDomainUSBAddressSet *usbaddrs; > > @@ -469,6 +470,7 @@ typedef enum { > > QEMU_PROCESS_EVENT_UNATTENDED_MIGRATION, > > QEMU_PROCESS_EVENT_RESET, > > QEMU_PROCESS_EVENT_NBDKIT_EXITED, > > + QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED, > > > > QEMU_PROCESS_EVENT_LAST > > } qemuProcessEventType; > > diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c > > index 9f3013e231..6b1e4084f6 100644 > > --- a/src/qemu/qemu_driver.c > > +++ b/src/qemu/qemu_driver.c > > @@ -4041,6 +4041,21 @@ processNbdkitExitedEvent(virDomainObj *vm, > > } > > > > > > +static void > > +processShutdownCompletedEvent(virQEMUDriver *driver, > > + virDomainObj *vm) > > +{ > > + if (virDomainObjBeginJob(vm, VIR_JOB_MODIFY) < 0) > > + return; > > Shouldn't this be: > > if (qemuProcessBeginStopJob(vm, VIR_JOB_DESTROY, true) < 0) > return; > > Otherwise looking 
good. No need to resend, I can fix that before pushing. And that should be followed by a call to qemuProcessEndStopJob after calling qemuProcessStop. Jirka