On Fri, Jul 19, 2024 at 02:58:23PM +0200, Boris Fiuczynski wrote: > In cases when a QEMU process takes longer than the time sigterm and > sigkill are issued to kill the process do not simply fail and leave the > VM in state VIR_DOMAIN_SHUTDOWN until the daemon stops. Instead set up > an fd on /proc/$pid and get notified when the QEMU process finally has > terminated to cleanup the VM state. > > Resolves: https://issues.redhat.com/browse/RHEL-28819 > Signed-off-by: Boris Fiuczynski <fiuczy@xxxxxxxxxxxxx> > --- > src/qemu/qemu_domain.c | 8 +++ > src/qemu/qemu_domain.h | 2 + > src/qemu/qemu_driver.c | 18 +++++++ > src/qemu/qemu_process.c | 115 ++++++++++++++++++++++++++++++++++++++-- > src/qemu/qemu_process.h | 1 + > 5 files changed, 139 insertions(+), 5 deletions(-) > > diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c > index 2134b11038..8147ff02fd 100644 > --- a/src/qemu/qemu_domain.c > +++ b/src/qemu/qemu_domain.c > @@ -1889,6 +1889,11 @@ qemuDomainObjPrivateFree(void *data) > > virChrdevFree(priv->devs); > > + if (priv->pidMonitored >= 0) { > + virEventRemoveHandle(priv->pidMonitored); > + priv->pidMonitored = -1; > + } > + > /* This should never be non-NULL if we get here, but just in case... */ > if (priv->mon) { > VIR_ERROR(_("Unexpected QEMU monitor still active during domain deletion")); > @@ -1934,6 +1939,8 @@ qemuDomainObjPrivateAlloc(void *opaque) > priv->blockjobs = virHashNew(virObjectUnref); > priv->fds = virHashNew(g_object_unref); > > + priv->pidMonitored = -1; > + > /* agent commands block by default, user can choose different behavior */ > priv->agentTimeout = VIR_DOMAIN_AGENT_RESPONSE_TIMEOUT_BLOCK; > priv->migMaxBandwidth = QEMU_DOMAIN_MIG_BANDWIDTH_MAX; > @@ -11680,6 +11687,7 @@ qemuProcessEventFree(struct qemuProcessEvent *event) > case QEMU_PROCESS_EVENT_RESET: > case QEMU_PROCESS_EVENT_NBDKIT_EXITED: > case QEMU_PROCESS_EVENT_MONITOR_EOF: > + case QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED: > case QEMU_PROCESS_EVENT_LAST: > break; > } > diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h > index d777559119..a5092dd7f0 100644 > --- a/src/qemu/qemu_domain.h > +++ b/src/qemu/qemu_domain.h > @@ -119,6 +119,7 @@ struct _qemuDomainObjPrivate { > > bool beingDestroyed; > char *pidfile; > + int pidMonitored; > > virDomainPCIAddressSet *pciaddrs; > virDomainUSBAddressSet *usbaddrs; > @@ -469,6 +470,7 @@ typedef enum { > QEMU_PROCESS_EVENT_UNATTENDED_MIGRATION, > QEMU_PROCESS_EVENT_RESET, > QEMU_PROCESS_EVENT_NBDKIT_EXITED, > + QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED, > > QEMU_PROCESS_EVENT_LAST > } qemuProcessEventType; > diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c > index 9f3013e231..6b1e4084f6 100644 > --- a/src/qemu/qemu_driver.c > +++ b/src/qemu/qemu_driver.c > @@ -4041,6 +4041,21 @@ processNbdkitExitedEvent(virDomainObj *vm, > } > > > +static void > +processShutdownCompletedEvent(virQEMUDriver *driver, > + virDomainObj *vm) > +{ > + if (virDomainObjBeginJob(vm, VIR_JOB_MODIFY) < 0) > + return; > + > + if (virDomainObjIsActive(vm)) > + qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_UNKNOWN, > + VIR_ASYNC_JOB_NONE, 0); > + > + virDomainObjEndJob(vm); > +} > + > + > static void qemuProcessEventHandler(void *data, void *opaque) > { > struct qemuProcessEvent *processEvent = data; > @@ -4101,6 +4116,9 @@ static void qemuProcessEventHandler(void *data, void *opaque) > case QEMU_PROCESS_EVENT_NBDKIT_EXITED: > processNbdkitExitedEvent(vm, processEvent->data); > break; > + case QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED: > + processShutdownCompletedEvent(driver, vm); > + break; > case QEMU_PROCESS_EVENT_LAST: > break; > } > diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c > index 25dfd04272..86ca495a20 100644 > --- a/src/qemu/qemu_process.c > +++ b/src/qemu/qemu_process.c > @@ -25,6 +25,7 @@ > #include <unistd.h> > #include <signal.h> > #include <sys/stat.h> > +#include <sys/syscall.h> > #if defined(__linux__) > # include <linux/capability.h> > #elif defined(__FreeBSD__) > @@ -8387,9 +8388,107 @@ qemuProcessCreatePretendCmdBuild(virDomainObj *vm, > } > > > +typedef struct { > + virDomainObj *vm; > + int pidfd; > +} qemuProcessInShutdownEventData; > + > + > +static qemuProcessInShutdownEventData* > +qemuProcessInShutdownEventDataNew(virDomainObj *vm, int pidfd) > +{ > + qemuProcessInShutdownEventData *d = g_new(qemuProcessInShutdownEventData, 1); > + d->vm = virObjectRef(vm); > + d->pidfd = pidfd; > + return d; > +} > + > + > +static void > +qemuProcessInShutdownEventDataFree(qemuProcessInShutdownEventData *d) > +{ > + virObjectUnref(d->vm); > + VIR_FORCE_CLOSE(d->pidfd); > + g_free(d); > +} > + > + > +static void > +qemuProcessInShutdownPidfdCb(int watch, > + int fd, > + int events G_GNUC_UNUSED, > + void *opaque) > +{ > + qemuProcessInShutdownEventData *data = opaque; > + virDomainObj *vm = data->vm; > + > + VIR_DEBUG("vm=%p name=%s pid=%lld fd=%d", > + vm, vm->def->name, (long long)vm->pid, fd); > + > + virEventRemoveHandle(watch); > + > + virObjectLock(vm); > + > + VIR_INFO("QEMU process %lld finally completed termination", > + (long long)vm->pid); > + > + QEMU_DOMAIN_PRIVATE(vm)->pidMonitored = -1; > + qemuProcessEventSubmit(vm, QEMU_PROCESS_EVENT_SHUTDOWN_COMPLETED, > + 0, 0, NULL); > + > + virObjectUnlock(vm); > +} > + > + > +static int > +qemuProcessInShutdownStartMonitor(virDomainObj *vm) > +{ > + qemuDomainObjPrivate *priv = vm->privateData; > + qemuProcessInShutdownEventData *data; > + int pidfd; > + int ret = -1; > + > + VIR_DEBUG("vm=%p name=%s pid=%lld pidMonitored=%d", > + vm, vm->def->name, (long long)vm->pid, > + priv->pidMonitored); > + > + if (priv->pidMonitored >= 0) { > + VIR_DEBUG("Monitoring qemu in-shutdown process %i already set up", vm->pid); > + goto cleanup; > + } > + > + pidfd = syscall(SYS_pidfd_open, vm->pid, 0); > + if (pidfd < 0) { > + if (errno == ESRCH) /* process has already terminated */ > + ret = 1; > + goto cleanup; > + } > + > + data = qemuProcessInShutdownEventDataNew(vm, pidfd); > + if ((priv->pidMonitored = virEventAddHandle(pidfd, > + VIR_EVENT_HANDLE_READABLE, > + qemuProcessInShutdownPidfdCb, > + data, > + (virFreeCallback)qemuProcessInShutdownEventDataFree)) < 0) { > + virReportError(VIR_ERR_INTERNAL_ERROR, > + _("failed to monitor qemu in-shutdown process %1$i"), > + vm->pid); > + qemuProcessInShutdownEventDataFree(data); > + goto cleanup; > + } > + VIR_DEBUG("Monitoring qemu in-shutdown process %i for termination", vm->pid); > + ret = 0; > + > + cleanup: > + return ret; > +} This will need some #ifdef-ery because I believe SYS_pidfd_open won't exist on all Linux versions we need, and certainly won't exist on non-Linux, and nor will syscall() fro that matter. > + > + > int > qemuProcessKill(virDomainObj *vm, unsigned int flags) > { > + int ret = -1; > + > VIR_DEBUG("vm=%p name=%s pid=%lld flags=0x%x", > vm, vm->def->name, > (long long)vm->pid, flags); > @@ -8410,10 +8509,16 @@ qemuProcessKill(virDomainObj *vm, unsigned int flags) > > /* Request an extra delay of two seconds per current nhostdevs > * to be safe against stalls by the kernel freeing up the resources */ > - return virProcessKillPainfullyDelay(vm->pid, > - !!(flags & VIR_QEMU_PROCESS_KILL_FORCE), > - vm->def->nhostdevs * 2, > - false); > + ret = virProcessKillPainfullyDelay(vm->pid, > + !!(flags & VIR_QEMU_PROCESS_KILL_FORCE), > + vm->def->nhostdevs * 2, > + false); > + > + if (ret < 0 && (flags & VIR_QEMU_PROCESS_KILL_MONITOR_ON_ERROR)) > + if (qemuProcessInShutdownStartMonitor(vm) == 1) > + ret = 0; /* process termination detected */ > + > + return ret; > } > > > @@ -8438,7 +8543,7 @@ qemuProcessBeginStopJob(virDomainObj *vm, > * cleared inside qemuProcessStop */ > priv->beingDestroyed = true; > > - if (qemuProcessKill(vm, killFlags) < 0) > + if (qemuProcessKill(vm, killFlags|VIR_QEMU_PROCESS_KILL_MONITOR_ON_ERROR) < 0) > goto error; > > /* Wake up anything waiting on domain condition */ > diff --git a/src/qemu/qemu_process.h b/src/qemu/qemu_process.h > index cb67bfcd2d..2324aeb7bd 100644 > --- a/src/qemu/qemu_process.h > +++ b/src/qemu/qemu_process.h > @@ -180,6 +180,7 @@ typedef enum { > VIR_QEMU_PROCESS_KILL_FORCE = 1 << 0, > VIR_QEMU_PROCESS_KILL_NOWAIT = 1 << 1, > VIR_QEMU_PROCESS_KILL_NOCHECK = 1 << 2, /* bypass the running vm check */ > + VIR_QEMU_PROCESS_KILL_MONITOR_ON_ERROR = 1 << 3, /* on error enable process monitor */ > } virQemuProcessKillMode; > > int qemuProcessKill(virDomainObj *vm, unsigned int flags); > -- > 2.45.0 > With regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|