On Thu, Oct 20, 2022 at 16:59:09 -0500, Jonathon Jongsma wrote: > Adds the ability to monitor the nbdkit process so that we can take > action in case the child exits unexpectedly. > > When the nbdkit process exits, we pause the vm, restart nbdkit, and then > resume the vm. This allows the vm to continue working in the event of a > nbdkit failure. > > Eventually we may want to generalize this functionality since we may > need something similar for e.g. qemu-storage-daemon, etc. > > The process is monitored with the pidfd_open() syscall if it exists > (since linux 5.3). Otherwise it resorts to checking whether the process > is alive once a second. The one-second time period was chosen somewhat > arbitrarily. > > Signed-off-by: Jonathon Jongsma <jjongsma@xxxxxxxxxx> > --- > meson.build | 3 + > src/qemu/qemu_nbdkit.c | 220 ++++++++++++++++++++++++++++++++++++++++ > src/qemu/qemu_nbdkit.h | 10 ++ > src/qemu/qemu_process.c | 13 +++ > 4 files changed, 246 insertions(+) > > diff --git a/meson.build b/meson.build > index e4581e74dd..b4ed170ca1 100644 > --- a/meson.build > +++ b/meson.build > @@ -686,6 +686,9 @@ if host_machine.system() == 'linux' > # Check if we have new enough kernel to support BPF devices for cgroups v2 > [ 'linux/bpf.h', 'BPF_PROG_QUERY' ], > [ 'linux/bpf.h', 'BPF_CGROUP_DEVICE' ], > + > + # process management > + [ 'sys/syscall.h', 'SYS_pidfd_open' ], > ] > endif > > diff --git a/src/qemu/qemu_nbdkit.c b/src/qemu/qemu_nbdkit.c > index 0a0dc5d2a4..f17fe022ec 100644 > --- a/src/qemu/qemu_nbdkit.c > +++ b/src/qemu/qemu_nbdkit.c > @@ -21,9 +21,11 @@ > > #include <config.h> > #include <glib.h> > +#include <sys/syscall.h> > > #include "vircommand.h" > #include "virerror.h" > +#include "virevent.h" > #include "virlog.h" > #include "virpidfile.h" > #include "virtime.h" > @@ -36,6 +38,7 @@ > #include "qemu_nbdkit.h" > #define LIBVIRT_QEMU_NBDKITPRIV_H_ALLOW > #include "qemu_nbdkitpriv.h" > +#include "qemu_process.h" > #include "qemu_security.h" > > #include <fcntl.h> > @@ -72,6 +75,13 @@ struct _qemuNbdkitCaps { > G_DEFINE_TYPE(qemuNbdkitCaps, qemu_nbdkit_caps, G_TYPE_OBJECT); > > > +struct _qemuNbdkitProcessPrivate { > + int monitor; > + virQEMUDriver *driver; Note that 'driver' can be fetched from vm's privateData's 'driver' field so you don't need to have both. > + virDomainObj *vm; > +}; > + > + > enum { > PIPE_FD_READ = 0, > PIPE_FD_WRITE = 1 > @@ -588,6 +598,168 @@ qemuNbdkitCapsCacheNew(const char *cachedir) > } > > > +static int > +qemuNbdkitProcessStartMonitor(qemuNbdkitProcess *proc, > + virDomainObj *vm, > + virQEMUDriver *driver); As above. 'driver' is redundant. > + > + > +static void > +qemuNbdkitProcessHandleExit(qemuNbdkitProcess *proc) > +{ > + qemuNbdkitProcessPrivate *priv = proc->priv; > + bool was_running = false; > + > + VIR_DEBUG("nbdkit process %i died", proc->pid); > + > + /* clean up resources associated with process */ > + qemuNbdkitProcessStop(proc); > + > + if (!(priv->vm && priv->driver)) { > + VIR_WARN("Unable to restart nbdkit -- vm and driver not set"); The function that registers this handler ensures that the required stuff is set so this is dead code. And if not VIR_WARN is not the appropriate action. > + return; > + } > + > + VIR_DEBUG("restarting nbdkit process"); > + > + virObjectLock(priv->vm); > + if (virDomainObjBeginJob(priv->vm, VIR_JOB_SUSPEND) < 0) { > + VIR_WARN("can't begin job"); Same here, it's an error. > + goto cleanup; > + } > + > + /* Pause domain */ > + if (virDomainObjGetState(priv->vm, NULL) == VIR_DOMAIN_RUNNING) { > + was_running = true; > + if (qemuProcessStopCPUs(priv->driver, priv->vm, > + VIR_DOMAIN_PAUSED_IOERROR, > + VIR_ASYNC_JOB_NONE) < 0) > + goto endjob; > + VIR_DEBUG("Paused vm while we restart nbdkit backend"); I don't think we should pause the VM here, it's not like the start of NBDkit is slow. Additionally after testing this, it doesn't actually even prevent the VM from getting an I/O error after nbdkit is started back. Just a subsequent request re-establishes the connection. Please drop all of the VM state modification here. > + } > + > + if (qemuNbdkitProcessStart(proc, priv->vm, priv->driver) < 0) > + VIR_WARN("Unable to restart nbkdit process"); > + > + if (was_running && virDomainObjIsActive(priv->vm)) { > + if (qemuProcessStartCPUs(priv->driver, priv->vm, > + VIR_DOMAIN_RUNNING_UNPAUSED, > + VIR_ASYNC_JOB_NONE) < 0) { > + VIR_WARN("Unable to resume guest CPUs after nbdkit restart"); > + goto endjob; > + } > + VIR_DEBUG("Resumed vm"); > + } > + qemuNbdkitProcessStartMonitor(proc, NULL, NULL); > + > + endjob: > + virDomainObjEndJob(priv->vm); > + cleanup: > + virObjectUnlock(priv->vm); > +} > + > + > +#if WITH_DECL_SYS_PIDFD_OPEN > +static void > +qemuNbdkitProcessPidfdCb(int watch G_GNUC_UNUSED, > + int fd, > + int events G_GNUC_UNUSED, > + void *opaque) > +{ > + qemuNbdkitProcess *proc = opaque; > + > + VIR_FORCE_CLOSE(fd); > + qemuNbdkitProcessHandleExit(proc); > +} > +#else > +static void > +qemuNbdkitProcessTimeoutCb(int timer G_GNUC_UNUSED, > + void *opaque) > +{ > + qemuNbdkitProcess *proc = opaque; > + > + if (virProcessKill(proc->pid, 0) < 0) > + qemuNbdkitProcessHandleExit(proc); > +} > +#endif /* WITH_DECL_SYS_PIDFD_OPEN */ > + > + > +static int > +qemuNbdkitProcessStartMonitor(qemuNbdkitProcess *proc, > + virDomainObj *vm, > + virQEMUDriver *driver) > +{ > + qemuNbdkitProcessPrivate *priv = proc->priv; > +#if WITH_DECL_SYS_PIDFD_OPEN > + int pidfd; > +#endif > + > + if (vm) { > + virObjectRef(vm); > + > + if (priv->vm) > + virObjectUnref(priv->vm); > + > + priv->vm = vm; > + } > + > + if (driver) > + priv->driver = driver; > + > + if (!(priv->vm && priv->driver)) { > + VIR_WARN("set vm and driver before calling %s", G_STRFUNC); Report proper error here. That also captures the function name. > + return -1; > + } > + > +#if WITH_DECL_SYS_PIDFD_OPEN > + pidfd = syscall(SYS_pidfd_open, proc->pid, 0); > + if (pidfd < 0) > + return -1; > + > + priv->monitor = virEventAddHandle(pidfd, > + VIR_EVENT_HANDLE_READABLE, > + qemuNbdkitProcessPidfdCb, > + proc, NULL); > +#else > + /* fall back to checking once a second */ > + priv->monitor = virEventAddTimeout(1000, > + qemuNbdkitProcessTimeoutCb, > + proc, NULL); > +#endif /* WITH_DECL_SYS_PIDFD_OPEN */ > + > + if (priv->monitor < 0) > + return -1; > + > + VIR_DEBUG("Monitoring nbdkit process %i for exit", proc->pid); > + > + return 0; > +} > + > + > +static void > +qemuNbdkitProcessStopMonitor(qemuNbdkitProcess *proc) > +{ > + qemuNbdkitProcessPrivate *priv = proc->priv; > + > + if (priv->monitor > 0) { > +#if WITH_DECL_SYS_PIDFD_OPEN > + virEventRemoveHandle(priv->monitor); > +#else > + virEventRemoveTimeout(priv->monitor); > +#endif /* WITH_DECL_SYS_PIDFD_OPEN */ > + priv->monitor = 0; > + } > +} > + > + > +static void > +qemuNbdkitProcessPrivateFree(qemuNbdkitProcessPrivate *priv) > +{ > + virObjectUnref(priv->vm); > + g_free(priv); > +} > + > + > static qemuNbdkitProcess * > qemuNbdkitProcessNew(virStorageSource *source, > const char *pidfile, > @@ -601,6 +773,7 @@ qemuNbdkitProcessNew(virStorageSource *source, > nbdkit->pid = -1; > nbdkit->pidfile = g_strdup(pidfile); > nbdkit->socketfile = g_strdup(socketfile); > + nbdkit->priv = g_new0(qemuNbdkitProcessPrivate, 1); > > return nbdkit; > } > @@ -627,6 +800,45 @@ qemuNbdkitProcessLoad(virStorageSource *source, > } > > > +static int > +qemuNbdkitStorageSourceManageProcessOne(virStorageSource *src, > + virDomainObj *vm, > + virQEMUDriver *driver) > +{ > + qemuDomainStorageSourcePrivate *srcPriv = QEMU_DOMAIN_STORAGE_SOURCE_PRIVATE(src); > + qemuNbdkitProcess *nbdkit; > + > + if (!srcPriv) > + return 0; > + > + nbdkit = srcPriv->nbdkitProcess; > + if (nbdkit) { > + nbdkit->caps = qemuGetNbdkitCaps(nbdkit->priv->driver); On standard VM startup this will leak the existing reference assigned via qemuDomainPrepareStorageSourceNbdkit. > + > + if (qemuNbdkitProcessStartMonitor(nbdkit, vm, driver) < 0) > + return -1; > + } > + > + return 0; > +}