Adds the ability to monitor the nbdkit process so that we can take action in case the child exits unexpectedly. When the nbdkit process exits, we pause the VM, restart nbdkit, and then resume the VM. This allows the VM to continue working in the event of an nbdkit failure. Eventually we may want to generalize this functionality since we may need something similar for, e.g., qemu-storage-daemon. The process is monitored with the pidfd_open() syscall if it exists (since Linux 5.3). Otherwise, it falls back to checking whether the process is alive once a second. The one-second time period was chosen somewhat arbitrarily. Signed-off-by: Jonathon Jongsma <jjongsma@xxxxxxxxxx> --- meson.build | 7 ++ src/qemu/qemu_nbdkit.c | 166 +++++++++++++++++++++++++++++++++++++++- src/qemu/qemu_nbdkit.h | 7 +- src/qemu/qemu_process.c | 4 +- 4 files changed, 177 insertions(+), 7 deletions(-) diff --git a/meson.build b/meson.build index e498b49be4..048b15ff71 100644 --- a/meson.build +++ b/meson.build @@ -645,6 +645,13 @@ symbols = [ [ 'sched.h', 'cpu_set_t' ], ] +if host_machine.system() == 'linux' + symbols += [ + # process management + [ 'sys/syscall.h', 'SYS_pidfd_open' ], + ] +endif + foreach symbol : symbols if cc.has_header_symbol(symbol[0], symbol[1], args: '-D_GNU_SOURCE', prefix: symbol.get(2, '')) conf.set('WITH_DECL_@0@'.format(symbol[1].to_upper()), 1) diff --git a/src/qemu/qemu_nbdkit.c b/src/qemu/qemu_nbdkit.c index 5848710dc2..934970e68c 100644 --- a/src/qemu/qemu_nbdkit.c +++ b/src/qemu/qemu_nbdkit.c @@ -19,9 +19,11 @@ #include <config.h> #include <glib.h> +#include <sys/syscall.h> #include "vircommand.h" #include "virerror.h" +#include "virevent.h" #include "virlog.h" #include "virpidfile.h" #include "virtime.h" @@ -34,6 +36,7 @@ #include "qemu_nbdkit.h" #define LIBVIRT_QEMU_NBDKITPRIV_H_ALLOW #include "qemu_nbdkitpriv.h" +#include "qemu_process.h" #include "qemu_security.h" #include <fcntl.h> @@ -69,6 +72,12 @@ struct _qemuNbdkitCaps { G_DEFINE_TYPE(qemuNbdkitCaps, 
qemu_nbdkit_caps, G_TYPE_OBJECT); +struct _qemuNbdkitProcessPrivate { + int pidfdwatch; + virDomainObj *vm; +}; + + enum { PIPE_FD_READ = 0, PIPE_FD_WRITE = 1 @@ -618,6 +627,137 @@ qemuNbdkitCapsCacheNew(const char *cachedir) } +static int +qemuNbdkitProcessStartMonitor(qemuNbdkitProcess *proc, + virDomainObj *vm); + + +static void +qemuNbdkitProcessHandleExit(qemuNbdkitProcess *proc) +{ + qemuNbdkitProcessPrivate *priv = proc->priv; + qemuDomainObjPrivate *vmpriv = priv->vm->privateData; + virQEMUDriver *driver = vmpriv->driver; + + VIR_DEBUG("nbdkit process %i died", proc->pid); + + /* clean up resources associated with process */ + qemuNbdkitProcessStop(proc); + + if (!priv->vm) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Cannot restart nbdkit process without an associated domain")); + return; + } + + if (qemuNbdkitProcessStart(proc, priv->vm, driver) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Unable to restart nbkdit process")); + return; + } + + qemuNbdkitProcessStartMonitor(proc, NULL); +} + + +#if WITH_DECL_SYS_PIDFD_OPEN +static void +qemuNbdkitProcessPidfdCb(int watch G_GNUC_UNUSED, + int fd, + int events G_GNUC_UNUSED, + void *opaque) +{ + qemuNbdkitProcess *proc = opaque; + + VIR_FORCE_CLOSE(fd); + qemuNbdkitProcessHandleExit(proc); +} +#else +static void +qemuNbdkitProcessTimeoutCb(int timer G_GNUC_UNUSED, + void *opaque) +{ + qemuNbdkitProcess *proc = opaque; + + if (virProcessKill(proc->pid, 0) < 0) + qemuNbdkitProcessHandleExit(proc); +} +#endif /* WITH_DECL_SYS_PIDFD_OPEN */ + + +static int +qemuNbdkitProcessStartMonitor(qemuNbdkitProcess *proc, + virDomainObj *vm) +{ + qemuNbdkitProcessPrivate *priv = proc->priv; +#if WITH_DECL_SYS_PIDFD_OPEN + int pidfd; +#endif + + if (vm) { + virObjectRef(vm); + + if (priv->vm) + virObjectUnref(priv->vm); + + priv->vm = vm; + } + + if (!priv->vm) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Cannot monitor nbdkit process without an associated domain")); + return -1; + } + +#if 
WITH_DECL_SYS_PIDFD_OPEN + pidfd = syscall(SYS_pidfd_open, proc->pid, 0); + if (pidfd < 0) + return -1; + + priv->pidfdwatch = virEventAddHandle(pidfd, + VIR_EVENT_HANDLE_READABLE, + qemuNbdkitProcessPidfdCb, + proc, NULL); +#else + /* fall back to checking once a second */ + priv->pidfdwatch = virEventAddTimeout(1000, + qemuNbdkitProcessTimeoutCb, + proc, NULL); +#endif /* WITH_DECL_SYS_PIDFD_OPEN */ + + if (priv->pidfdwatch < 0) + return -1; + + VIR_DEBUG("Monitoring nbdkit process %i for exit", proc->pid); + + return 0; +} + + +static void +qemuNbdkitProcessStopMonitor(qemuNbdkitProcess *proc) +{ + qemuNbdkitProcessPrivate *priv = proc->priv; + + if (priv->pidfdwatch > 0) { +#if WITH_DECL_SYS_PIDFD_OPEN + virEventRemoveHandle(priv->pidfdwatch); +#else + virEventRemoveTimeout(priv->pidfdwatch); +#endif /* WITH_DECL_SYS_PIDFD_OPEN */ + priv->pidfdwatch = 0; + } +} + + +static void +qemuNbdkitProcessPrivateFree(qemuNbdkitProcessPrivate *priv) +{ + virObjectUnref(priv->vm); + g_free(priv); +} + + static qemuNbdkitProcess * qemuNbdkitProcessNew(virStorageSource *source, const char *pidfile, @@ -631,6 +771,7 @@ qemuNbdkitProcessNew(virStorageSource *source, nbdkit->pid = -1; nbdkit->pidfile = g_strdup(pidfile); nbdkit->socketfile = g_strdup(socketfile); + nbdkit->priv = g_new0(qemuNbdkitProcessPrivate, 1); return nbdkit; } @@ -665,9 +806,11 @@ qemuNbdkitReconnectStorageSource(virStorageSource *source, static int -qemuNbdkitStorageSourceManageProcessOne(virStorageSource *source) +qemuNbdkitStorageSourceManageProcessOne(virStorageSource *source, + virDomainObj *vm) { qemuDomainStorageSourcePrivate *srcpriv = QEMU_DOMAIN_STORAGE_SOURCE_PRIVATE(source); + qemuDomainObjPrivate *vmpriv = vm->privateData; qemuNbdkitProcess *proc; if (!srcpriv) @@ -676,6 +819,9 @@ qemuNbdkitStorageSourceManageProcessOne(virStorageSource *source) proc = srcpriv->nbdkitProcess; if (proc) { + if (!proc->caps) + proc->caps = qemuGetNbdkitCaps(vmpriv->driver); + if (proc->pid <= 0) { if 
(virPidFileReadPath(proc->pidfile, &proc->pid) < 0) return -1; @@ -686,6 +832,9 @@ qemuNbdkitStorageSourceManageProcessOne(virStorageSource *source) _("nbdkit process %i is not alive"), proc->pid); return -1; } + + if (qemuNbdkitProcessStartMonitor(proc, vm) < 0) + return -1; } return 0; @@ -701,15 +850,16 @@ qemuNbdkitStorageSourceManageProcessOne(virStorageSource *source) * disk and is attempting to re-connect to active domains. */ int -qemuNbdkitStorageSourceManageProcess(virStorageSource *source) +qemuNbdkitStorageSourceManageProcess(virStorageSource *source, + virDomainObj *vm) { virStorageSource *backing; for (backing = source->backingStore; backing != NULL; backing = backing->backingStore) { - if (qemuNbdkitStorageSourceManageProcessOne(backing) < 0) + if (qemuNbdkitStorageSourceManageProcessOne(backing, vm) < 0) return -1; } - return qemuNbdkitStorageSourceManageProcessOne(source); + return qemuNbdkitStorageSourceManageProcessOne(source, vm); } @@ -1005,9 +1155,12 @@ qemuNbdkitProcessBuildCommand(qemuNbdkitProcess *proc) void qemuNbdkitProcessFree(qemuNbdkitProcess *proc) { + qemuNbdkitProcessStopMonitor(proc); + g_clear_pointer(&proc->pidfile, g_free); g_clear_pointer(&proc->socketfile, g_free); g_clear_object(&proc->caps); + g_clear_pointer(&proc->priv, qemuNbdkitProcessPrivateFree); g_free(proc); } @@ -1087,6 +1240,9 @@ qemuNbdkitProcessStart(qemuNbdkitProcess *proc, goto error; } + if (qemuNbdkitProcessStartMonitor(proc, vm) < 0) + goto error; + return 0; error: @@ -1107,6 +1263,8 @@ qemuNbdkitProcessStart(qemuNbdkitProcess *proc, int qemuNbdkitProcessStop(qemuNbdkitProcess *proc) { + qemuNbdkitProcessStopMonitor(proc); + if (proc->pid < 0) return 0; diff --git a/src/qemu/qemu_nbdkit.h b/src/qemu/qemu_nbdkit.h index 8191ace522..df45f409c0 100644 --- a/src/qemu/qemu_nbdkit.h +++ b/src/qemu/qemu_nbdkit.h @@ -69,7 +69,8 @@ void qemuNbdkitStopStorageSource(virStorageSource *src); int -qemuNbdkitStorageSourceManageProcess(virStorageSource *src); 
+qemuNbdkitStorageSourceManageProcess(virStorageSource *src, + virDomainObj *vm); bool qemuNbdkitCapsGet(qemuNbdkitCaps *nbdkitCaps, @@ -82,6 +83,8 @@ qemuNbdkitCapsSet(qemuNbdkitCaps *nbdkitCaps, #define QEMU_TYPE_NBDKIT_CAPS qemu_nbdkit_caps_get_type() G_DECLARE_FINAL_TYPE(qemuNbdkitCaps, qemu_nbdkit_caps, QEMU, NBDKIT_CAPS, GObject); +typedef struct _qemuNbdkitProcessPrivate qemuNbdkitProcessPrivate; + struct _qemuNbdkitProcess { qemuNbdkitCaps *caps; virStorageSource *source; @@ -91,6 +94,8 @@ struct _qemuNbdkitProcess { uid_t user; gid_t group; pid_t pid; + + qemuNbdkitProcessPrivate *priv; }; int diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 7ec31ef6ac..54fd44fb40 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -9049,12 +9049,12 @@ qemuProcessReconnect(void *opaque) for (i = 0; i < obj->def->ndisks; i++) { virDomainDiskDef *disk = obj->def->disks[i]; - if (qemuNbdkitStorageSourceManageProcess(disk->src) < 0) + if (qemuNbdkitStorageSourceManageProcess(disk->src, obj) < 0) goto error; } if (obj->def->os.loader && obj->def->os.loader->nvram) { - if (qemuNbdkitStorageSourceManageProcess(obj->def->os.loader->nvram) < 0) + if (qemuNbdkitStorageSourceManageProcess(obj->def->os.loader->nvram, obj) < 0) goto error; } -- 2.39.0