Adds the ability to monitor the nbdkit process so that we can take action in case the child exits unexpectedly. When the nbdkit process exits, we pause the vm, restart nbdkit, and then resume the vm. This allows the vm to continue working in the event of a nbdkit failure. Eventually we may want to generalize this functionality since we may need something similar for e.g. qemu-storage-daemon, etc. The process is monitored with the pidfd_open() syscall if it exists (since linux 5.3). Otherwise it resorts to checking whether the process is alive once a second. The one-second time period was chosen somewhat arbitrarily. Signed-off-by: Jonathon Jongsma <jjongsma@xxxxxxxxxx> --- meson.build | 11 +++ src/qemu/qemu_domain.c | 1 + src/qemu/qemu_domain.h | 1 + src/qemu/qemu_driver.c | 16 ++++ src/qemu/qemu_nbdkit.c | 169 ++++++++++++++++++++++++++++++++++++--- src/qemu/qemu_nbdkit.h | 8 +- src/qemu/qemu_process.c | 15 +++- src/qemu/qemu_process.h | 3 + tests/meson.build | 6 +- tests/qemuxml2argvtest.c | 6 +- 10 files changed, 219 insertions(+), 17 deletions(-) diff --git a/meson.build b/meson.build index 965ada483b..18ec312ee6 100644 --- a/meson.build +++ b/meson.build @@ -682,6 +682,13 @@ symbols = [ [ 'sched.h', 'cpu_set_t' ], ] +if host_machine.system() == 'linux' + symbols += [ + # process management + [ 'sys/syscall.h', 'SYS_pidfd_open' ], + ] +endif + foreach symbol : symbols if cc.has_header_symbol(symbol[0], symbol[1], args: '-D_GNU_SOURCE', prefix: symbol.get(2, '')) conf.set('WITH_DECL_@0@'.format(symbol[1].to_upper()), 1) @@ -2002,6 +2009,9 @@ endif conf.set_quoted('TLS_PRIORITY', get_option('tls_priority')) +if conf.has('WITH_DECL_SYS_PIDFD_OPEN') + conf.set('WITH_NBDKIT', 1) +endif # Various definitions @@ -2259,6 +2269,7 @@ misc_summary = { 'firewalld-zone': conf.has('WITH_FIREWALLD_ZONE'), 'nss': conf.has('WITH_NSS'), 'numad': conf.has('WITH_NUMAD'), + 'nbdkit': conf.has('WITH_NBDKIT'), 'Init script': init_script, 'Char device locks': chrdev_lock_files, 'Loader/NVRAM': loader_res, diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 46fe5a1cf4..2b8ece8f19 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -11511,6 +11511,7 @@ qemuProcessEventFree(struct qemuProcessEvent *event) case QEMU_PROCESS_EVENT_PR_DISCONNECT: case QEMU_PROCESS_EVENT_UNATTENDED_MIGRATION: case QEMU_PROCESS_EVENT_RESET: + case QEMU_PROCESS_EVENT_NBDKIT_EXITED: case QEMU_PROCESS_EVENT_LAST: break; } diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index ddd20e67b4..f018b45eb6 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -465,6 +465,7 @@ typedef enum { QEMU_PROCESS_EVENT_MEMORY_DEVICE_SIZE_CHANGE, QEMU_PROCESS_EVENT_UNATTENDED_MIGRATION, QEMU_PROCESS_EVENT_RESET, + QEMU_PROCESS_EVENT_NBDKIT_EXITED, QEMU_PROCESS_EVENT_LAST } qemuProcessEventType; diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index ad8428948b..c4236b872f 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -4040,6 +4040,19 @@ processResetEvent(virQEMUDriver *driver, } +static void +processNbdkitExitedEvent(virDomainObj *vm, + qemuNbdkitProcess *nbdkit) +{ + if (virDomainObjBeginJob(vm, VIR_JOB_MODIFY) < 0) + return; + + qemuNbdkitProcessRestart(nbdkit, vm); + + virDomainObjEndJob(vm); +} + + static void qemuProcessEventHandler(void *data, void *opaque) { struct qemuProcessEvent *processEvent = data; @@ -4097,6 +4110,9 @@ static void qemuProcessEventHandler(void *data, void *opaque) case QEMU_PROCESS_EVENT_RESET: processResetEvent(driver, vm); break; + case QEMU_PROCESS_EVENT_NBDKIT_EXITED: + processNbdkitExitedEvent(vm, processEvent->data); + break; case QEMU_PROCESS_EVENT_LAST: break; } diff --git a/src/qemu/qemu_nbdkit.c b/src/qemu/qemu_nbdkit.c index df638e99c0..b9ced1e5cf 100644 --- a/src/qemu/qemu_nbdkit.c +++ b/src/qemu/qemu_nbdkit.c @@ -19,6 +19,7 @@ #include <config.h> #include <glib.h> +#include <sys/syscall.h> #include "vircommand.h" #include "virerror.h" @@ -33,6 +34,7 @@ #include "qemu_nbdkit.h" #define LIBVIRT_QEMU_NBDKITPRIV_H_ALLOW #include "qemu_nbdkitpriv.h" +#include "qemu_process.h" #include "qemu_security.h" #include <fcntl.h> @@ -41,6 +43,12 @@ VIR_LOG_INIT("qemu.nbdkit"); +#if WITH_NBDKIT +# define WITHOUT_NBDKIT_UNUSED +#else +# define WITHOUT_NBDKIT_UNUSED G_GNUC_UNUSED +#endif + VIR_ENUM_IMPL(qemuNbdkitCaps, QEMU_NBDKIT_CAPS_LAST, /* 0 */ @@ -611,6 +619,116 @@ qemuNbdkitCapsCacheNew(const char *cachedir) } +void +qemuNbdkitProcessRestart(qemuNbdkitProcess *proc, + virDomainObj *vm) +{ + qemuDomainObjPrivate *vmpriv = vm->privateData; + virQEMUDriver *driver = vmpriv->driver; + + /* clean up resources associated with process */ + qemuNbdkitProcessStop(proc); + + if (qemuNbdkitProcessStart(proc, vm, driver) < 0) { + VIR_DEBUG("Unable to restart nbkdit process"); + virDomainObjTaint(vm, VIR_DOMAIN_TAINT_NBDKIT_RESTART); + } +} + + +#if WITH_NBDKIT +typedef struct { + qemuNbdkitProcess *proc; + virDomainObj *vm; +} qemuNbdkitProcessEventData; + + +static qemuNbdkitProcessEventData* +qemuNbdkitProcessEventDataNew(qemuNbdkitProcess *proc, + virDomainObj *vm) +{ + qemuNbdkitProcessEventData *d = g_new(qemuNbdkitProcessEventData, 1); + d->proc = proc; + d->vm = virObjectRef(vm); + return d; +} + + +static void +qemuNbdkitProcessEventDataFree(qemuNbdkitProcessEventData *d) +{ + virObjectUnref(d->vm); + g_free(d); +} + + +static void +qemuNbdkitProcessPidfdCb(int watch G_GNUC_UNUSED, + int fd, + int events G_GNUC_UNUSED, + void *opaque) +{ + qemuNbdkitProcessEventData *d = opaque; + + VIR_FORCE_CLOSE(fd); + /* submit an event so that it is handled in the per-vm event thread */ + qemuProcessHandleNbdkitExit(d->proc, d->vm); +} +#endif /* WITH_NBDKIT */ + + +static int +qemuNbdkitProcessStartMonitor(qemuNbdkitProcess *proc WITHOUT_NBDKIT_UNUSED, + virDomainObj *vm WITHOUT_NBDKIT_UNUSED) +{ +#if WITH_NBDKIT + int pidfd; + qemuNbdkitProcessEventData *data; + + pidfd = syscall(SYS_pidfd_open, proc->pid, 0); + if (pidfd < 0) { + virReportSystemError(errno, _("pidfd_open failed for %1$i"), proc->pid); + return -1; + } + + data = qemuNbdkitProcessEventDataNew(proc, vm); + if ((proc->eventwatch = virEventAddHandle(pidfd, + VIR_EVENT_HANDLE_READABLE, + qemuNbdkitProcessPidfdCb, + data, + (virFreeCallback)qemuNbdkitProcessEventDataFree)) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("failed to monitor nbdkit process %1$i"), + proc->pid); + VIR_FORCE_CLOSE(pidfd); + qemuNbdkitProcessEventDataFree(data); + return -1; + } + + VIR_DEBUG("Monitoring nbdkit process %i for exit", proc->pid); + + return 0; +#else + /* This should not be reachable */ + virReportError(VIR_ERR_NO_SUPPORT, "%s", + _("nbdkit support is not enabled")); + return -1; +#endif /* WITH_NBDKIT */ +} + + +static void +qemuNbdkitProcessStopMonitor(qemuNbdkitProcess *proc WITHOUT_NBDKIT_UNUSED) +{ +#if WITH_NBDKIT + if (proc->eventwatch > 0) { + virEventRemoveHandle(proc->eventwatch); + proc->eventwatch = 0; + } +#endif /* WITH_NBDKIT */ +} + + static qemuNbdkitProcess * qemuNbdkitProcessNew(virStorageSource *source, const char *pidfile, @@ -658,9 +776,11 @@ qemuNbdkitReconnectStorageSource(virStorageSource *source, static void -qemuNbdkitStorageSourceManageProcessOne(virStorageSource *source) +qemuNbdkitStorageSourceManageProcessOne(virStorageSource *source, + virDomainObj *vm) { qemuDomainStorageSourcePrivate *srcpriv = QEMU_DOMAIN_STORAGE_SOURCE_PRIVATE(source); + qemuDomainObjPrivate *vmpriv = vm->privateData; qemuNbdkitProcess *proc; if (!srcpriv) @@ -671,6 +791,9 @@ qemuNbdkitStorageSourceManageProcessOne(virStorageSource *source) if (!proc) return; + if (!proc->caps) + proc->caps = qemuGetNbdkitCaps(vmpriv->driver); + if (proc->pid <= 0) { if (virPidFileReadPath(proc->pidfile, &proc->pid) < 0) { VIR_WARN("Unable to read pidfile '%s'", proc->pidfile); @@ -678,8 +801,16 @@ qemuNbdkitStorageSourceManageProcessOne(virStorageSource *source) } } - if (virProcessKill(proc->pid, 0) < 0) - VIR_WARN("nbdkit process %i is not alive", proc->pid); + if (virProcessKill(proc->pid, 0) < 0) { + VIR_DEBUG("nbdkit process %i is not alive", proc->pid); + qemuNbdkitProcessRestart(proc, vm); + return; + } + + if (qemuNbdkitProcessStartMonitor(proc, vm) < 0) { + VIR_DEBUG("unable monitor nbdkit process"); + virDomainObjTaint(vm, VIR_DOMAIN_TAINT_NBDKIT_RESTART); + } } /** @@ -692,22 +823,28 @@ qemuNbdkitStorageSourceManageProcessOne(virStorageSource *source) * disk and is attempting to re-connect to active domains. */ void -qemuNbdkitStorageSourceManageProcess(virStorageSource *source) +qemuNbdkitStorageSourceManageProcess(virStorageSource *source, + virDomainObj *vm) { virStorageSource *backing; for (backing = source; backing != NULL; backing = backing->backingStore) - qemuNbdkitStorageSourceManageProcessOne(backing); + qemuNbdkitStorageSourceManageProcessOne(backing, vm); } bool -qemuNbdkitInitStorageSource(qemuNbdkitCaps *caps, - virStorageSource *source, - char *statedir, - const char *alias, - uid_t user, - gid_t group) +qemuNbdkitInitStorageSource(qemuNbdkitCaps *caps WITHOUT_NBDKIT_UNUSED, + virStorageSource *source WITHOUT_NBDKIT_UNUSED, + char *statedir WITHOUT_NBDKIT_UNUSED, + const char *alias WITHOUT_NBDKIT_UNUSED, + uid_t user WITHOUT_NBDKIT_UNUSED, + gid_t group WITHOUT_NBDKIT_UNUSED) { +#if !WITH_NBDKIT + /* if nbdkit support is not enabled, don't construct the object so the + * calling function will fall back to qemu storage drivers */ + return false; +#else qemuDomainStorageSourcePrivate *srcPriv = qemuDomainStorageSourcePrivateFetch(source); g_autofree char *pidname = g_strdup_printf("nbdkit-%s.pid", alias); g_autofree char *socketname = g_strdup_printf("nbdkit-%s.socket", alias); @@ -751,6 +888,7 @@ qemuNbdkitInitStorageSource(qemuNbdkitCaps *caps, srcPriv->nbdkitProcess = proc; return true; +#endif /* WITH_NBDKIT */ } @@ -968,6 +1106,8 @@ qemuNbdkitProcessBuildCommand(qemuNbdkitProcess *proc) void qemuNbdkitProcessFree(qemuNbdkitProcess *proc) { + qemuNbdkitProcessStopMonitor(proc); + g_clear_pointer(&proc->pidfile, g_free); g_clear_pointer(&proc->socketfile, g_free); g_clear_object(&proc->caps); @@ -1037,8 +1177,11 @@ qemuNbdkitProcessStart(qemuNbdkitProcess *proc, goto error; while (virTimeBackOffWait(&timebackoff)) { - if (virFileExists(proc->socketfile)) + if (virFileExists(proc->socketfile)) { + if (qemuNbdkitProcessStartMonitor(proc, vm) < 0) + goto error; return 0; + } if (virProcessKill(proc->pid, 0) == 0) continue; @@ -1069,6 +1212,8 @@ qemuNbdkitProcessStart(qemuNbdkitProcess *proc, int qemuNbdkitProcessStop(qemuNbdkitProcess *proc) { + qemuNbdkitProcessStopMonitor(proc); + if (proc->pid < 0) return 0; diff --git a/src/qemu/qemu_nbdkit.h b/src/qemu/qemu_nbdkit.h index 36a2219d82..f33b049d38 100644 --- a/src/qemu/qemu_nbdkit.h +++ b/src/qemu/qemu_nbdkit.h @@ -69,7 +69,8 @@ void qemuNbdkitStopStorageSource(virStorageSource *src); void -qemuNbdkitStorageSourceManageProcess(virStorageSource *src); +qemuNbdkitStorageSourceManageProcess(virStorageSource *src, + virDomainObj *vm); bool qemuNbdkitCapsGet(qemuNbdkitCaps *nbdkitCaps, @@ -91,6 +92,7 @@ struct _qemuNbdkitProcess { uid_t user; gid_t group; pid_t pid; + int eventwatch; }; int @@ -98,6 +100,10 @@ qemuNbdkitProcessStart(qemuNbdkitProcess *proc, virDomainObj *vm, virQEMUDriver *driver); +void +qemuNbdkitProcessRestart(qemuNbdkitProcess *proc, + virDomainObj *vm); + int qemuNbdkitProcessStop(qemuNbdkitProcess *proc); diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index d90990d8a5..a1c6cfd91e 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -8993,10 +8993,10 @@ qemuProcessReconnect(void *opaque) } for (i = 0; i < obj->def->ndisks; i++) - qemuNbdkitStorageSourceManageProcess(obj->def->disks[i]->src); + qemuNbdkitStorageSourceManageProcess(obj->def->disks[i]->src, obj); if (obj->def->os.loader && obj->def->os.loader->nvram) - qemuNbdkitStorageSourceManageProcess(obj->def->os.loader->nvram); + qemuNbdkitStorageSourceManageProcess(obj->def->os.loader->nvram, obj); /* update domain state XML with possibly updated state in virDomainObj */ if (virDomainObjSave(obj, driver->xmlopt, cfg->stateDir) < 0) @@ -9451,3 +9451,14 @@ qemuProcessQMPStart(qemuProcessQMP *proc) return 0; } + + +void +qemuProcessHandleNbdkitExit(qemuNbdkitProcess *nbdkit, + virDomainObj *vm) +{ + virObjectLock(vm); + VIR_DEBUG("nbdkit process %i died", nbdkit->pid); + qemuProcessEventSubmit(vm, QEMU_PROCESS_EVENT_NBDKIT_EXITED, 0, 0, nbdkit); + virObjectUnlock(vm); +} diff --git a/src/qemu/qemu_process.h b/src/qemu/qemu_process.h index cae1b49756..86a60d29c9 100644 --- a/src/qemu/qemu_process.h +++ b/src/qemu/qemu_process.h @@ -237,3 +237,6 @@ void qemuProcessRefreshDiskProps(virDomainDiskDef *disk, struct qemuDomainDiskInfo *info); int qemuProcessSetupEmulator(virDomainObj *vm); + +void qemuProcessHandleNbdkitExit(qemuNbdkitProcess *nbdkit, + virDomainObj *vm); diff --git a/tests/meson.build b/tests/meson.build index f05774263c..b235c5f4dd 100644 --- a/tests/meson.build +++ b/tests/meson.build @@ -456,8 +456,12 @@ if conf.has('WITH_QEMU') { 'name': 'qemuvhostusertest', 'link_with': [ test_qemu_driver_lib ], 'link_whole': [ test_file_wrapper_lib ] }, { 'name': 'qemuxml2argvtest', 'link_with': [ test_qemu_driver_lib, test_utils_qemu_monitor_lib ], 'link_whole': [ test_utils_qemu_lib, test_file_wrapper_lib ] }, { 'name': 'qemuxml2xmltest', 'link_with': [ test_qemu_driver_lib ], 'link_whole': [ test_utils_qemu_lib, test_file_wrapper_lib ] }, - { 'name': 'qemunbdkittest', 'link_with': [ test_qemu_driver_lib ], 'link_whole': [ test_utils_qemu_lib, test_file_wrapper_lib ] }, ] + if conf.has('WITH_NBDKIT') + tests += [ + { 'name': 'qemunbdkittest', 'link_with': [ test_qemu_driver_lib ], 'link_whole': [ test_utils_qemu_lib, test_file_wrapper_lib ] }, + ] + endif endif if conf.has('WITH_REMOTE') diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c index 0304f66f1d..216fd3a841 100644 --- a/tests/qemuxml2argvtest.c +++ b/tests/qemuxml2argvtest.c @@ -839,8 +839,12 @@ mymain(void) # define DO_TEST_CAPS_ARCH_VER(name, arch, ver) \ DO_TEST_CAPS_ARCH_VER_FULL(name, arch, ver, ARG_END) -# define DO_TEST_CAPS_LATEST_NBDKIT(name, ...) \ +# if WITH_NBDKIT +# define DO_TEST_CAPS_LATEST_NBDKIT(name, ...) \ DO_TEST_CAPS_ARCH_LATEST_FULL(name, "x86_64", ARG_NBDKIT_CAPS, __VA_ARGS__, QEMU_NBDKIT_CAPS_LAST, ARG_END) +# else +# define DO_TEST_CAPS_LATEST_NBDKIT(name, ...) +# endif /* WITH_NBDKIT */ # define DO_TEST_CAPS_LATEST(name) \ DO_TEST_CAPS_ARCH_LATEST(name, "x86_64") -- 2.41.0