Detect and react to situations when libvirtd was restarted or killed
while a job was active.

qemuDomainObjRestoreJob() copies the recorded job state (active and
asyncJob) from the domain's private data into a caller-provided
structure and then resets the job state kept in the private data.
qemuProcessReconnect() calls it right after locking the domain object.

Later in qemuProcessReconnect(), qemuProcessRecoverJob() inspects the
saved job:

  - save/dump: the domain's CPUs are resumed, but only if the domain is
    paused with a reason matching the interrupted job (or an unknown
    reason); a failure to resume is only logged as a warning
  - incoming/outgoing migration: nothing is done yet
  - the reconnect fails if the domain is no longer active or if the
    interrupted job was a destroy job
  - query, suspend and modify jobs are left as they are
---
 src/qemu/qemu_domain.c  |   14 ++++++++
 src/qemu/qemu_domain.h  |    2 +
 src/qemu/qemu_process.c |   80 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 96 insertions(+), 0 deletions(-)

diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 062ecc7..b26308e 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -142,6 +142,20 @@ qemuDomainObjResetAsyncJob(qemuDomainObjPrivatePtr priv)
     memset(&job->signalsData, 0, sizeof(job->signalsData));
 }
 
+void
+qemuDomainObjRestoreJob(virDomainObjPtr obj,
+                        struct qemuDomainJobObj *job)
+{
+    qemuDomainObjPrivatePtr priv = obj->privateData;
+
+    memset(job, 0, sizeof(*job));
+    job->active = priv->job.active;
+    job->asyncJob = priv->job.asyncJob;
+
+    qemuDomainObjResetJob(priv);
+    qemuDomainObjResetAsyncJob(priv);
+}
+
 static void
 qemuDomainObjFreeJob(qemuDomainObjPrivatePtr priv)
 {
diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
index 17d1356..49be3d2 100644
--- a/src/qemu/qemu_domain.h
+++ b/src/qemu/qemu_domain.h
@@ -177,6 +177,8 @@ void qemuDomainObjEndNestedJob(struct qemud_driver *driver,
 void qemuDomainObjSaveJob(struct qemud_driver *driver, virDomainObjPtr obj);
 void qemuDomainObjSetAsyncJobMask(virDomainObjPtr obj,
                                   unsigned long long allowedJobs);
+void qemuDomainObjRestoreJob(virDomainObjPtr obj,
+                             struct qemuDomainJobObj *job);
 void qemuDomainObjDiscardAsyncJob(struct qemud_driver *driver,
                                   virDomainObjPtr obj);
 
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 3ffde51..49625b5 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -2223,6 +2223,80 @@ qemuProcessUpdateState(struct qemud_driver *driver, virDomainObjPtr vm)
     return 0;
 }
 
+static int
+qemuProcessRecoverJob(struct qemud_driver *driver,
+                      virDomainObjPtr vm,
+                      virConnectPtr conn,
+                      const struct qemuDomainJobObj *job)
+{
+    virDomainState state;
+    int reason;
+
+    state = virDomainObjGetState(vm, &reason);
+
+    switch (job->asyncJob) {
+    case QEMU_ASYNC_JOB_MIGRATION_OUT:
+    case QEMU_ASYNC_JOB_MIGRATION_IN:
+        /* we don't know what to do yet */
+        break;
+
+    case QEMU_ASYNC_JOB_SAVE:
+    case QEMU_ASYNC_JOB_DUMP:
+        /* TODO cancel possibly running migrate operation */
+        /* resume the domain but only if it was paused as a result of
+         * running save/dump operation */
+        if (state == VIR_DOMAIN_PAUSED &&
+            ((job->asyncJob == QEMU_ASYNC_JOB_DUMP &&
+              reason == VIR_DOMAIN_PAUSED_DUMP) ||
+             (job->asyncJob == QEMU_ASYNC_JOB_SAVE &&
+              reason == VIR_DOMAIN_PAUSED_SAVE) ||
+             reason == VIR_DOMAIN_PAUSED_UNKNOWN)) {
+            if (qemuProcessStartCPUs(driver, vm, conn,
+                                     VIR_DOMAIN_RUNNING_UNPAUSED) < 0) {
+                VIR_WARN("Could not resume domain %s after interrupted save/dump", vm->def->name);
+            }
+        }
+        break;
+
+    case QEMU_ASYNC_JOB_NONE:
+    case QEMU_ASYNC_JOB_LAST:
+        break;
+    }
+
+    if (!virDomainObjIsActive(vm))
+        return -1;
+
+    switch (job->active) {
+    case QEMU_JOB_QUERY:
+        /* harmless */
+        break;
+
+    case QEMU_JOB_DESTROY:
+        VIR_DEBUG("Domain %s should have already been destroyed",
+                  vm->def->name);
+        return -1;
+
+    case QEMU_JOB_SUSPEND:
+        /* mostly harmless */
+        break;
+
+    case QEMU_JOB_MODIFY:
+        /* XXX depending on the command we may be in an inconsistent state and
+         * we should probably fall back to "monitor error" state and refuse to
+         * continue (NOTE(review): sentence was cut off here; confirm intent) */
+        break;
+
+    case QEMU_JOB_ASYNC:
+    case QEMU_JOB_ASYNC_NESTED:
+        /* async job was already handled above */
+    case QEMU_JOB_NONE:
+    case QEMU_JOB_LAST:
+        break;
+    }
+
+    return 0;
+}
+
 struct qemuProcessReconnectData {
     virConnectPtr conn;
     struct qemud_driver *driver;
@@ -2239,9 +2313,12 @@ qemuProcessReconnect(void *payload, const void *name ATTRIBUTE_UNUSED, void *opa
     struct qemud_driver *driver = data->driver;
     qemuDomainObjPrivatePtr priv;
     virConnectPtr conn = data->conn;
+    struct qemuDomainJobObj oldjob;
 
     virDomainObjLock(obj);
 
+    qemuDomainObjRestoreJob(obj, &oldjob);
+
     VIR_DEBUG("Reconnect monitor to %p '%s'", obj, obj->def->name);
 
     priv = obj->privateData;
@@ -2287,6 +2364,9 @@ qemuProcessReconnect(void *payload, const void *name ATTRIBUTE_UNUSED, void *opa
     if (qemuProcessFiltersInstantiate(conn, obj->def))
         goto error;
 
+    if (qemuProcessRecoverJob(driver, obj, conn, &oldjob) < 0)
+        goto error;
+
     priv->job.active = QEMU_JOB_NONE;
 
     /* update domain state XML with possibly updated state in virDomainObj */
-- 
1.7.6

--
libvir-list mailing list
libvir-list@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/libvir-list