https://bugzilla.redhat.com/show_bug.cgi?id=1047659 If a VM dies very early during an attempted connect to the guest agent while the locks are down, the domain monitor object will be freed. The object is then accessed later, because a failure during guest agent startup isn't considered fatal. In the current upstream version this doesn't lead to a crash, as virObjectLock, called when entering the monitor in qemuProcessDetectVcpuPIDs, checks the pointer before attempting to dereference (lock) it. The NULL pointer is then caught in the monitor helper code. Before the introduction of virObjectLockable - observed on 0.10.2 - the pointer was locked directly via virMutexLock, leading to a crash. To avoid this problem we need to differentiate between the guest agent not being present and the VM quitting while the locks were down. The fix reorganizes the code in qemuConnectAgent to add the check and then adds special handling to the callers. --- src/qemu/qemu_process.c | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index a27eded..cf23ff3 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -248,6 +248,17 @@ qemuConnectAgent(virQEMUDriverPtr driver, virDomainObjPtr vm) virObjectLock(vm); priv->agentStart = 0; + if (agent == NULL) + virObjectUnref(vm); + + if (!virDomainObjIsActive(vm)) { + qemuAgentClose(agent); + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("guest crashed while connecting to the guest agent")); + ret = -2; + goto cleanup; + } + if (virSecurityManagerClearSocketLabel(driver->securityManager, vm->def) < 0) { VIR_ERROR(_("Failed to clear security context for agent for %s"), @@ -255,13 +266,7 @@ qemuConnectAgent(virQEMUDriverPtr driver, virDomainObjPtr vm) goto cleanup; } - if (agent == NULL) - virObjectUnref(vm); - if (!virDomainObjIsActive(vm)) { - qemuAgentClose(agent); - goto cleanup; - } priv->agent = agent; if (priv->agent == NULL) { @@ 
-3120,6 +3125,7 @@ qemuProcessReconnect(void *opaque) int reason; virQEMUDriverConfigPtr cfg; size_t i; + int ret; memcpy(&oldjob, &data->oldjob, sizeof(oldjob)); @@ -3144,7 +3150,10 @@ qemuProcessReconnect(void *opaque) goto error; /* Failure to connect to agent shouldn't be fatal */ - if (qemuConnectAgent(driver, obj) < 0) { + if ((ret = qemuConnectAgent(driver, obj)) < 0) { + if (ret == -2) + goto error; + VIR_WARN("Cannot connect to QEMU guest agent for %s", obj->def->name); virResetLastError(); @@ -4018,7 +4027,10 @@ int qemuProcessStart(virConnectPtr conn, goto cleanup; /* Failure to connect to agent shouldn't be fatal */ - if (qemuConnectAgent(driver, vm) < 0) { + if ((ret = qemuConnectAgent(driver, vm)) < 0) { + if (ret == -2) + goto cleanup; + VIR_WARN("Cannot connect to QEMU guest agent for %s", vm->def->name); virResetLastError(); @@ -4478,6 +4490,7 @@ int qemuProcessAttach(virConnectPtr conn ATTRIBUTE_UNUSED, virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver); virCapsPtr caps = NULL; bool active = false; + int ret; VIR_DEBUG("Beginning VM attach process"); @@ -4592,7 +4605,10 @@ int qemuProcessAttach(virConnectPtr conn ATTRIBUTE_UNUSED, goto error; /* Failure to connect to agent shouldn't be fatal */ - if (qemuConnectAgent(driver, vm) < 0) { + if ((ret = qemuConnectAgent(driver, vm)) < 0) { + if (ret == -2) + goto error; + VIR_WARN("Cannot connect to QEMU guest agent for %s", vm->def->name); virResetLastError(); -- 1.8.5.2 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list