[PATCH] qemuProcessStop: Don't unlock domain until most of the cleanup is done

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Imagine two threads. Thread A is executing qemuProcessStop() and
thread B is executing qemuDomainCreateXML(). To make things more
interesting, the domain XML passed to qemuDomainCreateXML matches
name + UUID of that the virDomainObj that qemuProcessStop() is
currently working with. Here's what happens.

1) Thread A locks @vm, enters qemuProcessStop().

2) Thread B parses given XML, calls virDomainObjListAdd() ->
   virDomainObjListAdd() -> virDomainObjListAddLocked() ->
   virDomainObjListFindByUUIDLocked(). Since there's a match on
   UUID, an virDomainObj object is returned and the thread
   proceeds to calling virObjectLock(). NB, it's the same object
   as in thread A.

3) Thread A sets vm->def->id = -1; this means that from this
   point on, virDomainObjIsActive() will return false.

4) Thread A calls qemuDomainObjStopWorker() which unlocks the
   @vm.

5) Thread B acquires the @vm lock and since
   virDomainObjIsActive() returns false, it proceeds to calling
   virDomainObjAssignDef() where vm->def is replaced.

6) Thread B then calls qemuProcessBeginJob() which unlocks the
   @vm temporarily.

7) Thread A, still in qemuDomainObjStopWorker() acquires @vm lock
   and proceeds with cleanup.

8) Thread A finds different definition than the one needing
   cleanup.

In my testing I've seen stale pointers, e.g.
vm->def->nets[0]->priv was NULL, which lead to a SIGSEGV as
there's  'QEMU_DOMAIN_NETWORK_PRIVATE(net)->created' line when
cleaning up nets. Your mileage may vary.

Even if we did not crash, the plain fact that vm->def is changed
in the middle of qemuProcessStop() means we might be cleaning up
something else than intended.

As a fix, I'm moving all lines that obviously touch vm->def
before the domain object is unlocked. That left
virHookCall(VIR_HOOK_QEMU_OP_STOPPED, VIR_HOOK_SUBOP_END) nearly
next to virHookCall(VIR_HOOK_QEMU_OP_RELEASE,
VIR_HOOK_SUBOP_END) which I figured is not something we want. So
I've shuffled things a bit more.

Fixes: 3865410e7f67ca4ec66e9a905e75f452762a97f0
Resolves: https://issues.redhat.com/browse/RHEL-49607
Signed-off-by: Michal Privoznik <mprivozn@xxxxxxxxxx>
---
 src/qemu/qemu_process.c | 122 ++++++++++++++++++++--------------------
 1 file changed, 62 insertions(+), 60 deletions(-)

diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 25dfd04272..9ea6c678b8 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -8530,6 +8530,18 @@ void qemuProcessStop(virQEMUDriver *driver,
                                  VIR_QEMU_PROCESS_KILL_FORCE|
                                  VIR_QEMU_PROCESS_KILL_NOCHECK));
 
+    vm->pid = 0;
+
+    /* now that we know it's stopped call the hook if present */
+    if (virHookPresent(VIR_HOOK_DRIVER_QEMU)) {
+        g_autofree char *xml = qemuDomainDefFormatXML(driver, NULL, vm->def, 0);
+
+        /* we can't stop the operation even if the script raised an error */
+        ignore_value(virHookCall(VIR_HOOK_DRIVER_QEMU, vm->def->name,
+                                 VIR_HOOK_QEMU_OP_STOPPED, VIR_HOOK_SUBOP_END,
+                                 NULL, xml, NULL));
+    }
+
     if (priv->agent) {
         g_clear_pointer(&priv->agent, qemuAgentClose);
     }
@@ -8553,25 +8565,6 @@ void qemuProcessStop(virQEMUDriver *driver,
 
     qemuDBusStop(driver, vm);
 
-    /* Only after this point we can reset 'priv->beingDestroyed' so that
-     * there's no point at which the VM could be considered as alive between
-     * entering the destroy job and this point where the active "flag" is
-     * cleared.
-     */
-    vm->def->id = -1;
-    priv->beingDestroyed = false;
-
-    /* Wake up anything waiting on domain condition */
-    virDomainObjBroadcast(vm);
-
-    /* IMPORTANT: qemuDomainObjStopWorker() unlocks @vm in order to prevent
-     * deadlocks with the per-VM event loop thread. This MUST be done after
-     * marking the VM as dead */
-    qemuDomainObjStopWorker(vm);
-
-    if (!!g_atomic_int_dec_and_test(&driver->nactive) && driver->inhibitCallback)
-        driver->inhibitCallback(false, driver->inhibitOpaque);
-
     /* Clear network bandwidth */
     virDomainClearNetBandwidth(vm->def);
 
@@ -8588,18 +8581,6 @@ void qemuProcessStop(virQEMUDriver *driver,
         }
     }
 
-    virPortAllocatorRelease(priv->nbdPort);
-    priv->nbdPort = 0;
-
-    if (priv->monConfig) {
-        if (priv->monConfig->type == VIR_DOMAIN_CHR_TYPE_UNIX)
-            unlink(priv->monConfig->data.nix.path);
-        g_clear_pointer(&priv->monConfig, virObjectUnref);
-    }
-
-    /* Remove the master key */
-    qemuDomainMasterKeyRemove(priv);
-
     ignore_value(virDomainChrDefForeach(vm->def,
                                         false,
                                         qemuProcessCleanupChardevDevice,
@@ -8609,22 +8590,6 @@ void qemuProcessStop(virQEMUDriver *driver,
     /* Its namespace is also gone then. */
     qemuDomainDestroyNamespace(driver, vm);
 
-    virFileDeleteTree(priv->libDir);
-    virFileDeleteTree(priv->channelTargetDir);
-
-    /* Stop autodestroy in case guest is restarted */
-    virCloseCallbacksDomainRemove(vm, NULL, qemuProcessAutoDestroy);
-
-    /* now that we know it's stopped call the hook if present */
-    if (virHookPresent(VIR_HOOK_DRIVER_QEMU)) {
-        g_autofree char *xml = qemuDomainDefFormatXML(driver, NULL, vm->def, 0);
-
-        /* we can't stop the operation even if the script raised an error */
-        ignore_value(virHookCall(VIR_HOOK_DRIVER_QEMU, vm->def->name,
-                                 VIR_HOOK_QEMU_OP_STOPPED, VIR_HOOK_SUBOP_END,
-                                 NULL, xml, NULL));
-    }
-
     /* Reset Security Labels unless caller don't want us to */
     if (!(flags & VIR_QEMU_PROCESS_STOP_NO_RELABEL))
         qemuSecurityRestoreAllLabel(driver, vm,
@@ -8672,8 +8637,6 @@ void qemuProcessStop(virQEMUDriver *driver,
         virResctrlAllocRemove(vm->def->resctrls[i]->alloc);
     }
 
-    qemuProcessRemoveDomainStatus(driver, vm);
-
     /* Remove VNC and Spice ports from port reservation bitmap, but only if
        they were reserved by the driver (autoport=yes)
     */
@@ -8706,20 +8669,9 @@ void qemuProcessStop(virQEMUDriver *driver,
         }
     }
 
-    for (i = 0; i < vm->ndeprecations; i++)
-        g_free(vm->deprecations[i]);
-    g_clear_pointer(&vm->deprecations, g_free);
-    vm->ndeprecations = 0;
-    vm->taint = 0;
-    vm->pid = 0;
-    virDomainObjSetState(vm, VIR_DOMAIN_SHUTOFF, reason);
     for (i = 0; i < vm->def->niothreadids; i++)
         vm->def->iothreadids[i]->thread_id = 0;
 
-    /* clean up a possible backup job */
-    if (priv->backup)
-        qemuBackupJobTerminate(vm, VIR_DOMAIN_JOB_STATUS_CANCELED);
-
     /* Do this explicitly after vm->pid is reset so that security drivers don't
      * try to enter the domain's namespace which is non-existent by now as qemu
      * is no longer running. */
@@ -8753,6 +8705,56 @@ void qemuProcessStop(virQEMUDriver *driver,
 
     qemuSecurityReleaseLabel(driver->securityManager, vm->def);
 
+    /* Only after this point we can reset 'priv->beingDestroyed' so that
+     * there's no point at which the VM could be considered as alive between
+     * entering the destroy job and this point where the active "flag" is
+     * cleared.
+     */
+    vm->def->id = -1;
+    priv->beingDestroyed = false;
+
+    /* Wake up anything waiting on domain condition */
+    virDomainObjBroadcast(vm);
+
+    /* IMPORTANT: qemuDomainObjStopWorker() unlocks @vm in order to prevent
+     * deadlocks with the per-VM event loop thread. This MUST be done after
+     * marking the VM as dead */
+    qemuDomainObjStopWorker(vm);
+
+    if (!!g_atomic_int_dec_and_test(&driver->nactive) && driver->inhibitCallback)
+        driver->inhibitCallback(false, driver->inhibitOpaque);
+
+    virPortAllocatorRelease(priv->nbdPort);
+    priv->nbdPort = 0;
+
+    if (priv->monConfig) {
+        if (priv->monConfig->type == VIR_DOMAIN_CHR_TYPE_UNIX)
+            unlink(priv->monConfig->data.nix.path);
+        g_clear_pointer(&priv->monConfig, virObjectUnref);
+    }
+
+    /* Remove the master key */
+    qemuDomainMasterKeyRemove(priv);
+
+    virFileDeleteTree(priv->libDir);
+    virFileDeleteTree(priv->channelTargetDir);
+
+    /* Stop autodestroy in case guest is restarted */
+    virCloseCallbacksDomainRemove(vm, NULL, qemuProcessAutoDestroy);
+
+    qemuProcessRemoveDomainStatus(driver, vm);
+
+    for (i = 0; i < vm->ndeprecations; i++)
+        g_free(vm->deprecations[i]);
+    g_clear_pointer(&vm->deprecations, g_free);
+    vm->ndeprecations = 0;
+    vm->taint = 0;
+    virDomainObjSetState(vm, VIR_DOMAIN_SHUTOFF, reason);
+
+    /* clean up a possible backup job */
+    if (priv->backup)
+        qemuBackupJobTerminate(vm, VIR_DOMAIN_JOB_STATUS_CANCELED);
+
     /* clear all private data entries which are no longer needed */
     qemuDomainObjPrivateDataClear(priv);
 
-- 
2.44.2




[Index of Archives]     [Virt Tools]     [Libvirt Users]     [Lib OS Info]     [Fedora Users]     [Fedora Desktop]     [Fedora SELinux]     [Big List of Linux Books]     [Yosemite News]     [KDE Users]     [Fedora Tools]

  Powered by Linux