[PATCH v2 21/22] rpc: don't let systemd shutdown daemon while saving VMs

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The service unit "TimeoutStopSec" setting controls how long systemd
waits for a service to stop before aggressively killing it, defaulting
to 30 seconds if not set.

When we're processing shutdown of VMs in response to OS shutdown, we
very likely need more than 30 seconds to complete this job, and
cannot stop the daemon during this time.

To avoid being prematurely killed, set up a timer that repeatedly
extends the "TimeoutStopSec" value while the stop of running VMs is
in progress.

This does mean if libvirt hangs while stopping VMs, systemd won't
get to kill the libvirt daemon, but this is considered less harmful
than forcefully killing running VMs.

Signed-off-by: Daniel P. Berrangé <berrange@xxxxxxxxxx>
---
 src/rpc/virnetdaemon.c | 53 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 52 insertions(+), 1 deletion(-)

diff --git a/src/rpc/virnetdaemon.c b/src/rpc/virnetdaemon.c
index 53dee60703..944a832ea8 100644
--- a/src/rpc/virnetdaemon.c
+++ b/src/rpc/virnetdaemon.c
@@ -84,6 +84,7 @@ struct _virNetDaemon {
     virNetDaemonLifecycleCallback shutdownPrepareCb;
     virNetDaemonLifecycleCallback shutdownWaitCb;
     virThread *stateStopThread;
+    int stopTimer;
     int quitTimer;
     virNetDaemonQuitPhase quit;
     bool graceful;
@@ -99,6 +100,25 @@ struct _virNetDaemon {
 
 static virClass *virNetDaemonClass;
 
+/*
+ * When running state stop operation which can be slow...
+ *
+ * How frequently we tell systemd to extend our stop time,
+ * and how much we ask for each time. The latter should
+ * exceed the former with a decent tolerance for high load
+ * scenarios
+ */
+#define VIR_NET_DAEMON_STOP_EXTEND_INTERVAL_MSEC (5 * 1000)
+#define VIR_NET_DAEMON_STOP_EXTRA_TIME_SEC 10
+
+/*
+ * When running daemon shutdown synchronization which
+ * ought to be moderately fast
+ */
+#define VIR_NET_DAEMON_SHUTDOWN_TIMEOUT_SEC 30
+#define VIR_NET_DAEMON_SHUTDOWN_TIMEOUT_MSEC (VIR_NET_DAEMON_SHUTDOWN_TIMEOUT_SEC * 1000)
+
+
 static int
 daemonServerClose(void *payload,
                   const char *key G_GNUC_UNUSED,
@@ -168,6 +188,7 @@ virNetDaemonNew(void)
     if (virEventRegisterDefaultImpl() < 0)
         goto error;
 
+    dmn->stopTimer = -1;
     dmn->autoShutdownTimerID = -1;
 
 #ifndef WIN32
@@ -737,6 +758,23 @@ daemonShutdownWait(void *opaque)
     }
 }
 
+static void
+virNetDaemonStopTimer(int timerid G_GNUC_UNUSED,
+                      void *opaque)
+{
+    virNetDaemon *dmn = opaque;
+    VIR_LOCK_GUARD lock = virObjectLockGuard(dmn);
+
+    if (dmn->quit != VIR_NET_DAEMON_QUIT_STOPPING)
+        return;
+
+    VIR_DEBUG("Extending stop timeout %u",
+              VIR_NET_DAEMON_STOP_EXTRA_TIME_SEC);
+
+    virSystemdNotifyExtendTimeout(VIR_NET_DAEMON_STOP_EXTRA_TIME_SEC);
+}
+
+
 static void
 virNetDaemonQuitTimer(int timerid G_GNUC_UNUSED,
                       void *opaque)
@@ -791,11 +829,19 @@ virNetDaemonRun(virNetDaemon *dmn)
 
         if (dmn->quit == VIR_NET_DAEMON_QUIT_REQUESTED) {
             VIR_DEBUG("Process quit request");
+            virSystemdNotifyStopping();
             virHashForEach(dmn->servers, daemonServerClose, NULL);
 
             if (dmn->stateStopThread) {
                 VIR_DEBUG("State stop thread running");
                 dmn->quit = VIR_NET_DAEMON_QUIT_STOPPING;
+                virSystemdNotifyExtendTimeout(VIR_NET_DAEMON_STOP_EXTRA_TIME_SEC);
+                if ((dmn->stopTimer = virEventAddTimeout(VIR_NET_DAEMON_STOP_EXTEND_INTERVAL_MSEC,
+                                                         virNetDaemonStopTimer,
+                                                         dmn, NULL)) < 0) {
+                    VIR_WARN("Failed to register stop timer");
+                    /* hope for the best */
+                }
             } else {
                 VIR_DEBUG("Ready to shutdown");
                 dmn->quit = VIR_NET_DAEMON_QUIT_READY;
@@ -807,7 +853,8 @@ virNetDaemonRun(virNetDaemon *dmn)
             if (dmn->shutdownPrepareCb && dmn->shutdownPrepareCb() < 0)
                 break;
 
-            if ((dmn->quitTimer = virEventAddTimeout(30 * 1000,
+            virSystemdNotifyExtendTimeout(VIR_NET_DAEMON_SHUTDOWN_TIMEOUT_SEC);
+            if ((dmn->quitTimer = virEventAddTimeout(VIR_NET_DAEMON_SHUTDOWN_TIMEOUT_MSEC,
                                                      virNetDaemonQuitTimer,
                                                      dmn, NULL)) < 0) {
                 VIR_WARN("Failed to register finish timer.");
@@ -879,6 +926,10 @@ virNetDaemonStopWorker(void *opaque)
             dmn->quit = VIR_NET_DAEMON_QUIT_READY;
         }
         g_clear_pointer(&dmn->stateStopThread, g_free);
+        if (dmn->stopTimer != -1) {
+            virEventRemoveTimeout(dmn->stopTimer);
+            dmn->stopTimer = -1;
+        }
     }
 
     VIR_DEBUG("End stop dmn=%p", dmn);
-- 
2.48.1




[Index of Archives]     [Virt Tools]     [Libvirt Users]     [Lib OS Info]     [Fedora Users]     [Fedora Desktop]     [Fedora SELinux]     [Big List of Linux Books]     [Yosemite News]     [KDE Users]     [Fedora Tools]

  Powered by Linux