[libvirt] [PATCH] Make virsh reconnect when loosing connection

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



  This is a usability issue for virsh in case of disconnections,
for example if the remote libvirtd is restarted:
  https://bugzilla.redhat.com/show_bug.cgi?id=526656

the patch catch those and tries to automatically reconnect instead
of virsh exitting. The typical interaction with this patch is that
the command fails, but virsh automatically reconnects instead of
exitting, but it won't try to restart the failed command (since this
could have significant side effects). Example of such interraction:

-------------------------------------------------------

virsh # list --all
 Id Name                 State
----------------------------------
  - RHEL-5.4-64          shut off
  - WinXP                shut off

virsh # list --all
error: Failed to list active domains
error: cannot send data: Broken pipe
error: Reconnected to the hypervisor

virsh # list --all
 Id Name                 State
----------------------------------
  - RHEL-5.4-64          shut off
  - WinXP                shut off

virsh #

-------------------------------------------------------

 The only thing I'm unsure is if the signal handler should be reset
once it was received once. I don't in this patch and that seems to
work fine, but I somehow remember the fact that in some circumstances
a signal handler needs to be rearmed when received once. As is this
seems to work fine with SIGPIPE and linux.



Make virsh reconnect when loosing connection

Right now when the connected libvirtd restarts virsh gets a SIGPIPE
and dies, this change the behaviour to try to reconnect if the
signal was received or command error indicated a connection or RPC
failure. Note that the failing command is not restarted.

* tools/virsh.c: catch SIGPIPE signals as well as connection related
  failures, add some automatic reconnection code and appropriate error
  messages.

diff --git a/tools/virsh.c b/tools/virsh.c
index 65487ed..2ec9cfc 100644
--- a/tools/virsh.c
+++ b/tools/virsh.c
@@ -30,6 +30,7 @@
 #include <errno.h>
 #include <sys/stat.h>
 #include <inttypes.h>
+#include <signal.h>
 
 #include <libxml/parser.h>
 #include <libxml/tree.h>
@@ -397,6 +398,58 @@ out:
     last_error = NULL;
 }
 
+/*
+ * Detection of disconnections and automatic reconnection support
+ */
+static int disconnected = 0; /* we may have been disconnected */
+
+/*
+ * vshCatchDisconnect:
+ *
+ * We get here when a SIGPIPE is being raised, we can't do much in the
+ * handler, just save the fact it was raised
+ */
+static void vshCatchDisconnect(int sig, siginfo_t * siginfo,
+                               void* context ATTRIBUTE_UNUSED) {
+    if ((sig == SIGPIPE) || (siginfo->si_signo == SIGPIPE))
+        disconnected++;
+}
+
+/*
+ * vshSetupSignals:
+ *
+ * Catch SIGPIPE signals which may arise when desconnection from libvirtd occurs */
+static int
+vshSetupSignals(void) {
+    struct sigaction sig_action;
+
+    sig_action.sa_sigaction = vshCatchDisconnect;
+    sig_action.sa_flags = SA_SIGINFO;
+    sigemptyset(&sig_action.sa_mask);
+
+    sigaction(SIGPIPE, &sig_action, NULL);
+}
+
+/*
+ * vshReconnect:
+ *
+ * Reconnect after an
+ *
+ */
+static int
+vshReconnect(vshControl *ctl) {
+    if (ctl->conn != NULL)
+        virConnectClose(ctl->conn);
+
+    ctl->conn = virConnectOpenAuth(ctl->name,
+                                   virConnectAuthPtrDefault,
+                                   ctl->readonly ? VIR_CONNECT_RO : 0);
+    if (!ctl->conn)
+        vshError(ctl, "%s", _("Failed to reconnect to the hypervisor"));
+    else
+        vshError(ctl, "%s", _("Reconnected to the hypervisor"));
+    disconnected = 0;
+}
 
 /* ---------------
  * Commands
@@ -8332,6 +8385,9 @@ vshCommandRun(vshControl *ctl, const vshCmd *cmd)
     while (cmd) {
         struct timeval before, after;
 
+        if ((ctl->conn == NULL) || (disconnected != 0))
+            vshReconnect(ctl);
+
         if (ctl->timing)
             GETTIMEOFDAY(&before);
 
@@ -8343,6 +8399,17 @@ vshCommandRun(vshControl *ctl, const vshCmd *cmd)
         if (ret == FALSE)
             virshReportError(ctl);
 
+        /* try to catch automatically disconnections */
+        if ((ret == FALSE) && 
+            ((disconnected != 0) ||
+             ((last_error != NULL) &&
+              (((last_error->code == VIR_ERR_SYSTEM_ERROR) &&
+                (last_error->domain == 13)) ||
+               (last_error->code == VIR_ERR_RPC) ||
+               (last_error->code == VIR_ERR_NO_CONNECT) ||
+               (last_error->code == VIR_ERR_INVALID_CONN)))))
+            vshReconnect(ctl);
+
         if (STREQ(cmd->def->name, "quit"))        /* hack ... */
             return ret;
 
@@ -8673,9 +8740,11 @@ vshError(vshControl *ctl, const char *format, ...)
 {
     va_list ap;
 
-    va_start(ap, format);
-    vshOutputLogFile(ctl, VSH_ERR_ERROR, format, ap);
-    va_end(ap);
+    if (ctl != NULL) {
+        va_start(ap, format);
+        vshOutputLogFile(ctl, VSH_ERR_ERROR, format, ap);
+        va_end(ap);
+    }
 
     fputs(_("error: "), stderr);
 
@@ -8751,6 +8820,9 @@ vshInit(vshControl *ctl)
     /* set up the library error handler */
     virSetErrorFunc(NULL, virshErrorHandler);
 
+    /* set up the signals handlers to catch disconnections */
+    vshSetupSignals();
+
     ctl->conn = virConnectOpenAuth(ctl->name,
                                    virConnectAuthPtrDefault,
                                    ctl->readonly ? VIR_CONNECT_RO : 0);

-- 
Daniel Veillard      | libxml Gnome XML XSLT toolkit  http://xmlsoft.org/
daniel@xxxxxxxxxxxx  | Rpmfind RPM search engine http://rpmfind.net/
http://veillard.com/ | virtualization library  http://libvirt.org/

--
libvir-list mailing list
libvir-list@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/libvir-list

[Index of Archives]     [Virt Tools]     [Libvirt Users]     [Lib OS Info]     [Fedora Users]     [Fedora Desktop]     [Fedora SELinux]     [Big List of Linux Books]     [Yosemite News]     [KDE Users]     [Fedora Tools]