Another small update after IRC chat. Add a missing LEAVE(), and change
some logging priorities.
diff --git a/exec/coroparse.c b/exec/coroparse.c
index 1ac0cdd..425e5d6 100644
--- a/exec/coroparse.c
+++ b/exec/coroparse.c
@@ -507,6 +507,7 @@ static int main_config_parser_cb(const char *path,
}
if ((strcmp(path, "quorum.two_node") == 0) ||
+ (strcmp(path, "quorum.expected_votes_tracking") == 0) ||
(strcmp(path, "quorum.allow_downscale") == 0) ||
(strcmp(path, "quorum.wait_for_all") == 0) ||
(strcmp(path, "quorum.auto_tie_breaker") == 0) ||
diff --git a/exec/votequorum.c b/exec/votequorum.c
index b81b253..1b6a4eb 100644
--- a/exec/votequorum.c
+++ b/exec/votequorum.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2009-2012 Red Hat, Inc.
+ * Copyright (c) 2009-2014 Red Hat, Inc.
*
* All rights reserved.
*
@@ -36,7 +36,10 @@
#include <config.h>
#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
#include <stdint.h>
+#include <unistd.h>
#include <qb/qbipc_common.h>
@@ -85,6 +88,10 @@ static uint32_t last_man_standing_window = DEFAULT_LMS_WIN;
static uint8_t allow_downscale = 0;
static uint32_t ev_barrier = 0;
+static uint8_t ev_tracking = 0; /* set from quorum.expected_votes_tracking; forced to 1 when allow_downscale is set */
+static uint32_t ev_tracking_barrier = 0; /* barrier value mirrored in the on-disk ev_tracking file and in icmap */
+static int ev_tracking_fd = -1; /* descriptor of the ev_tracking file; -1 until load_ev_tracking_barrier() opens it */
+
/*
* votequorum_exec defines/structs/forward definitions
*/
@@ -589,6 +596,47 @@ static void decode_flags(uint32_t flags)
LEAVE();
}
+/*
+ * load/save are copied almost pristine from totemsrp.c
+ */
+
+/*
+ * Load the persisted expected-votes tracking barrier from
+ * LOCALSTATEDIR "/lib/corosync/ev_tracking" into ev_tracking_barrier and
+ * leave the file open on ev_tracking_fd so later updates can rewrite it.
+ *
+ * If the file cannot be opened, or does not contain a full uint32_t, the
+ * barrier is reset to 0 and the file is (re)created holding that value.
+ *
+ * Returns 0 when ev_tracking_fd refers to an open file (a failure to write
+ * the initial zero barrier is only logged), or -1 when the file could not
+ * be created.
+ */
+static int load_ev_tracking_barrier(void)
+{
+	ssize_t res;
+	mode_t saved_umask;
+	char filename[PATH_MAX];
+
+	ENTER();
+
+	snprintf(filename, sizeof(filename) - 1, LOCALSTATEDIR "/lib/corosync/ev_tracking");
+
+	/* No mode argument: it is only consulted when O_CREAT is given. */
+	ev_tracking_fd = open(filename, O_RDWR);
+	if (ev_tracking_fd != -1) {
+		res = read (ev_tracking_fd, &ev_tracking_barrier, sizeof (uint32_t));
+		if (res == (ssize_t)sizeof (uint32_t)) {
+			LEAVE();
+			return 0;
+		}
+		/*
+		 * Short or failed read: release this descriptor before the file
+		 * is re-created below, otherwise it would be leaked when
+		 * ev_tracking_fd is overwritten.
+		 */
+		close (ev_tracking_fd);
+		ev_tracking_fd = -1;
+	}
+
+	ev_tracking_barrier = 0;
+
+	/*
+	 * Clear the umask only for the creation of this file and restore it
+	 * afterwards so the rest of the process is not affected.
+	 */
+	saved_umask = umask (0);
+	ev_tracking_fd = open (filename, O_CREAT|O_RDWR, 0700);
+	umask (saved_umask);
+
+	if (ev_tracking_fd != -1) {
+		res = write (ev_tracking_fd, &ev_tracking_barrier, sizeof (uint32_t));
+		if (res != (ssize_t)sizeof (uint32_t)) {
+			log_printf(LOGSYS_LEVEL_WARNING,
+				   "Unable to write to %s", filename);
+		}
+		LEAVE();
+		return 0;
+	}
+	log_printf(LOGSYS_LEVEL_WARNING,
+		   "Unable to create %s file", filename);
+
+	LEAVE();
+
+	return -1;
+}
+
static void update_wait_for_all_status(uint8_t wfa_status)
{
ENTER();
@@ -644,6 +692,32 @@ static void update_qdevice_master_wins(uint8_t allow)
LEAVE();
}
+/*
+ * Record a new expected-votes tracking barrier.
+ *
+ * The value is stored in ev_tracking_barrier, published under the icmap key
+ * runtime.votequorum.ev_tracking_barrier, and then written at offset 0 of
+ * the file open on ev_tracking_fd and flushed with fdatasync().
+ *
+ * Disk failures (seek, write or flush) are logged as warnings only; the
+ * in-memory and icmap values are updated regardless.
+ */
+static void update_ev_tracking_barrier(uint32_t ev_t_barrier)
+{
+	ssize_t res;
+
+	ENTER();
+
+	ev_tracking_barrier = ev_t_barrier;
+	icmap_set_uint32("runtime.votequorum.ev_tracking_barrier", ev_tracking_barrier);
+
+	if (lseek (ev_tracking_fd, 0, SEEK_SET) != 0) {
+		log_printf(LOGSYS_LEVEL_WARNING,
+			   "Unable to update ev_tracking_barrier on disk data!!!");
+		LEAVE();
+		return;
+	}
+
+	res = write (ev_tracking_fd, &ev_tracking_barrier, sizeof (uint32_t));
+	if (res != (ssize_t)sizeof (uint32_t)) {
+		log_printf(LOGSYS_LEVEL_WARNING,
+			   "Unable to update ev_tracking_barrier on disk data!!!");
+		LEAVE();
+		return;
+	}
+
+	/*
+	 * A successful write() does not mean the value has reached stable
+	 * storage, so a failed flush must be reported as well.
+	 */
+	if (fdatasync (ev_tracking_fd) != 0) {
+		log_printf(LOGSYS_LEVEL_WARNING,
+			   "Unable to update ev_tracking_barrier on disk data!!!");
+	}
+
+	LEAVE();
+}
+
/*
* quorum calculation core bits
*/
@@ -854,6 +928,11 @@ static void recalculate_quorum(int allow_decrease, int by_current_nodes)
votequorum_exec_send_expectedvotes_notification();
}
+ if ((ev_tracking) &&
+ (us->expected_votes > ev_tracking_barrier)) {
+ update_ev_tracking_barrier(us->expected_votes);
+ }
+
quorum = calculate_quorum(allow_decrease, cluster_members, &total_votes);
are_we_quorate(total_votes);
@@ -1022,6 +1101,21 @@ static char *votequorum_readconfig(int runtime)
icmap_get_uint8("quorum.auto_tie_breaker", &auto_tie_breaker);
icmap_get_uint8("quorum.last_man_standing", &last_man_standing);
icmap_get_uint32("quorum.last_man_standing_window", &last_man_standing_window);
+ icmap_get_uint8("quorum.expected_votes_tracking", &ev_tracking);
+
+ /* allow_downscale requires ev_tracking */
+ if (allow_downscale) {
+ ev_tracking = 1;
+ }
+
+ if (ev_tracking) {
+ if (load_ev_tracking_barrier() < 0) {
+ LEAVE();
+ return ((char *)"Unable to load ev_tracking file!");
+ }
+ update_ev_tracking_barrier(ev_tracking_barrier);
+ }
+
}
/*
@@ -1144,6 +1238,11 @@ static char *votequorum_readconfig(int runtime)
/*
* set this node votes and expected_votes
*/
+ log_printf(LOGSYS_LEVEL_DEBUG, "ev_tracking=%d, ev_tracking_barrier = %d: expected_votes = %d\n", ev_tracking, ev_tracking_barrier, expected_votes);
+
+ if (ev_tracking) {
+ expected_votes = ev_tracking_barrier;
+ }
if (have_nodelist) {
us->votes = node_votes;
@@ -1666,7 +1765,7 @@ static void message_handler_req_exec_votequorum_nodeinfo (
us->expected_votes = req_exec_quorum_nodeinfo->expected_votes;
}
- if (node->flags & NODE_FLAGS_QUORATE) {
+ if (node->flags & NODE_FLAGS_QUORATE || (ev_tracking)) {
node->expected_votes = req_exec_quorum_nodeinfo->expected_votes;
} else {
node->expected_votes = us->expected_votes;
@@ -1683,7 +1782,6 @@ static void message_handler_req_exec_votequorum_nodeinfo (
}
recalculate:
-
if ((new_node) ||
(nodeid == us->node_id) ||
(node->flags & NODE_FLAGS_FIRST) ||
@@ -1739,6 +1837,9 @@ static void message_handler_req_exec_votequorum_reconfigure (
}
votequorum_exec_send_expectedvotes_notification();
update_ev_barrier(req_exec_quorum_reconfigure->value);
+ if (ev_tracking) {
+ us->expected_votes = max(us->expected_votes, ev_tracking_barrier);
+ }
recalculate_quorum(1, 0); /* Allow decrease */
break;
@@ -1772,6 +1873,11 @@ static int votequorum_exec_exit_fn (void)
ret = votequorum_exec_send_nodeinfo(us->node_id);
}
+ if ((ev_tracking) && (ev_tracking_fd != -1)) {
+ close(ev_tracking_fd);
+ }
+
+
LEAVE();
return ret;
}
diff --git a/man/votequorum.5 b/man/votequorum.5
index 081431f..ebd2852 100644
--- a/man/votequorum.5
+++ b/man/votequorum.5
@@ -1,5 +1,5 @@
.\"/*
-.\" * Copyright (c) 2012 Red Hat, Inc.
+.\" * Copyright (c) 2012-2014 Red Hat, Inc.
.\" *
.\" * All rights reserved.
.\" *
@@ -317,6 +317,27 @@ quorum {
allow_downscale: 1
}
.fi
+allow_downscale implicitly enables EVT (see below).
+.PP
+.B expected_votes_tracking: 1
+.PP
+Enables Expected Votes Tracking (EVT) feature (default: 0).
+.PP
+Expected Votes Tracking stores the highest-seen value of expected votes on disk and uses
+that as the minimum value for expected votes in the absence of any higher authority
+(e.g. a current quorate cluster). This is useful when a group of nodes becomes detached from
+the main cluster and, after a restart, could have enough votes to provide quorum, which can
+happen after using allow_downscale.
+.PP
+Note that even if the in-memory version of expected_votes is reduced, e.g. by removing nodes
+or using corosync-quorumtool, the stored value will still be the highest value seen - it
+never gets reduced.
+.PP
+The value is held in the file /var/lib/corosync/ev_tracking which can be deleted if you
+really do need to reduce the expected votes for any reason, for example if the node has been
+moved to a different cluster.
+.PP
+.fi
.PP
.SH VARIOUS NOTES
.PP
_______________________________________________
discuss mailing list
discuss@xxxxxxxxxxxx
http://lists.corosync.org/mailman/listinfo/discuss