Re: [PATCH] votequorum: Add persistent expected_votes tracking

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Another small update after IRC chat. Add a missing LEAVE(), and change some logging priorities.



diff --git a/exec/coroparse.c b/exec/coroparse.c
index 1ac0cdd..425e5d6 100644
--- a/exec/coroparse.c
+++ b/exec/coroparse.c
@@ -507,6 +507,7 @@ static int main_config_parser_cb(const char *path,
 			}
 
 			if ((strcmp(path, "quorum.two_node") == 0) ||
+			    (strcmp(path, "quorum.expected_votes_tracking") == 0) ||
 			    (strcmp(path, "quorum.allow_downscale") == 0) ||
 			    (strcmp(path, "quorum.wait_for_all") == 0) ||
 			    (strcmp(path, "quorum.auto_tie_breaker") == 0) ||
diff --git a/exec/votequorum.c b/exec/votequorum.c
index b81b253..1b6a4eb 100644
--- a/exec/votequorum.c
+++ b/exec/votequorum.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009-2012 Red Hat, Inc.
+ * Copyright (c) 2009-2014 Red Hat, Inc.
  *
  * All rights reserved.
  *
@@ -36,7 +36,10 @@
 #include <config.h>
 
 #include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
 #include <stdint.h>
+#include <unistd.h>
 
 #include <qb/qbipc_common.h>
 
@@ -85,6 +88,10 @@ static uint32_t last_man_standing_window = DEFAULT_LMS_WIN;
 static uint8_t allow_downscale = 0;
 static uint32_t ev_barrier = 0;
 
+static uint8_t ev_tracking = 0;
+static uint32_t ev_tracking_barrier = 0;
+static int ev_tracking_fd = -1;
+
 /*
  * votequorum_exec defines/structs/forward definitions
  */
@@ -589,6 +596,47 @@ static void decode_flags(uint32_t flags)
 	LEAVE();
 }
 
+/*
+ * Load ev_tracking_barrier from disk (logic adapted from totemsrp.c). If the file
+ * cannot be opened or does not hold a full value, it is (re)created holding 0.
+ * Returns 0 with ev_tracking_fd left open, or -1 if the file cannot be created.
+ */
+static int load_ev_tracking_barrier(void)
+{
+	ssize_t res;
+	char filename[PATH_MAX];
+
+	ENTER();
+
+	snprintf(filename, sizeof(filename), "%s", LOCALSTATEDIR "/lib/corosync/ev_tracking");
+
+	ev_tracking_fd = open(filename, O_RDWR, 0700);
+	if (ev_tracking_fd != -1) {
+		res = read (ev_tracking_fd, &ev_tracking_barrier, sizeof(uint32_t));
+		if (res == sizeof (uint32_t)) {
+			LEAVE();
+			return 0;
+		}
+		close(ev_tracking_fd);	/* short/failed read: do not leak this fd below */
+	}
+
+	ev_tracking_barrier = 0;
+	umask(0);	/* NOTE(review): process-wide and never restored - confirm intended */
+	ev_tracking_fd = open (filename, O_CREAT|O_RDWR, 0700);
+	if (ev_tracking_fd != -1) {
+		res = write (ev_tracking_fd, &ev_tracking_barrier, sizeof (uint32_t));
+		if (res != sizeof (uint32_t)) {	/* best effort: only warn, still return 0 */
+			log_printf(LOGSYS_LEVEL_WARNING, "Unable to write to %s", filename);
+		}
+		LEAVE();
+		return 0;
+	}
+	log_printf(LOGSYS_LEVEL_WARNING, "Unable to create %s file", filename);
+
+	LEAVE();
+	return -1;
+}
+
 static void update_wait_for_all_status(uint8_t wfa_status)
 {
 	ENTER(); 
@@ -644,6 +692,32 @@ static void update_qdevice_master_wins(uint8_t allow)
 	LEAVE();
 }
 
+/*
+ * Record a new ev_tracking_barrier in memory and in icmap, then rewrite and sync
+ * the on-disk copy. Any disk failure is logged as a warning and otherwise ignored.
+ */
+static void update_ev_tracking_barrier(uint32_t ev_t_barrier)
+{
+	ssize_t res;
+
+	ENTER();
+
+	ev_tracking_barrier = ev_t_barrier;
+	icmap_set_uint32("runtime.votequorum.ev_tracking_barrier", ev_tracking_barrier);
+
+	if (lseek (ev_tracking_fd, 0, SEEK_SET) != 0) {
+		res = -1;	/* could not rewind: nothing is written */
+	} else {
+		res = write (ev_tracking_fd, &ev_tracking_barrier, sizeof (uint32_t));
+	}
+	if ((res != sizeof (uint32_t)) || (fdatasync(ev_tracking_fd) != 0)) {
+		log_printf(LOGSYS_LEVEL_WARNING,
+			   "Unable to update ev_tracking_barrier on disk data!!!");
+	}
+
+	LEAVE();
+}
+
 /*
  * quorum calculation core bits
  */
@@ -854,6 +928,11 @@ static void recalculate_quorum(int allow_decrease, int by_current_nodes)
 		votequorum_exec_send_expectedvotes_notification();
 	}
 
+	if ((ev_tracking) &&
+	    (us->expected_votes > ev_tracking_barrier)) {
+		update_ev_tracking_barrier(us->expected_votes);
+	}
+
 	quorum = calculate_quorum(allow_decrease, cluster_members, &total_votes);
 	are_we_quorate(total_votes);
 
@@ -1022,6 +1101,21 @@ static char *votequorum_readconfig(int runtime)
 		icmap_get_uint8("quorum.auto_tie_breaker", &auto_tie_breaker);
 		icmap_get_uint8("quorum.last_man_standing", &last_man_standing);
 		icmap_get_uint32("quorum.last_man_standing_window", &last_man_standing_window);
+		icmap_get_uint8("quorum.expected_votes_tracking", &ev_tracking);
+
+		/* allow_downscale requires ev_tracking */
+		if (allow_downscale) {
+		    ev_tracking = 1;
+		}
+
+		if (ev_tracking) {
+		    if (load_ev_tracking_barrier() < 0) {
+		        LEAVE();
+		        return ((char *)"Unable to load ev_tracking file!");
+		    }
+		    update_ev_tracking_barrier(ev_tracking_barrier);
+		}
+
 	}
 
 	/*
@@ -1144,6 +1238,11 @@ static char *votequorum_readconfig(int runtime)
 	/*
 	 * set this node votes and expected_votes
 	 */
+	log_printf(LOGSYS_LEVEL_DEBUG, "ev_tracking=%d, ev_tracking_barrier = %d: expected_votes = %d\n", ev_tracking, ev_tracking_barrier, expected_votes);
+
+	if (ev_tracking) {
+	        expected_votes = ev_tracking_barrier;
+	}
 
 	if (have_nodelist) {
 		us->votes = node_votes;
@@ -1666,7 +1765,7 @@ static void message_handler_req_exec_votequorum_nodeinfo (
 		us->expected_votes = req_exec_quorum_nodeinfo->expected_votes;
 	}
 
-	if (node->flags & NODE_FLAGS_QUORATE) {
+	if (node->flags & NODE_FLAGS_QUORATE || (ev_tracking)) {
 		node->expected_votes = req_exec_quorum_nodeinfo->expected_votes;
 	} else {
 		node->expected_votes = us->expected_votes;
@@ -1683,7 +1782,6 @@ static void message_handler_req_exec_votequorum_nodeinfo (
 	}
 
 recalculate:
-
 	if ((new_node) ||
 	    (nodeid == us->node_id) ||
 	    (node->flags & NODE_FLAGS_FIRST) || 
@@ -1739,6 +1837,9 @@ static void message_handler_req_exec_votequorum_reconfigure (
 		}
 		votequorum_exec_send_expectedvotes_notification();
 		update_ev_barrier(req_exec_quorum_reconfigure->value);
+		if (ev_tracking) {
+		    us->expected_votes = max(us->expected_votes, ev_tracking_barrier);
+		}
 		recalculate_quorum(1, 0);  /* Allow decrease */
 		break;
 
@@ -1772,6 +1873,11 @@ static int votequorum_exec_exit_fn (void)
 		ret = votequorum_exec_send_nodeinfo(us->node_id);
 	}
 
+	if ((ev_tracking) && (ev_tracking_fd != -1)) {
+	    close(ev_tracking_fd);
+	}
+
+
 	LEAVE();
 	return ret;
 }
diff --git a/man/votequorum.5 b/man/votequorum.5
index 081431f..ebd2852 100644
--- a/man/votequorum.5
+++ b/man/votequorum.5
@@ -1,5 +1,5 @@
 .\"/*
-.\" * Copyright (c) 2012 Red Hat, Inc.
+.\" * Copyright (c) 2012-2014 Red Hat, Inc.
 .\" *
 .\" * All rights reserved.
 .\" *
@@ -317,6 +317,27 @@ quorum {
     allow_downscale: 1
 }
 .fi
+allow_downscale implicitly enables EVT (see below).
+.PP
+.B expected_votes_tracking: 1
+.PP
+Enables Expected Votes Tracking (EVT) feature (default: 0).
+.PP
+Expected Votes Tracking stores the highest-seen value of expected votes on disk and uses
+that as the minimum value for expected votes in the absence of any higher authority
+(e.g. a current quorate cluster). This is useful when a group of nodes becomes detached
+from the main cluster and, after a restart, could have enough votes to provide quorum,
+which can happen after using allow_downscale.
+.PP
+Note that even if the in-memory version of expected_votes is reduced, e.g. by removing nodes
+or using corosync-quorumtool, the stored value will still be the highest value seen - it
+is never reduced.
+.PP
+The value is held in the file /var/lib/corosync/ev_tracking, which can be deleted if you
+really do need to reduce the expected votes for any reason, for example because the node
+has been moved to a different cluster.
+.PP
+.fi
 .PP
 .SH VARIOUS NOTES
 .PP
_______________________________________________
discuss mailing list
discuss@xxxxxxxxxxxx
http://lists.corosync.org/mailman/listinfo/discuss

[Index of Archives]     [Linux Clusters]     [Corosync Project]     [Linux USB Devel]     [Linux Audio Users]     [Photo]     [Yosemite News]    [Yosemite Photos]    [Linux Kernel]     [Linux SCSI]     [X.Org]

  Powered by Linux