[PATCH] votequorum: add leave_remove option

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: "Fabio M. Di Nitto" <fdinitto@xxxxxxxxxx>

This also cleans up NODESTATE for good: JOINING was never used.

Signed-off-by: Fabio M. Di Nitto <fdinitto@xxxxxxxxxx>
---
 exec/coroparse.c                  |    1 +
 exec/votequorum.c                 |   63 ++++++++++++++++++++++++++++++------
 include/corosync/ipc_votequorum.h |    1 +
 include/corosync/votequorum.h     |    7 ++--
 man/votequorum.5                  |   48 +++++++++++++++++++++++++++-
 man/votequorum_getinfo.3          |    1 +
 test/testvotequorum1.c            |    5 +--
 tools/corosync-quorumtool.c       |    1 +
 8 files changed, 109 insertions(+), 18 deletions(-)

diff --git a/exec/coroparse.c b/exec/coroparse.c
index 9ca49ed..29115c9 100644
--- a/exec/coroparse.c
+++ b/exec/coroparse.c
@@ -371,6 +371,7 @@ static int main_config_parser_cb(const char *path,
 			}
 
 			if ((strcmp(path, "quorum.two_node") == 0) ||
+			    (strcmp(path, "quorum.leave_remove") == 0) ||
 			    (strcmp(path, "quorum.wait_for_all") == 0) ||
 			    (strcmp(path, "quorum.auto_tie_breaker") == 0) ||
 			    (strcmp(path, "quorum.last_man_standing") == 0)) {
diff --git a/exec/votequorum.c b/exec/votequorum.c
index afef5cf..672237a 100644
--- a/exec/votequorum.c
+++ b/exec/votequorum.c
@@ -83,6 +83,8 @@ static int lowest_node_id = -1;
 static uint8_t last_man_standing = 0;
 static uint32_t last_man_standing_window = DEFAULT_LMS_WIN;
 
+static uint8_t leave_remove = 0;
+static uint32_t ev_barrier = 0;
 /*
  * votequorum_exec defines/structs/forward definitions
  */
@@ -135,13 +137,15 @@ static int votequorum_exec_send_reconfigure(uint8_t param, unsigned int nodeid,
  * votequorum internal node status/view
  */
 
+#define NODE_FLAGS_LEAVING 1
+
 #define NODEID_US 0
 #define NODEID_QDEVICE UINT32_MAX
 
 typedef enum {
-	NODESTATE_JOINING=1,
-	NODESTATE_MEMBER,
-	NODESTATE_DEAD
+	NODESTATE_MEMBER=1,
+	NODESTATE_DEAD,
+	NODESTATE_LEAVING
 } nodestate_t;
 
 struct cluster_node {
@@ -214,6 +218,7 @@ static quorum_set_quorate_fn_t quorum_callback;
  */
 
 static char *votequorum_exec_init_fn (struct corosync_api_v1 *api);
+static int votequorum_exec_exit_fn (void);
 
 static void message_handler_req_exec_votequorum_nodeinfo (
 	const void *message,
@@ -328,6 +333,7 @@ static struct corosync_service_engine votequorum_service_engine = {
 	.lib_engine			= quorum_lib_service,
 	.lib_engine_count		= sizeof (quorum_lib_service) / sizeof (struct corosync_lib_handler),
 	.exec_init_fn			= votequorum_exec_init_fn,
+	.exec_exit_fn			= votequorum_exec_exit_fn,
 	.exec_engine			= votequorum_exec_engine,
 	.exec_engine_count		= sizeof (votequorum_exec_engine) / sizeof (struct corosync_exec_handler),
 	.confchg_fn			= votequorum_confchg_fn,
@@ -488,6 +494,10 @@ static int calculate_quorum(int allow_decrease, unsigned int max_expected, unsig
 
 	ENTER();
 
+	if ((leave_remove) && (allow_decrease) && (max_expected)) {
+		max_expected = max(ev_barrier, max_expected);
+	}
+
 	list_iterate(nodelist, &cluster_members_list) {
 		node = list_entry(nodelist, struct cluster_node, list);
 
@@ -724,6 +734,7 @@ static char *votequorum_readconfig_static(void)
 		wait_for_all = 1;
 	}
 
+	icmap_get_uint8("quorum.leave_remove", &leave_remove);
 	icmap_get_uint8("quorum.wait_for_all", &wait_for_all);
 	icmap_get_uint8("quorum.auto_tie_breaker", &auto_tie_breaker);
 	icmap_get_uint8("quorum.last_man_standing", &last_man_standing);
@@ -759,6 +770,8 @@ static void votequorum_readconfig_dynamic(void)
 
 	icmap_get_uint32("quorum.expected_votes", &us->expected_votes);
 
+	ev_barrier = us->expected_votes;
+
 #ifdef EXPERIMENTAL_QUORUM_DEVICE_API
 	if (icmap_get_uint32("quorum.quorumdev_poll", &quorumdev_poll) != CS_OK) {
 		quorumdev_poll = DEFAULT_QDEV_POLL;
@@ -1012,10 +1025,18 @@ static void message_handler_req_exec_votequorum_nodeinfo (
 	nodestate_t old_state;
 	int new_node = 0;
 	int allow_downgrade = 0;
+	int by_node = 0;
 
 	ENTER();
 
 	log_printf(LOGSYS_LEVEL_DEBUG, "got nodeinfo message from cluster node %u", nodeid);
+	log_printf(LOGSYS_LEVEL_DEBUG, "nodeinfo message[%u]: votes: %d, expected: %d wfa: %d quorate: %d flags: %d",
+					nodeid,
+					req_exec_quorum_nodeinfo->votes,
+					req_exec_quorum_nodeinfo->expected_votes,
+					req_exec_quorum_nodeinfo->wait_for_all_status,
+					req_exec_quorum_nodeinfo->quorate,
+					req_exec_quorum_nodeinfo->flags);
 
 	node = find_node_by_nodeid(nodeid);
 	if (!node) {
@@ -1036,7 +1057,14 @@ static void message_handler_req_exec_votequorum_nodeinfo (
 	/* Update node state */
 	node->votes = req_exec_quorum_nodeinfo->votes;
 	node->flags = req_exec_quorum_nodeinfo->flags;
-	node->state = NODESTATE_MEMBER;
+
+	if (node->flags & NODE_FLAGS_LEAVING) {
+		node->state = NODESTATE_LEAVING;
+		allow_downgrade = 1;
+		by_node = 1;
+	} else {
+		node->state = NODESTATE_MEMBER;
+	}
 
 	if ((!cluster_is_quorate) &&
 	    (req_exec_quorum_nodeinfo->quorate)) {
@@ -1050,12 +1078,6 @@ static void message_handler_req_exec_votequorum_nodeinfo (
 		node->expected_votes = us->expected_votes;
 	}
 
-	log_printf(LOGSYS_LEVEL_DEBUG, "nodeinfo message: votes: %d, expected: %d wfa: %d quorate: %d",
-					req_exec_quorum_nodeinfo->votes,
-					req_exec_quorum_nodeinfo->expected_votes,
-					req_exec_quorum_nodeinfo->wait_for_all_status,
-					req_exec_quorum_nodeinfo->quorate);
-
 	if ((last_man_standing) && (req_exec_quorum_nodeinfo->votes > 1)) {
 		log_printf(LOGSYS_LEVEL_WARNING, "Last Man Standing feature is supported only when all"
 						 "cluster nodes votes are set to 1. Disabling LMS.");
@@ -1072,7 +1094,7 @@ static void message_handler_req_exec_votequorum_nodeinfo (
 	    old_expected != node->expected_votes ||
 	    old_flags != node->flags ||
 	    old_state != node->state) {
-		recalculate_quorum(allow_downgrade, 0);
+		recalculate_quorum(allow_downgrade, by_node);
 	}
 
 	if (!nodeid) {
@@ -1129,6 +1151,7 @@ static void message_handler_req_exec_votequorum_reconfigure (
 			}
 		}
 		votequorum_exec_send_expectedvotes_notification();
+		ev_barrier = req_exec_quorum_reconfigure->value;
 		recalculate_quorum(1, 0);  /* Allow decrease */
 		break;
 
@@ -1142,6 +1165,21 @@ static void message_handler_req_exec_votequorum_reconfigure (
 	LEAVE();
 }
 
+static int votequorum_exec_exit_fn (void) /* registered as .exec_exit_fn of the votequorum service engine */
+{
+	int ret = 0; /* remains 0 when leave_remove is disabled */
+
+	ENTER();
+
+	if (leave_remove) {
+		us->flags |= NODE_FLAGS_LEAVING; /* the nodeinfo message handler maps this flag to NODESTATE_LEAVING */
+		ret = votequorum_exec_send_nodeinfo(); /* result is handed back to the caller unchanged */
+	}
+
+	LEAVE();
+	return ret;
+}
+
 static char *votequorum_exec_init_fn (struct corosync_api_v1 *api)
 {
 #ifdef COROSYNC_SOLARIS
@@ -1413,6 +1451,9 @@ static void message_handler_req_lib_votequorum_getinfo (void *conn, const void *
 		if (auto_tie_breaker) {
 			res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_AUTO_TIE_BREAKER;
 		}
+		if (leave_remove) {
+			res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_LEAVE_REMOVE;
+		}
 	} else {
 		error = CS_ERR_NOT_EXIST;
 	}
diff --git a/include/corosync/ipc_votequorum.h b/include/corosync/ipc_votequorum.h
index ce0edcb..3215ccc 100644
--- a/include/corosync/ipc_votequorum.h
+++ b/include/corosync/ipc_votequorum.h
@@ -108,6 +108,7 @@ struct res_lib_votequorum_status {
 #define VOTEQUORUM_INFO_WAIT_FOR_ALL	        4
 #define VOTEQUORUM_INFO_LAST_MAN_STANDING       8
 #define VOTEQUORUM_INFO_AUTO_TIE_BREAKER       16
+#define VOTEQUORUM_INFO_LEAVE_REMOVE           32
 
 struct res_lib_votequorum_getinfo {
 	struct qb_ipc_response_header header __attribute__((aligned(8)));
diff --git a/include/corosync/votequorum.h b/include/corosync/votequorum.h
index 2293c35..8173c45 100644
--- a/include/corosync/votequorum.h
+++ b/include/corosync/votequorum.h
@@ -51,13 +51,14 @@ typedef uint64_t votequorum_handle_t;
 #define VOTEQUORUM_INFO_WAIT_FOR_ALL            4
 #define VOTEQUORUM_INFO_LAST_MAN_STANDING       8
 #define VOTEQUORUM_INFO_AUTO_TIE_BREAKER       16
+#define VOTEQUORUM_INFO_LEAVE_REMOVE           32
 
 #define VOTEQUORUM_NODEID_US 0
 #define VOTEQUORUM_NODEID_QDEVICE -1
 
-#define NODESTATE_JOINING    1
-#define NODESTATE_MEMBER     2
-#define NODESTATE_DEAD       3
+#define NODESTATE_MEMBER     1
+#define NODESTATE_DEAD       2
+#define NODESTATE_LEAVING    3
 
 /** @} */
 
diff --git a/man/votequorum.5 b/man/votequorum.5
index e275a99..ab10662 100644
--- a/man/votequorum.5
+++ b/man/votequorum.5
@@ -270,9 +270,55 @@ quorum {
 }
 
 .fi
+.PP
+.B leave_remove: 1
+.PP
+Enables leave remove (LR) feature (default: 0).
+.PP
+The general behaviour of votequorum is to never decrease expected votes or quorum.
+.PP
+When LR is enabled, both expected votes and quorum are recalculated when
+a node leaves the cluster cleanly (normal corosync shutdown process), but
+never below the configured expected_votes.
+.PP
+Example use case:
+.PP
+.nf
+1) N node cluster (where N is any value higher than 3)
+
+2) expected_votes set to 3 in corosync.conf
+
+3) only 3 nodes are running
+
+4) admin needs more processing power and adds 10 nodes
+
+5) internal expected_votes is automatically set to 13
+
+6) minimum expected_votes is 3 (from configuration)
+
+- up to this point this is standard votequorum behavior -
+
+7) once the work is done, admin wants to remove nodes from the cluster
+
+8) using an ordered shutdown the admin can reduce the cluster size
+   automatically back to 3, but not below 3; from that point normal
+   quorum operation works as usual.
+
+.fi
+.PP
+Example configuration:
+.nf
+
+quorum {
+    provider: corosync_votequorum
+    expected_votes: 3
+    leave_remove: 1
+}
+.fi
+.PP
 .SH VARIOUS NOTES
 .PP
-* WFA / LMS / ATB can be used combined together.
+* WFA / LMS / ATB / LR can be used in combination.
 .PP
 * In order to change the default votes for a node there are two options:
 .nf
diff --git a/man/votequorum_getinfo.3 b/man/votequorum_getinfo.3
index 213e8a0..3a40104 100644
--- a/man/votequorum_getinfo.3
+++ b/man/votequorum_getinfo.3
@@ -67,6 +67,7 @@ struct votequorum_info {
 #define VOTEQUORUM_INFO_WAIT_FOR_ALL            4
 #define VOTEQUORUM_INFO_LAST_MAN_STANDING       8
 #define VOTEQUORUM_INFO_AUTO_TIE_BREAKER       16
+#define VOTEQUORUM_INFO_LEAVE_REMOVE           32
 
 .ta
 .fi
diff --git a/test/testvotequorum1.c b/test/testvotequorum1.c
index caab9a4..ff0a085 100644
--- a/test/testvotequorum1.c
+++ b/test/testvotequorum1.c
@@ -48,9 +48,6 @@ static votequorum_handle_t g_handle;
 static const char *node_state(int state)
 {
 	switch (state) {
-	case NODESTATE_JOINING:
-		return "Joining";
-		break;
 	case NODESTATE_MEMBER:
 		return "Member";
 		break;
@@ -130,6 +127,7 @@ int main(int argc, char *argv[])
 		if (info.flags & VOTEQUORUM_INFO_WAIT_FOR_ALL) printf("WaitForAll ");
 		if (info.flags & VOTEQUORUM_INFO_LAST_MAN_STANDING) printf("LastManStanding ");
 		if (info.flags & VOTEQUORUM_INFO_AUTO_TIE_BREAKER) printf("AutoTieBreaker ");
+		if (info.flags & VOTEQUORUM_INFO_LEAVE_REMOVE) printf("LeaveRemove ");
 
 		printf("\n");
 	}
@@ -159,6 +157,7 @@ int main(int argc, char *argv[])
 			if (info.flags & VOTEQUORUM_INFO_WAIT_FOR_ALL) printf("WaitForAll ");
 			if (info.flags & VOTEQUORUM_INFO_LAST_MAN_STANDING) printf("LastManStanding ");
 			if (info.flags & VOTEQUORUM_INFO_AUTO_TIE_BREAKER) printf("AutoTieBreaker ");
+			if (info.flags & VOTEQUORUM_INFO_LEAVE_REMOVE) printf("LeaveRemove ");
 			printf("\n");
 		}
 	}
diff --git a/tools/corosync-quorumtool.c b/tools/corosync-quorumtool.c
index 45c0e32..48266b2 100644
--- a/tools/corosync-quorumtool.c
+++ b/tools/corosync-quorumtool.c
@@ -341,6 +341,7 @@ static int display_quorum_data(int is_quorate, int loop)
 		if (info.flags & VOTEQUORUM_INFO_WAIT_FOR_ALL) printf("WaitForAll ");
 		if (info.flags & VOTEQUORUM_INFO_LAST_MAN_STANDING) printf("LastManStanding ");
 		if (info.flags & VOTEQUORUM_INFO_AUTO_TIE_BREAKER) printf("AutoTieBreaker ");
+		if (info.flags & VOTEQUORUM_INFO_LEAVE_REMOVE) printf("LeaveRemove ");
 		printf("\n");
 	} else {
 		fprintf(stderr, "votequorum_getinfo FAILED: %d\n", err);
-- 
1.7.7.6

_______________________________________________
discuss mailing list
discuss@xxxxxxxxxxxx
http://lists.corosync.org/mailman/listinfo/discuss


[Index of Archives]     [Linux Clusters]     [Corosync Project]     [Linux USB Devel]     [Linux Audio Users]     [Photo]     [Yosemite News]    [Yosemite Photos]    [Linux Kernel]     [Linux SCSI]     [X.Org]

  Powered by Linux