Re: [PATCH] votequorum: add leave_remove option

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Reviewed-By: Christine Caulfield <ccaulfie@xxxxxxxxxx>

ACK

On 31/01/12 15:43, Fabio M. Di Nitto wrote:
From: "Fabio M. Di Nitto"<fdinitto@xxxxxxxxxx>

This also cleans up NODESTATE for good: JOINING was never used.

Signed-off-by: Fabio M. Di Nitto<fdinitto@xxxxxxxxxx>
---
  exec/coroparse.c                  |    1 +
  exec/votequorum.c                 |   63 ++++++++++++++++++++++++++++++------
  include/corosync/ipc_votequorum.h |    1 +
  include/corosync/votequorum.h     |    7 ++--
  man/votequorum.5                  |   48 +++++++++++++++++++++++++++-
  man/votequorum_getinfo.3          |    1 +
  test/testvotequorum1.c            |    5 +--
  tools/corosync-quorumtool.c       |    1 +
  8 files changed, 109 insertions(+), 18 deletions(-)

diff --git a/exec/coroparse.c b/exec/coroparse.c
index 9ca49ed..29115c9 100644
--- a/exec/coroparse.c
+++ b/exec/coroparse.c
@@ -371,6 +371,7 @@ static int main_config_parser_cb(const char *path,
  			}

  			if ((strcmp(path, "quorum.two_node") == 0) ||
+			    (strcmp(path, "quorum.leave_remove") == 0) ||
  			    (strcmp(path, "quorum.wait_for_all") == 0) ||
  			    (strcmp(path, "quorum.auto_tie_breaker") == 0) ||
  			    (strcmp(path, "quorum.last_man_standing") == 0)) {
diff --git a/exec/votequorum.c b/exec/votequorum.c
index afef5cf..672237a 100644
--- a/exec/votequorum.c
+++ b/exec/votequorum.c
@@ -83,6 +83,8 @@ static int lowest_node_id = -1;
  static uint8_t last_man_standing = 0;
  static uint32_t last_man_standing_window = DEFAULT_LMS_WIN;

+static uint8_t leave_remove = 0;
+static uint32_t ev_barrier = 0;
  /*
   * votequorum_exec defines/structs/forward definitions
   */
@@ -135,13 +137,15 @@ static int votequorum_exec_send_reconfigure(uint8_t param, unsigned int nodeid,
   * votequorum internal node status/view
   */

+#define NODE_FLAGS_LEAVING 1
+
  #define NODEID_US 0
  #define NODEID_QDEVICE UINT32_MAX

  typedef enum {
-	NODESTATE_JOINING=1,
-	NODESTATE_MEMBER,
-	NODESTATE_DEAD
+	NODESTATE_MEMBER=1,
+	NODESTATE_DEAD,
+	NODESTATE_LEAVING
  } nodestate_t;

  struct cluster_node {
@@ -214,6 +218,7 @@ static quorum_set_quorate_fn_t quorum_callback;
   */

  static char *votequorum_exec_init_fn (struct corosync_api_v1 *api);
+static int votequorum_exec_exit_fn (void);

  static void message_handler_req_exec_votequorum_nodeinfo (
  	const void *message,
@@ -328,6 +333,7 @@ static struct corosync_service_engine votequorum_service_engine = {
  	.lib_engine			= quorum_lib_service,
  	.lib_engine_count		= sizeof (quorum_lib_service) / sizeof (struct corosync_lib_handler),
  	.exec_init_fn			= votequorum_exec_init_fn,
+	.exec_exit_fn			= votequorum_exec_exit_fn,
  	.exec_engine			= votequorum_exec_engine,
  	.exec_engine_count		= sizeof (votequorum_exec_engine) / sizeof (struct corosync_exec_handler),
  	.confchg_fn			= votequorum_confchg_fn,
@@ -488,6 +494,10 @@ static int calculate_quorum(int allow_decrease, unsigned int max_expected, unsig

  	ENTER();

+	if ((leave_remove)&&  (allow_decrease)&&  (max_expected)) {
+		max_expected = max(ev_barrier, max_expected);
+	}
+
  	list_iterate(nodelist,&cluster_members_list) {
  		node = list_entry(nodelist, struct cluster_node, list);

@@ -724,6 +734,7 @@ static char *votequorum_readconfig_static(void)
  		wait_for_all = 1;
  	}

+	icmap_get_uint8("quorum.leave_remove",&leave_remove);
  	icmap_get_uint8("quorum.wait_for_all",&wait_for_all);
  	icmap_get_uint8("quorum.auto_tie_breaker",&auto_tie_breaker);
  	icmap_get_uint8("quorum.last_man_standing",&last_man_standing);
@@ -759,6 +770,8 @@ static void votequorum_readconfig_dynamic(void)

  	icmap_get_uint32("quorum.expected_votes",&us->expected_votes);

+	ev_barrier = us->expected_votes;
+
  #ifdef EXPERIMENTAL_QUORUM_DEVICE_API
  	if (icmap_get_uint32("quorum.quorumdev_poll",&quorumdev_poll) != CS_OK) {
  		quorumdev_poll = DEFAULT_QDEV_POLL;
@@ -1012,10 +1025,18 @@ static void message_handler_req_exec_votequorum_nodeinfo (
  	nodestate_t old_state;
  	int new_node = 0;
  	int allow_downgrade = 0;
+	int by_node = 0;

  	ENTER();

  	log_printf(LOGSYS_LEVEL_DEBUG, "got nodeinfo message from cluster node %u", nodeid);
+	log_printf(LOGSYS_LEVEL_DEBUG, "nodeinfo message[%u]: votes: %d, expected: %d wfa: %d quorate: %d flags: %d",
+					nodeid,
+					req_exec_quorum_nodeinfo->votes,
+					req_exec_quorum_nodeinfo->expected_votes,
+					req_exec_quorum_nodeinfo->wait_for_all_status,
+					req_exec_quorum_nodeinfo->quorate,
+					req_exec_quorum_nodeinfo->flags);

  	node = find_node_by_nodeid(nodeid);
  	if (!node) {
@@ -1036,7 +1057,14 @@ static void message_handler_req_exec_votequorum_nodeinfo (
  	/* Update node state */
  	node->votes = req_exec_quorum_nodeinfo->votes;
  	node->flags = req_exec_quorum_nodeinfo->flags;
-	node->state = NODESTATE_MEMBER;
+
+	if (node->flags&  NODE_FLAGS_LEAVING) {
+		node->state = NODESTATE_LEAVING;
+		allow_downgrade = 1;
+		by_node = 1;
+	} else {
+		node->state = NODESTATE_MEMBER;
+	}

  	if ((!cluster_is_quorate)&&
  	(req_exec_quorum_nodeinfo->quorate)) {
@@ -1050,12 +1078,6 @@ static void message_handler_req_exec_votequorum_nodeinfo (
  		node->expected_votes = us->expected_votes;
  	}

-	log_printf(LOGSYS_LEVEL_DEBUG, "nodeinfo message: votes: %d, expected: %d wfa: %d quorate: %d",
-					req_exec_quorum_nodeinfo->votes,
-					req_exec_quorum_nodeinfo->expected_votes,
-					req_exec_quorum_nodeinfo->wait_for_all_status,
-					req_exec_quorum_nodeinfo->quorate);
-
  	if ((last_man_standing)&&  (req_exec_quorum_nodeinfo->votes>  1)) {
  		log_printf(LOGSYS_LEVEL_WARNING, "Last Man Standing feature is supported only when all"
  						 "cluster nodes votes are set to 1. Disabling LMS.");
@@ -1072,7 +1094,7 @@ static void message_handler_req_exec_votequorum_nodeinfo (
  	    old_expected != node->expected_votes ||
  	    old_flags != node->flags ||
  	    old_state != node->state) {
-		recalculate_quorum(allow_downgrade, 0);
+		recalculate_quorum(allow_downgrade, by_node);
  	}

  	if (!nodeid) {
@@ -1129,6 +1151,7 @@ static void message_handler_req_exec_votequorum_reconfigure (
  			}
  		}
  		votequorum_exec_send_expectedvotes_notification();
+		ev_barrier = req_exec_quorum_reconfigure->value;
  		recalculate_quorum(1, 0);  /* Allow decrease */
  		break;

@@ -1142,6 +1165,21 @@ static void message_handler_req_exec_votequorum_reconfigure (
  	LEAVE();
  }

+static int votequorum_exec_exit_fn (void)
+{
+	int ret = 0;
+
+	ENTER();
+
+	if (leave_remove) {
+		us->flags |= NODE_FLAGS_LEAVING;
+		ret = votequorum_exec_send_nodeinfo();
+	}
+
+	LEAVE();
+	return ret;
+}
+
  static char *votequorum_exec_init_fn (struct corosync_api_v1 *api)
  {
  #ifdef COROSYNC_SOLARIS
@@ -1413,6 +1451,9 @@ static void message_handler_req_lib_votequorum_getinfo (void *conn, const void *
  		if (auto_tie_breaker) {
  			res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_AUTO_TIE_BREAKER;
  		}
+		if (leave_remove) {
+			res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_LEAVE_REMOVE;
+		}
  	} else {
  		error = CS_ERR_NOT_EXIST;
  	}
diff --git a/include/corosync/ipc_votequorum.h b/include/corosync/ipc_votequorum.h
index ce0edcb..3215ccc 100644
--- a/include/corosync/ipc_votequorum.h
+++ b/include/corosync/ipc_votequorum.h
@@ -108,6 +108,7 @@ struct res_lib_votequorum_status {
  #define VOTEQUORUM_INFO_WAIT_FOR_ALL	        4
  #define VOTEQUORUM_INFO_LAST_MAN_STANDING       8
  #define VOTEQUORUM_INFO_AUTO_TIE_BREAKER       16
+#define VOTEQUORUM_INFO_LEAVE_REMOVE           32

  struct res_lib_votequorum_getinfo {
  	struct qb_ipc_response_header header __attribute__((aligned(8)));
diff --git a/include/corosync/votequorum.h b/include/corosync/votequorum.h
index 2293c35..8173c45 100644
--- a/include/corosync/votequorum.h
+++ b/include/corosync/votequorum.h
@@ -51,13 +51,14 @@ typedef uint64_t votequorum_handle_t;
  #define VOTEQUORUM_INFO_WAIT_FOR_ALL            4
  #define VOTEQUORUM_INFO_LAST_MAN_STANDING       8
  #define VOTEQUORUM_INFO_AUTO_TIE_BREAKER       16
+#define VOTEQUORUM_INFO_LEAVE_REMOVE           32

  #define VOTEQUORUM_NODEID_US 0
  #define VOTEQUORUM_NODEID_QDEVICE -1

-#define NODESTATE_JOINING    1
-#define NODESTATE_MEMBER     2
-#define NODESTATE_DEAD       3
+#define NODESTATE_MEMBER     1
+#define NODESTATE_DEAD       2
+#define NODESTATE_LEAVING    3

  /** @} */

diff --git a/man/votequorum.5 b/man/votequorum.5
index e275a99..ab10662 100644
--- a/man/votequorum.5
+++ b/man/votequorum.5
@@ -270,9 +270,55 @@ quorum {
  }

  .fi
+.PP
+.B leave_remove: 1
+.PP
+Enables leave remove (LR) feature (default: 0).
+.PP
+The general behaviour of votequorum is to never decrease expected votes or quorum.
+.PP
+When LR is enabled, both expected votes and quorum are recalculated whenever
+a node leaves the cluster cleanly (normal corosync shutdown process), but never
+below the configured expected_votes.
+.PP
+Example use case:
+.PP
+.nf
+1) N node cluster (where N is any value higher than 3)
+
+2) expected_votes set to 3 in corosync.conf
+
+3) only 3 nodes are running
+
+4) admin needs to increase processing power and adds 10 nodes
+
+5) internal expected_votes is automatically set to 13
+
+6) minimum expected_votes is 3 (from configuration)
+
+- up to this point this is standard votequorum behavior -
+
+7) once the work is done, admin wants to remove nodes from the cluster
+
+8) using an ordered shutdown the admin can reduce the cluster size
+   automatically back to 3, but not below 3, where normal quorum
+   operation will work as usual.
+
+.fi
+.PP
+Example configuration:
+.nf
+
+quorum {
+    provider: corosync_votequorum
+    expected_votes: 3
+    leave_remove: 1
+}
+.fi
+.PP
  .SH VARIOUS NOTES
  .PP
-* WFA / LMS / ATB can be used combined together.
+* WFA / LMS / ATB / LR can be used combined together.
  .PP
  * In order to change the default votes for a node there are two options:
  .nf
diff --git a/man/votequorum_getinfo.3 b/man/votequorum_getinfo.3
index 213e8a0..3a40104 100644
--- a/man/votequorum_getinfo.3
+++ b/man/votequorum_getinfo.3
@@ -67,6 +67,7 @@ struct votequorum_info {
  #define VOTEQUORUM_INFO_WAIT_FOR_ALL            4
  #define VOTEQUORUM_INFO_LAST_MAN_STANDING       8
  #define VOTEQUORUM_INFO_AUTO_TIE_BREAKER       16
+#define VOTEQUORUM_INFO_LEAVE_REMOVE           32

  .ta
  .fi
diff --git a/test/testvotequorum1.c b/test/testvotequorum1.c
index caab9a4..ff0a085 100644
--- a/test/testvotequorum1.c
+++ b/test/testvotequorum1.c
@@ -48,9 +48,6 @@ static votequorum_handle_t g_handle;
  static const char *node_state(int state)
  {
  	switch (state) {
-	case NODESTATE_JOINING:
-		return "Joining";
-		break;
  	case NODESTATE_MEMBER:
  		return "Member";
  		break;
@@ -130,6 +127,7 @@ int main(int argc, char *argv[])
  		if (info.flags&  VOTEQUORUM_INFO_WAIT_FOR_ALL) printf("WaitForAll ");
  		if (info.flags&  VOTEQUORUM_INFO_LAST_MAN_STANDING) printf("LastManStanding ");
  		if (info.flags&  VOTEQUORUM_INFO_AUTO_TIE_BREAKER) printf("AutoTieBreaker ");
+		if (info.flags&  VOTEQUORUM_INFO_LEAVE_REMOVE) printf("LeaveRemove ");

  		printf("\n");
  	}
@@ -159,6 +157,7 @@ int main(int argc, char *argv[])
  			if (info.flags&  VOTEQUORUM_INFO_WAIT_FOR_ALL) printf("WaitForAll ");
  			if (info.flags&  VOTEQUORUM_INFO_LAST_MAN_STANDING) printf("LastManStanding ");
  			if (info.flags&  VOTEQUORUM_INFO_AUTO_TIE_BREAKER) printf("AutoTieBreaker ");
+			if (info.flags&  VOTEQUORUM_INFO_LEAVE_REMOVE) printf("LeaveRemove ");
  			printf("\n");
  		}
  	}
diff --git a/tools/corosync-quorumtool.c b/tools/corosync-quorumtool.c
index 45c0e32..48266b2 100644
--- a/tools/corosync-quorumtool.c
+++ b/tools/corosync-quorumtool.c
@@ -341,6 +341,7 @@ static int display_quorum_data(int is_quorate, int loop)
  		if (info.flags&  VOTEQUORUM_INFO_WAIT_FOR_ALL) printf("WaitForAll ");
  		if (info.flags&  VOTEQUORUM_INFO_LAST_MAN_STANDING) printf("LastManStanding ");
  		if (info.flags&  VOTEQUORUM_INFO_AUTO_TIE_BREAKER) printf("AutoTieBreaker ");
+		if (info.flags&  VOTEQUORUM_INFO_LEAVE_REMOVE) printf("LeaveRemove ");
  		printf("\n");
  	} else {
  		fprintf(stderr, "votequorum_getinfo FAILED: %d\n", err);

_______________________________________________
discuss mailing list
discuss@xxxxxxxxxxxx
http://lists.corosync.org/mailman/listinfo/discuss


[Index of Archives]     [Linux Clusters]     [Corosync Project]     [Linux USB Devel]     [Linux Audio Users]     [Photo]     [Yosemite News]    [Yosemite Photos]    [Linux Kernel]     [Linux SCSI]     [X.Org]

  Powered by Linux