From: "Fabio M. Di Nitto"<fdinitto@xxxxxxxxxx>
This also cleans up NODESTATE for good: JOINING was never used.
Signed-off-by: Fabio M. Di Nitto<fdinitto@xxxxxxxxxx>
---
exec/coroparse.c | 1 +
exec/votequorum.c | 63 ++++++++++++++++++++++++++++++------
include/corosync/ipc_votequorum.h | 1 +
include/corosync/votequorum.h | 7 ++--
man/votequorum.5 | 48 +++++++++++++++++++++++++++-
man/votequorum_getinfo.3 | 1 +
test/testvotequorum1.c | 5 +--
tools/corosync-quorumtool.c | 1 +
8 files changed, 109 insertions(+), 18 deletions(-)
diff --git a/exec/coroparse.c b/exec/coroparse.c
index 9ca49ed..29115c9 100644
--- a/exec/coroparse.c
+++ b/exec/coroparse.c
@@ -371,6 +371,7 @@ static int main_config_parser_cb(const char *path,
}
if ((strcmp(path, "quorum.two_node") == 0) ||
+ (strcmp(path, "quorum.leave_remove") == 0) ||
(strcmp(path, "quorum.wait_for_all") == 0) ||
(strcmp(path, "quorum.auto_tie_breaker") == 0) ||
(strcmp(path, "quorum.last_man_standing") == 0)) {
diff --git a/exec/votequorum.c b/exec/votequorum.c
index afef5cf..672237a 100644
--- a/exec/votequorum.c
+++ b/exec/votequorum.c
@@ -83,6 +83,8 @@ static int lowest_node_id = -1;
static uint8_t last_man_standing = 0;
static uint32_t last_man_standing_window = DEFAULT_LMS_WIN;
+static uint8_t leave_remove = 0;
+static uint32_t ev_barrier = 0;
/*
* votequorum_exec defines/structs/forward definitions
*/
@@ -135,13 +137,15 @@ static int votequorum_exec_send_reconfigure(uint8_t param, unsigned int nodeid,
* votequorum internal node status/view
*/
+#define NODE_FLAGS_LEAVING 1
+
#define NODEID_US 0
#define NODEID_QDEVICE UINT32_MAX
typedef enum {
- NODESTATE_JOINING=1,
- NODESTATE_MEMBER,
- NODESTATE_DEAD
+ NODESTATE_MEMBER=1,
+ NODESTATE_DEAD,
+ NODESTATE_LEAVING
} nodestate_t;
struct cluster_node {
@@ -214,6 +218,7 @@ static quorum_set_quorate_fn_t quorum_callback;
*/
static char *votequorum_exec_init_fn (struct corosync_api_v1 *api);
+static int votequorum_exec_exit_fn (void);
static void message_handler_req_exec_votequorum_nodeinfo (
const void *message,
@@ -328,6 +333,7 @@ static struct corosync_service_engine votequorum_service_engine = {
.lib_engine = quorum_lib_service,
.lib_engine_count = sizeof (quorum_lib_service) / sizeof (struct corosync_lib_handler),
.exec_init_fn = votequorum_exec_init_fn,
+ .exec_exit_fn = votequorum_exec_exit_fn,
.exec_engine = votequorum_exec_engine,
.exec_engine_count = sizeof (votequorum_exec_engine) / sizeof (struct corosync_exec_handler),
.confchg_fn = votequorum_confchg_fn,
@@ -488,6 +494,10 @@ static int calculate_quorum(int allow_decrease, unsigned int max_expected, unsig
ENTER();
+ if ((leave_remove)&& (allow_decrease)&& (max_expected)) {
+ max_expected = max(ev_barrier, max_expected);
+ }
+
list_iterate(nodelist,&cluster_members_list) {
node = list_entry(nodelist, struct cluster_node, list);
@@ -724,6 +734,7 @@ static char *votequorum_readconfig_static(void)
wait_for_all = 1;
}
+ icmap_get_uint8("quorum.leave_remove",&leave_remove);
icmap_get_uint8("quorum.wait_for_all",&wait_for_all);
icmap_get_uint8("quorum.auto_tie_breaker",&auto_tie_breaker);
icmap_get_uint8("quorum.last_man_standing",&last_man_standing);
@@ -759,6 +770,8 @@ static void votequorum_readconfig_dynamic(void)
icmap_get_uint32("quorum.expected_votes",&us->expected_votes);
+ ev_barrier = us->expected_votes;
+
#ifdef EXPERIMENTAL_QUORUM_DEVICE_API
if (icmap_get_uint32("quorum.quorumdev_poll",&quorumdev_poll) != CS_OK) {
quorumdev_poll = DEFAULT_QDEV_POLL;
@@ -1012,10 +1025,18 @@ static void message_handler_req_exec_votequorum_nodeinfo (
nodestate_t old_state;
int new_node = 0;
int allow_downgrade = 0;
+ int by_node = 0;
ENTER();
log_printf(LOGSYS_LEVEL_DEBUG, "got nodeinfo message from cluster node %u", nodeid);
+ log_printf(LOGSYS_LEVEL_DEBUG, "nodeinfo message[%u]: votes: %d, expected: %d wfa: %d quorate: %d flags: %d",
+ nodeid,
+ req_exec_quorum_nodeinfo->votes,
+ req_exec_quorum_nodeinfo->expected_votes,
+ req_exec_quorum_nodeinfo->wait_for_all_status,
+ req_exec_quorum_nodeinfo->quorate,
+ req_exec_quorum_nodeinfo->flags);
node = find_node_by_nodeid(nodeid);
if (!node) {
@@ -1036,7 +1057,14 @@ static void message_handler_req_exec_votequorum_nodeinfo (
/* Update node state */
node->votes = req_exec_quorum_nodeinfo->votes;
node->flags = req_exec_quorum_nodeinfo->flags;
- node->state = NODESTATE_MEMBER;
+
+ if (node->flags& NODE_FLAGS_LEAVING) {
+ node->state = NODESTATE_LEAVING;
+ allow_downgrade = 1;
+ by_node = 1;
+ } else {
+ node->state = NODESTATE_MEMBER;
+ }
if ((!cluster_is_quorate)&&
(req_exec_quorum_nodeinfo->quorate)) {
@@ -1050,12 +1078,6 @@ static void message_handler_req_exec_votequorum_nodeinfo (
node->expected_votes = us->expected_votes;
}
- log_printf(LOGSYS_LEVEL_DEBUG, "nodeinfo message: votes: %d, expected: %d wfa: %d quorate: %d",
- req_exec_quorum_nodeinfo->votes,
- req_exec_quorum_nodeinfo->expected_votes,
- req_exec_quorum_nodeinfo->wait_for_all_status,
- req_exec_quorum_nodeinfo->quorate);
-
if ((last_man_standing)&& (req_exec_quorum_nodeinfo->votes> 1)) {
log_printf(LOGSYS_LEVEL_WARNING, "Last Man Standing feature is supported only when all"
"cluster nodes votes are set to 1. Disabling LMS.");
@@ -1072,7 +1094,7 @@ static void message_handler_req_exec_votequorum_nodeinfo (
old_expected != node->expected_votes ||
old_flags != node->flags ||
old_state != node->state) {
- recalculate_quorum(allow_downgrade, 0);
+ recalculate_quorum(allow_downgrade, by_node);
}
if (!nodeid) {
@@ -1129,6 +1151,7 @@ static void message_handler_req_exec_votequorum_reconfigure (
}
}
votequorum_exec_send_expectedvotes_notification();
+ ev_barrier = req_exec_quorum_reconfigure->value;
recalculate_quorum(1, 0); /* Allow decrease */
break;
@@ -1142,6 +1165,21 @@ static void message_handler_req_exec_votequorum_reconfigure (
LEAVE();
}
+static int votequorum_exec_exit_fn (void) /* registered as .exec_exit_fn of votequorum_service_engine; announces a clean leave when leave_remove is on */
+{
+ int ret = 0; /* returned unchanged when leave_remove is disabled */
+
+ ENTER();
+
+ if (leave_remove) { /* value read from the "quorum.leave_remove" icmap key */
+ us->flags |= NODE_FLAGS_LEAVING; /* the nodeinfo handler maps this flag to NODESTATE_LEAVING and allows a quorum downgrade */
+ ret = votequorum_exec_send_nodeinfo(); /* NOTE(review): presumably carries us->flags to the other nodes - confirm against the sender */
+ }
+
+ LEAVE();
+ return ret; /* 0, or whatever votequorum_exec_send_nodeinfo() returned */
+}
+
static char *votequorum_exec_init_fn (struct corosync_api_v1 *api)
{
#ifdef COROSYNC_SOLARIS
@@ -1413,6 +1451,9 @@ static void message_handler_req_lib_votequorum_getinfo (void *conn, const void *
if (auto_tie_breaker) {
res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_AUTO_TIE_BREAKER;
}
+ if (leave_remove) {
+ res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_LEAVE_REMOVE;
+ }
} else {
error = CS_ERR_NOT_EXIST;
}
diff --git a/include/corosync/ipc_votequorum.h b/include/corosync/ipc_votequorum.h
index ce0edcb..3215ccc 100644
--- a/include/corosync/ipc_votequorum.h
+++ b/include/corosync/ipc_votequorum.h
@@ -108,6 +108,7 @@ struct res_lib_votequorum_status {
#define VOTEQUORUM_INFO_WAIT_FOR_ALL 4
#define VOTEQUORUM_INFO_LAST_MAN_STANDING 8
#define VOTEQUORUM_INFO_AUTO_TIE_BREAKER 16
+#define VOTEQUORUM_INFO_LEAVE_REMOVE 32
struct res_lib_votequorum_getinfo {
struct qb_ipc_response_header header __attribute__((aligned(8)));
diff --git a/include/corosync/votequorum.h b/include/corosync/votequorum.h
index 2293c35..8173c45 100644
--- a/include/corosync/votequorum.h
+++ b/include/corosync/votequorum.h
@@ -51,13 +51,14 @@ typedef uint64_t votequorum_handle_t;
#define VOTEQUORUM_INFO_WAIT_FOR_ALL 4
#define VOTEQUORUM_INFO_LAST_MAN_STANDING 8
#define VOTEQUORUM_INFO_AUTO_TIE_BREAKER 16
+#define VOTEQUORUM_INFO_LEAVE_REMOVE 32
#define VOTEQUORUM_NODEID_US 0
#define VOTEQUORUM_NODEID_QDEVICE -1
-#define NODESTATE_JOINING 1
-#define NODESTATE_MEMBER 2
-#define NODESTATE_DEAD 3
+#define NODESTATE_MEMBER 1
+#define NODESTATE_DEAD 2
+#define NODESTATE_LEAVING 3
/** @} */
diff --git a/man/votequorum.5 b/man/votequorum.5
index e275a99..ab10662 100644
--- a/man/votequorum.5
+++ b/man/votequorum.5
@@ -270,9 +270,55 @@ quorum {
}
.fi
+.PP
+.B leave_remove: 1
+.PP
+Enables leave remove (LR) feature (default: 0).
+.PP
+The general behaviour of votequorum is to never decrease expected votes or quorum.
+.PP
+When LR is enabled, both expected votes and quorum are recalculated (lowered) when
+a node leaves the cluster in a clean state (normal corosync shutdown process), but
+never below the configured expected_votes.
+.PP
+Example use case:
+.PP
+.nf
+1) N node cluster (where N is any value higher than 3)
+
+2) expected_votes set to 3 in corosync.conf
+
+3) only 3 nodes are running
+
+4) admin needs to increase processing power and adds 10 nodes
+
+5) internal expected_votes is automatically set to 13
+
+6) minimum expected_votes is 3 (from configuration)
+
+- up to this point this is standard votequorum behavior -
+
+7) once the work is done, admin wants to remove nodes from the cluster
+
+8) using an ordered shutdown the admin can reduce the cluster size
+ automatically back to 3, but not below 3, where normal quorum
+ operation will work as usual.
+
+.fi
+.PP
+Example configuration:
+.nf
+
+quorum {
+ provider: corosync_votequorum
+ expected_votes: 3
+ leave_remove: 1
+}
+.fi
+.PP
.SH VARIOUS NOTES
.PP
-* WFA / LMS / ATB can be used combined together.
+* WFA / LMS / ATB / LR can be combined.
.PP
* In order to change the default votes for a node there are two options:
.nf
diff --git a/man/votequorum_getinfo.3 b/man/votequorum_getinfo.3
index 213e8a0..3a40104 100644
--- a/man/votequorum_getinfo.3
+++ b/man/votequorum_getinfo.3
@@ -67,6 +67,7 @@ struct votequorum_info {
#define VOTEQUORUM_INFO_WAIT_FOR_ALL 4
#define VOTEQUORUM_INFO_LAST_MAN_STANDING 8
#define VOTEQUORUM_INFO_AUTO_TIE_BREAKER 16
+#define VOTEQUORUM_INFO_LEAVE_REMOVE 32
.ta
.fi
diff --git a/test/testvotequorum1.c b/test/testvotequorum1.c
index caab9a4..ff0a085 100644
--- a/test/testvotequorum1.c
+++ b/test/testvotequorum1.c
@@ -48,9 +48,6 @@ static votequorum_handle_t g_handle;
static const char *node_state(int state)
{
switch (state) {
- case NODESTATE_JOINING:
- return "Joining";
- break;
case NODESTATE_MEMBER:
return "Member";
break;
@@ -130,6 +127,7 @@ int main(int argc, char *argv[])
if (info.flags& VOTEQUORUM_INFO_WAIT_FOR_ALL) printf("WaitForAll ");
if (info.flags& VOTEQUORUM_INFO_LAST_MAN_STANDING) printf("LastManStanding ");
if (info.flags& VOTEQUORUM_INFO_AUTO_TIE_BREAKER) printf("AutoTieBreaker ");
+ if (info.flags& VOTEQUORUM_INFO_LEAVE_REMOVE) printf("LeaveRemove ");
printf("\n");
}
@@ -159,6 +157,7 @@ int main(int argc, char *argv[])
if (info.flags& VOTEQUORUM_INFO_WAIT_FOR_ALL) printf("WaitForAll ");
if (info.flags& VOTEQUORUM_INFO_LAST_MAN_STANDING) printf("LastManStanding ");
if (info.flags& VOTEQUORUM_INFO_AUTO_TIE_BREAKER) printf("AutoTieBreaker ");
+ if (info.flags& VOTEQUORUM_INFO_LEAVE_REMOVE) printf("LeaveRemove ");
printf("\n");
}
}
diff --git a/tools/corosync-quorumtool.c b/tools/corosync-quorumtool.c
index 45c0e32..48266b2 100644
--- a/tools/corosync-quorumtool.c
+++ b/tools/corosync-quorumtool.c
@@ -341,6 +341,7 @@ static int display_quorum_data(int is_quorate, int loop)
if (info.flags& VOTEQUORUM_INFO_WAIT_FOR_ALL) printf("WaitForAll ");
if (info.flags& VOTEQUORUM_INFO_LAST_MAN_STANDING) printf("LastManStanding ");
if (info.flags& VOTEQUORUM_INFO_AUTO_TIE_BREAKER) printf("AutoTieBreaker ");
+ if (info.flags& VOTEQUORUM_INFO_LEAVE_REMOVE) printf("LeaveRemove ");
printf("\n");
} else {
fprintf(stderr, "votequorum_getinfo FAILED: %d\n", err);