From: "Fabio M. Di Nitto" <fdinitto@xxxxxxxxxx> This also cleans up NODESTATE for good: JOINING was never used, so it is removed. Signed-off-by: Fabio M. Di Nitto <fdinitto@xxxxxxxxxx> --- exec/coroparse.c | 1 + exec/votequorum.c | 63 ++++++++++++++++++++++++++++++------ include/corosync/ipc_votequorum.h | 1 + include/corosync/votequorum.h | 7 ++-- man/votequorum.5 | 48 +++++++++++++++++++++++++++- man/votequorum_getinfo.3 | 1 + test/testvotequorum1.c | 5 +-- tools/corosync-quorumtool.c | 1 + 8 files changed, 109 insertions(+), 18 deletions(-) diff --git a/exec/coroparse.c b/exec/coroparse.c index 9ca49ed..29115c9 100644 --- a/exec/coroparse.c +++ b/exec/coroparse.c @@ -371,6 +371,7 @@ static int main_config_parser_cb(const char *path, } if ((strcmp(path, "quorum.two_node") == 0) || + (strcmp(path, "quorum.leave_remove") == 0) || (strcmp(path, "quorum.wait_for_all") == 0) || (strcmp(path, "quorum.auto_tie_breaker") == 0) || (strcmp(path, "quorum.last_man_standing") == 0)) { diff --git a/exec/votequorum.c b/exec/votequorum.c index afef5cf..672237a 100644 --- a/exec/votequorum.c +++ b/exec/votequorum.c @@ -83,6 +83,8 @@ static int lowest_node_id = -1; static uint8_t last_man_standing = 0; static uint32_t last_man_standing_window = DEFAULT_LMS_WIN; +static uint8_t leave_remove = 0; +static uint32_t ev_barrier = 0; /* * votequorum_exec defines/structs/forward definitions */ @@ -135,13 +137,15 @@ static int votequorum_exec_send_reconfigure(uint8_t param, unsigned int nodeid, * votequorum internal node status/view */ +#define NODE_FLAGS_LEAVING 1 + #define NODEID_US 0 #define NODEID_QDEVICE UINT32_MAX typedef enum { - NODESTATE_JOINING=1, - NODESTATE_MEMBER, - NODESTATE_DEAD + NODESTATE_MEMBER=1, + NODESTATE_DEAD, + NODESTATE_LEAVING } nodestate_t; struct cluster_node { @@ -214,6 +218,7 @@ static quorum_set_quorate_fn_t quorum_callback; */ static char *votequorum_exec_init_fn (struct corosync_api_v1 *api); +static int votequorum_exec_exit_fn (void); static void 
message_handler_req_exec_votequorum_nodeinfo ( const void *message, @@ -328,6 +333,7 @@ static struct corosync_service_engine votequorum_service_engine = { .lib_engine = quorum_lib_service, .lib_engine_count = sizeof (quorum_lib_service) / sizeof (struct corosync_lib_handler), .exec_init_fn = votequorum_exec_init_fn, + .exec_exit_fn = votequorum_exec_exit_fn, .exec_engine = votequorum_exec_engine, .exec_engine_count = sizeof (votequorum_exec_engine) / sizeof (struct corosync_exec_handler), .confchg_fn = votequorum_confchg_fn, @@ -488,6 +494,10 @@ static int calculate_quorum(int allow_decrease, unsigned int max_expected, unsig ENTER(); + if ((leave_remove) && (allow_decrease) && (max_expected)) { + max_expected = max(ev_barrier, max_expected); + } + list_iterate(nodelist, &cluster_members_list) { node = list_entry(nodelist, struct cluster_node, list); @@ -724,6 +734,7 @@ static char *votequorum_readconfig_static(void) wait_for_all = 1; } + icmap_get_uint8("quorum.leave_remove", &leave_remove); icmap_get_uint8("quorum.wait_for_all", &wait_for_all); icmap_get_uint8("quorum.auto_tie_breaker", &auto_tie_breaker); icmap_get_uint8("quorum.last_man_standing", &last_man_standing); @@ -759,6 +770,8 @@ static void votequorum_readconfig_dynamic(void) icmap_get_uint32("quorum.expected_votes", &us->expected_votes); + ev_barrier = us->expected_votes; + #ifdef EXPERIMENTAL_QUORUM_DEVICE_API if (icmap_get_uint32("quorum.quorumdev_poll", &quorumdev_poll) != CS_OK) { quorumdev_poll = DEFAULT_QDEV_POLL; @@ -1012,10 +1025,18 @@ static void message_handler_req_exec_votequorum_nodeinfo ( nodestate_t old_state; int new_node = 0; int allow_downgrade = 0; + int by_node = 0; ENTER(); log_printf(LOGSYS_LEVEL_DEBUG, "got nodeinfo message from cluster node %u", nodeid); + log_printf(LOGSYS_LEVEL_DEBUG, "nodeinfo message[%u]: votes: %d, expected: %d wfa: %d quorate: %d flags: %d", + nodeid, + req_exec_quorum_nodeinfo->votes, + req_exec_quorum_nodeinfo->expected_votes, + 
req_exec_quorum_nodeinfo->wait_for_all_status, + req_exec_quorum_nodeinfo->quorate, + req_exec_quorum_nodeinfo->flags); node = find_node_by_nodeid(nodeid); if (!node) { @@ -1036,7 +1057,14 @@ static void message_handler_req_exec_votequorum_nodeinfo ( /* Update node state */ node->votes = req_exec_quorum_nodeinfo->votes; node->flags = req_exec_quorum_nodeinfo->flags; - node->state = NODESTATE_MEMBER; + + if (node->flags & NODE_FLAGS_LEAVING) { + node->state = NODESTATE_LEAVING; + allow_downgrade = 1; + by_node = 1; + } else { + node->state = NODESTATE_MEMBER; + } if ((!cluster_is_quorate) && (req_exec_quorum_nodeinfo->quorate)) { @@ -1050,12 +1078,6 @@ static void message_handler_req_exec_votequorum_nodeinfo ( node->expected_votes = us->expected_votes; } - log_printf(LOGSYS_LEVEL_DEBUG, "nodeinfo message: votes: %d, expected: %d wfa: %d quorate: %d", - req_exec_quorum_nodeinfo->votes, - req_exec_quorum_nodeinfo->expected_votes, - req_exec_quorum_nodeinfo->wait_for_all_status, - req_exec_quorum_nodeinfo->quorate); - if ((last_man_standing) && (req_exec_quorum_nodeinfo->votes > 1)) { log_printf(LOGSYS_LEVEL_WARNING, "Last Man Standing feature is supported only when all" "cluster nodes votes are set to 1. 
Disabling LMS."); @@ -1072,7 +1094,7 @@ static void message_handler_req_exec_votequorum_nodeinfo ( old_expected != node->expected_votes || old_flags != node->flags || old_state != node->state) { - recalculate_quorum(allow_downgrade, 0); + recalculate_quorum(allow_downgrade, by_node); } if (!nodeid) { @@ -1129,6 +1151,7 @@ static void message_handler_req_exec_votequorum_reconfigure ( } } votequorum_exec_send_expectedvotes_notification(); + ev_barrier = req_exec_quorum_reconfigure->value; recalculate_quorum(1, 0); /* Allow decrease */ break; @@ -1142,6 +1165,21 @@ static void message_handler_req_exec_votequorum_reconfigure ( LEAVE(); } +static int votequorum_exec_exit_fn (void) +{ + int ret = 0; + + ENTER(); + + if (leave_remove) { + us->flags |= NODE_FLAGS_LEAVING; + ret = votequorum_exec_send_nodeinfo(); + } + + LEAVE(); + return ret; +} + static char *votequorum_exec_init_fn (struct corosync_api_v1 *api) { #ifdef COROSYNC_SOLARIS @@ -1413,6 +1451,9 @@ static void message_handler_req_lib_votequorum_getinfo (void *conn, const void * if (auto_tie_breaker) { res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_AUTO_TIE_BREAKER; } + if (leave_remove) { + res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_LEAVE_REMOVE; + } } else { error = CS_ERR_NOT_EXIST; } diff --git a/include/corosync/ipc_votequorum.h b/include/corosync/ipc_votequorum.h index ce0edcb..3215ccc 100644 --- a/include/corosync/ipc_votequorum.h +++ b/include/corosync/ipc_votequorum.h @@ -108,6 +108,7 @@ struct res_lib_votequorum_status { #define VOTEQUORUM_INFO_WAIT_FOR_ALL 4 #define VOTEQUORUM_INFO_LAST_MAN_STANDING 8 #define VOTEQUORUM_INFO_AUTO_TIE_BREAKER 16 +#define VOTEQUORUM_INFO_LEAVE_REMOVE 32 struct res_lib_votequorum_getinfo { struct qb_ipc_response_header header __attribute__((aligned(8))); diff --git a/include/corosync/votequorum.h b/include/corosync/votequorum.h index 2293c35..8173c45 100644 --- a/include/corosync/votequorum.h +++ b/include/corosync/votequorum.h @@ -51,13 +51,14 @@ typedef 
uint64_t votequorum_handle_t; #define VOTEQUORUM_INFO_WAIT_FOR_ALL 4 #define VOTEQUORUM_INFO_LAST_MAN_STANDING 8 #define VOTEQUORUM_INFO_AUTO_TIE_BREAKER 16 +#define VOTEQUORUM_INFO_LEAVE_REMOVE 32 #define VOTEQUORUM_NODEID_US 0 #define VOTEQUORUM_NODEID_QDEVICE -1 -#define NODESTATE_JOINING 1 -#define NODESTATE_MEMBER 2 -#define NODESTATE_DEAD 3 +#define NODESTATE_MEMBER 1 +#define NODESTATE_DEAD 2 +#define NODESTATE_LEAVING 3 /** @} */ diff --git a/man/votequorum.5 b/man/votequorum.5 index e275a99..ab10662 100644 --- a/man/votequorum.5 +++ b/man/votequorum.5 @@ -270,9 +270,55 @@ quorum { } .fi +.PP +.B leave_remove: 1 +.PP +Enables the leave remove (LR) feature (default: 0). +.PP +The general behaviour of votequorum is to never decrease expected votes or quorum. +.PP +When LR is enabled, both expected votes and quorum are recalculated when +a node leaves the cluster in a clean state (normal corosync shutdown process), down +to the configured expected_votes. +.PP +Example use case: +.PP +.nf +1) N node cluster (where N is any value higher than 3) + +2) expected_votes set to 3 in corosync.conf + +3) only 3 nodes are running + +4) admin needs more processing power and adds 10 nodes + +5) internal expected_votes is automatically set to 13 + +6) minimum expected_votes is 3 (from configuration) + +- up to this point this is standard votequorum behavior - + +7) once the work is done, admin wants to remove nodes from the cluster + +8) using an ordered shutdown the admin can reduce the cluster size + automatically back to 3, but not below 3, where normal quorum + operation will work as usual. + +.fi +.PP +Example configuration: +.nf + +quorum { + provider: corosync_votequorum + expected_votes: 3 + leave_remove: 1 +} +.fi +.PP .SH VARIOUS NOTES .PP -* WFA / LMS / ATB can be used combined together. +* WFA / LMS / ATB / LR can be used combined together. 
.PP * In order to change the default votes for a node there are two options: .nf diff --git a/man/votequorum_getinfo.3 b/man/votequorum_getinfo.3 index 213e8a0..3a40104 100644 --- a/man/votequorum_getinfo.3 +++ b/man/votequorum_getinfo.3 @@ -67,6 +67,7 @@ struct votequorum_info { #define VOTEQUORUM_INFO_WAIT_FOR_ALL 4 #define VOTEQUORUM_INFO_LAST_MAN_STANDING 8 #define VOTEQUORUM_INFO_AUTO_TIE_BREAKER 16 +#define VOTEQUORUM_INFO_LEAVE_REMOVE 32 .ta .fi diff --git a/test/testvotequorum1.c b/test/testvotequorum1.c index caab9a4..ff0a085 100644 --- a/test/testvotequorum1.c +++ b/test/testvotequorum1.c @@ -48,9 +48,6 @@ static votequorum_handle_t g_handle; static const char *node_state(int state) { switch (state) { - case NODESTATE_JOINING: - return "Joining"; - break; case NODESTATE_MEMBER: return "Member"; break; @@ -130,6 +127,7 @@ int main(int argc, char *argv[]) if (info.flags & VOTEQUORUM_INFO_WAIT_FOR_ALL) printf("WaitForAll "); if (info.flags & VOTEQUORUM_INFO_LAST_MAN_STANDING) printf("LastManStanding "); if (info.flags & VOTEQUORUM_INFO_AUTO_TIE_BREAKER) printf("AutoTieBreaker "); + if (info.flags & VOTEQUORUM_INFO_LEAVE_REMOVE) printf("LeaveRemove "); printf("\n"); } @@ -159,6 +157,7 @@ int main(int argc, char *argv[]) if (info.flags & VOTEQUORUM_INFO_WAIT_FOR_ALL) printf("WaitForAll "); if (info.flags & VOTEQUORUM_INFO_LAST_MAN_STANDING) printf("LastManStanding "); if (info.flags & VOTEQUORUM_INFO_AUTO_TIE_BREAKER) printf("AutoTieBreaker "); + if (info.flags & VOTEQUORUM_INFO_LEAVE_REMOVE) printf("LeaveRemove "); printf("\n"); } } diff --git a/tools/corosync-quorumtool.c b/tools/corosync-quorumtool.c index 45c0e32..48266b2 100644 --- a/tools/corosync-quorumtool.c +++ b/tools/corosync-quorumtool.c @@ -341,6 +341,7 @@ static int display_quorum_data(int is_quorate, int loop) if (info.flags & VOTEQUORUM_INFO_WAIT_FOR_ALL) printf("WaitForAll "); if (info.flags & VOTEQUORUM_INFO_LAST_MAN_STANDING) printf("LastManStanding "); if (info.flags & 
VOTEQUORUM_INFO_AUTO_TIE_BREAKER) printf("AutoTieBreaker "); + if (info.flags & VOTEQUORUM_INFO_LEAVE_REMOVE) printf("LeaveRemove "); printf("\n"); } else { fprintf(stderr, "votequorum_getinfo FAILED: %d\n", err); -- 1.7.7.6 _______________________________________________ discuss mailing list discuss@xxxxxxxxxxxx http://lists.corosync.org/mailman/listinfo/discuss