Re: [PATCH 09/21] votequorum: add last_man_standing support (default: off)

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Reviewed-by: Steven Dake <sdake@xxxxxxxxxx>

On 01/10/2012 02:23 AM, Fabio M. Di Nitto wrote:
> From: "Fabio M. Di Nitto" <fdinitto@xxxxxxxxxx>
> 
> This flag (0|1) can be configured via quorum.last_man_standing and, when
> enabled, allows expected_votes to be dynamically recalculated.
> 
> Assume an 8-node cluster in which every node has 1 vote (a mandatory
> requirement for this feature).
> 
> In the first event, 3 nodes are lost.
> 
> The remaining partition of 5 is barely quorate.
> 
> After a configurable timeout (quorum.last_man_standing_window, default 10sec)
> the quorate partition is allowed to recalculate expected_votes based on
> the remaining nodes.
> 
> This operation will bring expected_votes to 5 and quorum to 3.
> 
> Repeating the above loop, in the next event 2 more nodes are allowed to
> die, and so on.
> 
> Signed-off-by: Fabio M. Di Nitto <fdinitto@xxxxxxxxxx>
> ---
> :100644 100644 0099cd7... 7a2078b... M	exec/coroparse.c
> :100644 100644 9d46269... e56654a... M	services/votequorum.c
>  exec/coroparse.c      |    2 ++
>  services/votequorum.c |   39 +++++++++++++++++++++++++++++++++++++++
>  2 files changed, 41 insertions(+), 0 deletions(-)
> 
> diff --git a/exec/coroparse.c b/exec/coroparse.c
> index 0099cd7..7a2078b 100644
> --- a/exec/coroparse.c
> +++ b/exec/coroparse.c
> @@ -360,6 +360,7 @@ static int main_config_parser_cb(const char *path,
>  			if ((strcmp(path, "quorum.expected_votes") == 0) ||
>  			    (strcmp(path, "quorum.votes") == 0) ||
>  			    (strcmp(path, "quorum.quorumdev_poll") == 0) ||
> +			    (strcmp(path, "quorum.last_man_standing_window") == 0) ||
>  			    (strcmp(path, "quorum.leaving_timeout") == 0)) {
>  				i = atoi(value);
>  				icmap_set_uint32(path, i);
> @@ -369,6 +370,7 @@ static int main_config_parser_cb(const char *path,
>  			if ((strcmp(path, "quorum.two_node") == 0) ||
>  			    (strcmp(path, "quorum.wait_for_all") == 0) ||
>  			    (strcmp(path, "quorum.auto_tie_breaker") == 0) ||
> +			    (strcmp(path, "quorum.last_man_standing") == 0) ||
>  			    (strcmp(path, "quorum.quorate") == 0)) {
>  				i = atoi(value);
>  				icmap_set_uint8(path, i);
> diff --git a/services/votequorum.c b/services/votequorum.c
> index 9d46269..e56654a 100644
> --- a/services/votequorum.c
> +++ b/services/votequorum.c
> @@ -79,6 +79,7 @@
>  #define DEFAULT_EXPECTED   1024
>  #define DEFAULT_QDEV_POLL 10000
>  #define DEFAULT_LEAVE_TMO 10000
> +#define DEFAULT_LMS_WIN   10000
>  
>  LOGSYS_DECLARE_SUBSYS ("VOTEQ");
>  
> @@ -121,9 +122,14 @@ static int cluster_is_quorate;
>  static int first_trans = 1;
>  static unsigned int quorumdev_poll = DEFAULT_QDEV_POLL;
>  static unsigned int leaving_timeout = DEFAULT_LEAVE_TMO;
> +
>  static uint8_t wait_for_all = 0;
>  static uint8_t auto_tie_breaker = 0;
>  static int lowest_node_id = -1;
> +static uint8_t last_man_standing = 0;
> +static uint32_t last_man_standing_window = DEFAULT_LMS_WIN;
> +static int last_man_standing_timer_set = 0;
> +static corosync_timer_handle_t last_man_standing_timer;
>  
>  static struct cluster_node *us;
>  static struct cluster_node *quorum_device = NULL;
> @@ -375,6 +381,8 @@ static void votequorum_init(struct corosync_api_v1 *api,
>  
>  	icmap_get_uint8("quorum.wait_for_all", &wait_for_all);
>  	icmap_get_uint8("quorum.auto_tie_breaker", &auto_tie_breaker);
> +	icmap_get_uint8("quorum.last_man_standing", &last_man_standing);
> +	icmap_get_uint32("quorum.last_man_standing_window", &last_man_standing_window);
>  
>  	/*
>  	 * TODO: we need to know the lowest node-id in the cluster
> @@ -875,6 +883,16 @@ static int quorum_exec_send_reconfigure(int param, int nodeid, int value)
>  	return ret;
>  }
>  
> +static void lms_timer_fn(void *arg)
> +{
> +	ENTER();
> +	last_man_standing_timer_set = 0;
> +	if (cluster_is_quorate) {
> +		recalculate_quorum(1,1);
> +	}
> +	LEAVE();
> +}
> +
>  static void quorum_confchg_fn (
>  	enum totem_configuration_type configuration_type,
>  	const unsigned int *member_list, size_t member_list_entries,
> @@ -902,6 +920,17 @@ static void quorum_confchg_fn (
>  		}
>  	}
>  
> +	if (last_man_standing) {
> +		if ((member_list_entries >= quorum) && (left_list_entries)) {
> +			if (last_man_standing_timer_set) {
> +				corosync_api->timer_delete(last_man_standing_timer);
> +				last_man_standing_timer_set = 0;
> +			}
> +			corosync_api->timer_add_duration((unsigned long long)last_man_standing_window*1000000, NULL, lms_timer_fn, &last_man_standing_timer);
> +			last_man_standing_timer_set = 1;
> +		}
> +	}
> +
>  	if (member_list_entries) {
>  		memcpy(quorum_members, member_list, sizeof(unsigned int) * member_list_entries);
>  		quorum_members_entries = member_list_entries;
> @@ -975,6 +1004,16 @@ static void message_handler_req_exec_votequorum_nodeinfo (
>  
>  	log_printf(LOGSYS_LEVEL_DEBUG, "nodeinfo message: votes: %d, expected: %d wfa: %d\n", req_exec_quorum_nodeinfo->votes, req_exec_quorum_nodeinfo->expected_votes, req_exec_quorum_nodeinfo->wait_for_all);
>  
> +	if ((last_man_standing) && (req_exec_quorum_nodeinfo->votes > 1)) {
> +		log_printf(LOGSYS_LEVEL_WARNING, "Last Man Standing feature is supported only when all"
> +						 "cluster nodes votes are set to 1. Disabling LMS.");
> +		last_man_standing = 0;
> +		if (last_man_standing_timer_set) {
> +			corosync_api->timer_delete(last_man_standing_timer);
> +			last_man_standing_timer_set = 0;
> +		}
> +	}
> +
>  	node->flags &= ~NODE_FLAGS_BEENDOWN;
>  
>  	if (new_node || req_exec_quorum_nodeinfo->first_trans || 

_______________________________________________
discuss mailing list
discuss@xxxxxxxxxxxx
http://lists.corosync.org/mailman/listinfo/discuss


[Index of Archives]     [Linux Clusters]     [Corosync Project]     [Linux USB Devel]     [Linux Audio Users]     [Photo]     [Yosemite News]    [Yosemite Photos]    [Linux Kernel]     [Linux SCSI]     [X.Org]

  Powered by Linux