Re: [PATCH 2/2] flatiron cpg: Process join list after downlists

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Have you tried Dave's cpg test tool?  It would be interesting to see
whether this introduces any other regressions.

Reviewed-by: Steven Dake <sdake@xxxxxxxxxx>

On 06/13/2012 07:07 AM, Jan Friesse wrote:
> Consider the following situation:
> - we have 3 nodes
> - on the wire, the messages look like D1,J1,D2,J2,D3,J3 (D is a
>   downlist, J is a joinlist)
> - let's say D1 and D3 contain node 2
> - this means that J2 is applied, but right after that D1 (or D3) is
>   applied, which means node 2 is again considered down
> 
> This is solved by collecting the joinlists and applying them after the
> downlist, so the order is:
> - apply the best matching downlist
> - apply all joinlists
> 
> Signed-off-by: Jan Friesse <jfriesse@xxxxxxxxxx>
> ---
>  services/cpg.c |  116 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
>  1 files changed, 109 insertions(+), 7 deletions(-)
> 
> diff --git a/services/cpg.c b/services/cpg.c
> index dbd61c5..7e62260 100644
> --- a/services/cpg.c
> +++ b/services/cpg.c
> @@ -135,6 +135,7 @@ enum cpg_downlist_state_e {
>  };
>  static enum cpg_downlist_state_e downlist_state;
>  static struct list_head downlist_messages_head;
> +static struct list_head joinlist_messages_head;
>  
>  struct cpg_pd {
>  	void *conn;
> @@ -264,6 +265,10 @@ static void downlist_messages_delete (void);
>  
>  static void downlist_master_choose_and_send (void);
>  
> +static void joinlist_inform_clients (void);
> +
> +static void joinlist_messages_delete (void);
> +
>  static void cpg_sync_init_v2 (
>  	const unsigned int *trans_list,
>  	size_t trans_list_entries,
> @@ -277,11 +282,20 @@ static void cpg_sync_activate (void);
>  
>  static void cpg_sync_abort (void);
>  
> +static void do_proc_join(
> +	const mar_cpg_name_t *name,
> +	uint32_t pid,
> +	unsigned int nodeid,
> +	int reason);
> +
>  static int notify_lib_totem_membership (
>  	void *conn,
>  	int member_list_entries,
>  	const unsigned int *member_list);
>  
> +static char *cpg_print_group_name (
> +	const mar_cpg_name_t *group);
> +
>  /*
>   * Library Handler Definition
>   */
> @@ -460,8 +474,45 @@ struct downlist_msg {
>  	struct list_head list;
>  };
>  
> +struct joinlist_msg {
> +	mar_uint32_t sender_nodeid;
> +	uint32_t pid;
> +	mar_cpg_name_t group_name;
> +	struct list_head list;
> +};
> +
>  static struct req_exec_cpg_downlist g_req_exec_cpg_downlist;
>  
> +/*
> + * Function print group name. It's not reentrant
> + */
> +static char *cpg_print_group_name(const mar_cpg_name_t *group)
> +{
> +	static char res[CPG_MAX_NAME_LENGTH * 4 + 1];
> +	int dest_pos = 0;
> +	char c;
> +	int i;
> +
> +	for (i = 0; i < group->length; i++) {
> +		c = group->value[i];
> +
> +		if (c >= ' ' && c < 0x7f && c != '\\') {
> +			res[dest_pos++] = c;
> +                } else {
> +			if (c == '\\') {
> +				res[dest_pos++] = '\\';
> +				res[dest_pos++] = '\\';
> +			} else {
> +				snprintf(res + dest_pos, sizeof(res) - dest_pos, "\\x%02X", c);
> +				dest_pos += 4;
> +			}
> +		}
> +	}
> +	res[dest_pos] = 0;
> +
> +	return (res);
> +}
> +
>  static void cpg_sync_init_v2 (
>  	const unsigned int *trans_list,
>  	size_t trans_list_entries,
> @@ -531,8 +582,11 @@ static void cpg_sync_activate (void)
>  		downlist_master_choose_and_send ();
>  	}
>  
> +	joinlist_inform_clients ();
> +
>  	downlist_messages_delete ();
>  	downlist_state = CPG_DOWNLIST_NONE;
> +	joinlist_messages_delete ();
>  
>  	notify_lib_totem_membership (NULL, my_member_list_entries, my_member_list);
>  }
> @@ -541,6 +595,7 @@ static void cpg_sync_abort (void)
>  {
>  	downlist_state = CPG_DOWNLIST_NONE;
>  	downlist_messages_delete ();
> +	joinlist_messages_delete ();
>  }
>  
>  static int notify_lib_totem_membership (
> @@ -817,6 +872,34 @@ static void downlist_master_choose_and_send (void)
>  	}
>  }
>  
> +static void joinlist_inform_clients (void)
> +{
> +	struct joinlist_msg *stored_msg;
> +	struct list_head *iter;
> +	unsigned int i;
> +
> +	i = 0;
> +	for (iter = joinlist_messages_head.next;
> +		iter != &joinlist_messages_head;
> +		iter = iter->next) {
> +
> +		stored_msg = list_entry(iter, struct joinlist_msg, list);
> +
> +		log_printf (LOG_DEBUG, "joinlist_messages[%u] group:%s, ip:%s, pid:%d",
> +			i++, cpg_print_group_name(&stored_msg->group_name),
> +			(char*)api->totem_ifaces_print(stored_msg->sender_nodeid),
> +			stored_msg->pid);
> +
> +		/* Ignore our own messages */
> +		if (stored_msg->sender_nodeid == api->totem_nodeid_get()) {
> +			continue ;
> +		}
> +
> +		do_proc_join (&stored_msg->group_name, stored_msg->pid, stored_msg->sender_nodeid,
> +			CONFCHG_CPG_REASON_NODEUP);
> +	}
> +}
> +
>  static void downlist_messages_delete (void)
>  {
>  	struct downlist_msg *stored_msg;
> @@ -834,6 +917,23 @@ static void downlist_messages_delete (void)
>  	}
>  }
>  
> +static void joinlist_messages_delete (void)
> +{
> +	struct joinlist_msg *stored_msg;
> +	struct list_head *iter, *iter_next;
> +
> +	for (iter = joinlist_messages_head.next;
> +		iter != &joinlist_messages_head;
> +		iter = iter_next) {
> +
> +		iter_next = iter->next;
> +
> +		stored_msg = list_entry(iter, struct joinlist_msg, list);
> +		list_del (&stored_msg->list);
> +		free (stored_msg);
> +	}
> +	list_init (&joinlist_messages_head);
> +}
>  
>  static int cpg_exec_init_fn (struct corosync_api_v1 *corosync_api)
>  {
> @@ -841,6 +941,7 @@ static int cpg_exec_init_fn (struct corosync_api_v1 *corosync_api)
>  	logsys_subsys_init();
>  #endif
>  	list_init (&downlist_messages_head);
> +	list_init (&joinlist_messages_head);
>  	api = corosync_api;
>  	return (0);
>  }
> @@ -1157,18 +1258,19 @@ static void message_handler_req_exec_cpg_joinlist (
>  	const char *message = message_v;
>  	const coroipc_response_header_t *res = (const coroipc_response_header_t *)message;
>  	const struct join_list_entry *jle = (const struct join_list_entry *)(message + sizeof(coroipc_response_header_t));
> +	struct joinlist_msg *stored_msg;
>  
>  	log_printf(LOGSYS_LEVEL_DEBUG, "got joinlist message from node %x\n",
>  		nodeid);
>  
> -	/* Ignore our own messages */
> -	if (nodeid == api->totem_nodeid_get()) {
> -		return;
> -	}
> -
>  	while ((const char*)jle < message + res->size) {
> -		do_proc_join (&jle->group_name, jle->pid, nodeid,
> -			CONFCHG_CPG_REASON_NODEUP);
> +		stored_msg = malloc (sizeof (struct joinlist_msg));
> +		memset(stored_msg, 0, sizeof (struct joinlist_msg));
> +		stored_msg->sender_nodeid = nodeid;
> +		stored_msg->pid = jle->pid;
> +		memcpy(&stored_msg->group_name, &jle->group_name, sizeof(mar_cpg_name_t));
> +		list_init (&stored_msg->list);
> +		list_add (&stored_msg->list, &joinlist_messages_head);
>  		jle++;
>  	}
>  }

_______________________________________________
discuss mailing list
discuss@xxxxxxxxxxxx
http://lists.corosync.org/mailman/listinfo/discuss


[Index of Archives]     [Linux Clusters]     [Corosync Project]     [Linux USB Devel]     [Linux Audio Users]     [Photo]     [Yosemite News]    [Yosemite Photos]    [Linux Kernel]     [Linux SCSI]     [X.Org]

  Powered by Linux