Have you tried Dave's cpg test tool? It would be interesting to see if this introduces any other regressions. Reviewed-by: Steven Dake <sdake@xxxxxxxxxx> On 06/13/2012 07:07 AM, Jan Friesse wrote: > Let's say the following situation happens: > - we have 3 nodes > - on the wire, messages look like D1,J1,D2,J2,D3,J3 (D is downlist, J is > joinlist) > - let's say D1 and D3 contain node 2 > - it means that J2 is applied, but right after that, D1 (or D3) is > applied, which means node 2 is again considered down > > It's solved by collecting joinlists and applying them after the downlist, so the > order is: > - apply best matching downlist > - apply all joinlists > > Signed-off-by: Jan Friesse <jfriesse@xxxxxxxxxx> > --- > services/cpg.c | 116 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- > 1 files changed, 109 insertions(+), 7 deletions(-) > > diff --git a/services/cpg.c b/services/cpg.c > index dbd61c5..7e62260 100644 > --- a/services/cpg.c > +++ b/services/cpg.c > @@ -135,6 +135,7 @@ enum cpg_downlist_state_e { > }; > static enum cpg_downlist_state_e downlist_state; > static struct list_head downlist_messages_head; > +static struct list_head joinlist_messages_head; > > struct cpg_pd { > void *conn; > @@ -264,6 +265,10 @@ static void downlist_messages_delete (void); > > static void downlist_master_choose_and_send (void); > > +static void joinlist_inform_clients (void); > + > +static void joinlist_messages_delete (void); > + > static void cpg_sync_init_v2 ( > const unsigned int *trans_list, > size_t trans_list_entries, > @@ -277,11 +282,20 @@ static void cpg_sync_activate (void); > > static void cpg_sync_abort (void); > > +static void do_proc_join( > + const mar_cpg_name_t *name, > + uint32_t pid, > + unsigned int nodeid, > + int reason); > + > static int notify_lib_totem_membership ( > void *conn, > int member_list_entries, > const unsigned int *member_list); > > +static char *cpg_print_group_name ( > + const mar_cpg_name_t *group); > + > /* > * Library Handler Definition > 
*/ > @@ -460,8 +474,45 @@ struct downlist_msg { > struct list_head list; > }; > > +struct joinlist_msg { > + mar_uint32_t sender_nodeid; > + uint32_t pid; > + mar_cpg_name_t group_name; > + struct list_head list; > +}; > + > static struct req_exec_cpg_downlist g_req_exec_cpg_downlist; > > +/* > + * Function print group name. It's not reentrant > + */ > +static char *cpg_print_group_name(const mar_cpg_name_t *group) > +{ > + static char res[CPG_MAX_NAME_LENGTH * 4 + 1]; > + int dest_pos = 0; > + char c; > + int i; > + > + for (i = 0; i < group->length; i++) { > + c = group->value[i]; > + > + if (c >= ' ' && c < 0x7f && c != '\\') { > + res[dest_pos++] = c; > + } else { > + if (c == '\\') { > + res[dest_pos++] = '\\'; > + res[dest_pos++] = '\\'; > + } else { > + snprintf(res + dest_pos, sizeof(res) - dest_pos, "\\x%02X", c); > + dest_pos += 4; > + } > + } > + } > + res[dest_pos] = 0; > + > + return (res); > +} > + > static void cpg_sync_init_v2 ( > const unsigned int *trans_list, > size_t trans_list_entries, > @@ -531,8 +582,11 @@ static void cpg_sync_activate (void) > downlist_master_choose_and_send (); > } > > + joinlist_inform_clients (); > + > downlist_messages_delete (); > downlist_state = CPG_DOWNLIST_NONE; > + joinlist_messages_delete (); > > notify_lib_totem_membership (NULL, my_member_list_entries, my_member_list); > } > @@ -541,6 +595,7 @@ static void cpg_sync_abort (void) > { > downlist_state = CPG_DOWNLIST_NONE; > downlist_messages_delete (); > + joinlist_messages_delete (); > } > > static int notify_lib_totem_membership ( > @@ -817,6 +872,34 @@ static void downlist_master_choose_and_send (void) > } > } > > +static void joinlist_inform_clients (void) > +{ > + struct joinlist_msg *stored_msg; > + struct list_head *iter; > + unsigned int i; > + > + i = 0; > + for (iter = joinlist_messages_head.next; > + iter != &joinlist_messages_head; > + iter = iter->next) { > + > + stored_msg = list_entry(iter, struct joinlist_msg, list); > + > + log_printf (LOG_DEBUG, 
"joinlist_messages[%u] group:%s, ip:%s, pid:%d", > + i++, cpg_print_group_name(&stored_msg->group_name), > + (char*)api->totem_ifaces_print(stored_msg->sender_nodeid), > + stored_msg->pid); > + > + /* Ignore our own messages */ > + if (stored_msg->sender_nodeid == api->totem_nodeid_get()) { > + continue ; > + } > + > + do_proc_join (&stored_msg->group_name, stored_msg->pid, stored_msg->sender_nodeid, > + CONFCHG_CPG_REASON_NODEUP); > + } > +} > + > static void downlist_messages_delete (void) > { > struct downlist_msg *stored_msg; > @@ -834,6 +917,23 @@ static void downlist_messages_delete (void) > } > } > > +static void joinlist_messages_delete (void) > +{ > + struct joinlist_msg *stored_msg; > + struct list_head *iter, *iter_next; > + > + for (iter = joinlist_messages_head.next; > + iter != &joinlist_messages_head; > + iter = iter_next) { > + > + iter_next = iter->next; > + > + stored_msg = list_entry(iter, struct joinlist_msg, list); > + list_del (&stored_msg->list); > + free (stored_msg); > + } > + list_init (&joinlist_messages_head); > +} > > static int cpg_exec_init_fn (struct corosync_api_v1 *corosync_api) > { > @@ -841,6 +941,7 @@ static int cpg_exec_init_fn (struct corosync_api_v1 *corosync_api) > logsys_subsys_init(); > #endif > list_init (&downlist_messages_head); > + list_init (&joinlist_messages_head); > api = corosync_api; > return (0); > } > @@ -1157,18 +1258,19 @@ static void message_handler_req_exec_cpg_joinlist ( > const char *message = message_v; > const coroipc_response_header_t *res = (const coroipc_response_header_t *)message; > const struct join_list_entry *jle = (const struct join_list_entry *)(message + sizeof(coroipc_response_header_t)); > + struct joinlist_msg *stored_msg; > > log_printf(LOGSYS_LEVEL_DEBUG, "got joinlist message from node %x\n", > nodeid); > > - /* Ignore our own messages */ > - if (nodeid == api->totem_nodeid_get()) { > - return; > - } > - > while ((const char*)jle < message + res->size) { > - do_proc_join 
(&jle->group_name, jle->pid, nodeid, > - CONFCHG_CPG_REASON_NODEUP); > + stored_msg = malloc (sizeof (struct joinlist_msg)); > + memset(stored_msg, 0, sizeof (struct joinlist_msg)); > + stored_msg->sender_nodeid = nodeid; > + stored_msg->pid = jle->pid; > + memcpy(&stored_msg->group_name, &jle->group_name, sizeof(mar_cpg_name_t)); > + list_init (&stored_msg->list); > + list_add (&stored_msg->list, &joinlist_messages_head); > jle++; > } > } _______________________________________________ discuss mailing list discuss@xxxxxxxxxxxx http://lists.corosync.org/mailman/listinfo/discuss