let's say following situation will happen: - we have 3 nodes - on wire messages looks like D1,J1,D2,J2,D3,J3 (D is downlist, J is joinlist) - let's say, D1 and D3 contains node 2 - it means that J2 is applied, but right after that, D1 (or D3) is applied what means, node 2 is again considered down It's solved by collecting joinlists and apply them after downlist, so order is: - apply best matching downlist - apply all joinlists Signed-off-by: Jan Friesse <jfriesse@xxxxxxxxxx> --- exec/cpg.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 files changed, 76 insertions(+), 7 deletions(-) diff --git a/exec/cpg.c b/exec/cpg.c index f7acbc8..604ffb3 100644 --- a/exec/cpg.c +++ b/exec/cpg.c @@ -143,6 +143,7 @@ enum cpg_downlist_state_e { }; static enum cpg_downlist_state_e downlist_state; static struct list_head downlist_messages_head; +static struct list_head joinlist_messages_head; struct cpg_pd { void *conn; @@ -285,6 +286,10 @@ static void downlist_messages_delete (void); static void downlist_master_choose_and_send (void); +static void joinlist_inform_clients (void); + +static void joinlist_messages_delete (void); + static void cpg_sync_init ( const unsigned int *trans_list, size_t trans_list_entries, @@ -298,6 +303,12 @@ static void cpg_sync_activate (void); static void cpg_sync_abort (void); +static void do_proc_join( + const mar_cpg_name_t *name, + uint32_t pid, + unsigned int nodeid, + int reason); + static int notify_lib_totem_membership ( void *conn, int member_list_entries, @@ -456,6 +467,13 @@ struct downlist_msg { struct list_head list; }; +struct joinlist_msg { + mar_uint32_t sender_nodeid; + uint32_t pid; + mar_cpg_name_t group_name; + struct list_head list; +}; + static struct req_exec_cpg_downlist g_req_exec_cpg_downlist; static void cpg_sync_init ( @@ -527,8 +545,11 @@ static void cpg_sync_activate (void) downlist_master_choose_and_send (); } + joinlist_inform_clients (); + downlist_messages_delete (); downlist_state = CPG_DOWNLIST_NONE; + joinlist_messages_delete (); notify_lib_totem_membership (NULL, my_member_list_entries, my_member_list); } @@ -537,6 +558,7 @@ static void cpg_sync_abort (void) { downlist_state = CPG_DOWNLIST_NONE; downlist_messages_delete (); + joinlist_messages_delete (); } static int notify_lib_totem_membership ( @@ -865,6 +887,34 @@ static void downlist_master_choose_and_send (void) qb_map_destroy(group_map); } +static void joinlist_inform_clients (void) +{ + struct joinlist_msg *stored_msg; + struct list_head *iter; + unsigned int i; + + i = 0; + for (iter = joinlist_messages_head.next; + iter != &joinlist_messages_head; + iter = iter->next) { + + stored_msg = list_entry(iter, struct joinlist_msg, list); + + log_printf (LOG_DEBUG, "joinlist_messages[%u] group:%d, ip:%s, pid:%d", + i++, stored_msg->group_name.value, + (char*)api->totem_ifaces_print(stored_msg->sender_nodeid), + stored_msg->pid); + + /* Ignore our own messages */ + if (stored_msg->sender_nodeid == api->totem_nodeid_get()) { + continue ; + } + + do_proc_join (&stored_msg->group_name, stored_msg->pid, stored_msg->sender_nodeid, + CONFCHG_CPG_REASON_NODEUP); + } +} + static void downlist_messages_delete (void) { struct downlist_msg *stored_msg; @@ -882,6 +932,23 @@ static void downlist_messages_delete (void) } } +static void joinlist_messages_delete (void) +{ + struct joinlist_msg *stored_msg; + struct list_head *iter, *iter_next; + + for (iter = joinlist_messages_head.next; + iter != &joinlist_messages_head; + iter = iter_next) { + + iter_next = iter->next; + + stored_msg = list_entry(iter, struct joinlist_msg, list); + list_del (&stored_msg->list); + free (stored_msg); + } + list_init (&joinlist_messages_head); +} static char *cpg_exec_init_fn (struct corosync_api_v1 *corosync_api) { @@ -889,6 +956,7 @@ static char *cpg_exec_init_fn (struct corosync_api_v1 *corosync_api) logsys_subsys_init(); #endif list_init (&downlist_messages_head); + list_init (&joinlist_messages_head); api = corosync_api; return (NULL); } @@ -1199,18 +1267,19 @@ static void message_handler_req_exec_cpg_joinlist ( const char *message = message_v; const struct qb_ipc_response_header *res = (const struct qb_ipc_response_header *)message; const struct join_list_entry *jle = (const struct join_list_entry *)(message + sizeof(struct qb_ipc_response_header)); + struct joinlist_msg *stored_msg; log_printf(LOGSYS_LEVEL_DEBUG, "got joinlist message from node %x", nodeid); - /* Ignore our own messages */ - if (nodeid == api->totem_nodeid_get()) { - return; - } - while ((const char*)jle < message + res->size) { - do_proc_join (&jle->group_name, jle->pid, nodeid, - CONFCHG_CPG_REASON_NODEUP); + stored_msg = malloc (sizeof (struct joinlist_msg)); + memset(stored_msg, 0, sizeof (struct joinlist_msg)); + stored_msg->sender_nodeid = nodeid; + stored_msg->pid = jle->pid; + memcpy(&stored_msg->group_name, &jle->group_name, sizeof(mar_cpg_name_t)); + list_init (&stored_msg->list); + list_add (&stored_msg->list, &joinlist_messages_head); jle++; } } -- 1.7.1 _______________________________________________ discuss mailing list discuss@xxxxxxxxxxxx http://lists.corosync.org/mailman/listinfo/discuss