To follow the spec, messages must be sent to all nodes (not only active members) from time to time in order to detect a merge. This is needed to detect a merge in situations where the totemsrp merge timer isn't running (because enough messages are being sent by the processors). Example scenario: - 3 nodes, all of them running cpgverify - One node is isolated (iptables for example) - Node is un-isolated Without this commit, the node will not merge as long as cpgverify is running. Signed-off-by: Jan Friesse <jfriesse@xxxxxxxxxx> --- exec/totemudpu.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 files changed, 66 insertions(+), 1 deletions(-) diff --git a/exec/totemudpu.c b/exec/totemudpu.c index ef4449f..69837c7 100644 --- a/exec/totemudpu.c +++ b/exec/totemudpu.c @@ -178,6 +178,12 @@ struct totemudpu_instance { struct totem_ip_address token_target; int token_socket; + + qb_loop_timer_handle timer_merge_detect_timeout; + + int send_merge_detect_message; + + unsigned int merge_detect_messages_sent_before_timeout; }; struct work_item { @@ -198,6 +204,12 @@ static int totemudpu_create_sending_socket( int totemudpu_member_list_rebind_ip ( void *udpu_context); +static void totemudpu_start_merge_detect_timeout( + void *udpu_context); + +static void totemudpu_stop_merge_detect_timeout( + void *udpu_context); + static struct totem_ip_address localhost; static void totemudpu_instance_initialize (struct totemudpu_instance *instance) @@ -357,7 +369,11 @@ static inline void mcast_sendmsg ( struct totemudpu_member, list); - if (only_active && !member->active) + /* + * Do not send multicast message if message is not "flush", member + * is inactive and timeout for sending merge message didn't expired. 
+ */ + if (only_active && !member->active && !instance->send_merge_detect_message) continue ; totemip_totemip_to_sockaddr_convert(&member->member, @@ -392,6 +408,14 @@ static inline void mcast_sendmsg ( "sendmsg(mcast) failed (non-critical)"); } } + + if (!only_active || instance->send_merge_detect_message) { + /* + * Current message was sent to all nodes + */ + instance->merge_detect_messages_sent_before_timeout++; + instance->send_merge_detect_message = 0; + } } int totemudpu_finalize ( @@ -406,6 +430,8 @@ int totemudpu_finalize ( close (instance->token_socket); } + totemudpu_stop_merge_detect_timeout(instance); + return (res); } @@ -824,6 +850,8 @@ int totemudpu_initialize ( timer_function_netif_check_timeout, &instance->timer_netif_check_timeout); + totemudpu_start_merge_detect_timeout(instance); + *udpu_context = instance; return (0); } @@ -1217,3 +1245,40 @@ int totemudpu_member_set_active ( return (0); } + +static void timer_function_merge_detect_timeout ( + void *data) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance *)data; + + if (instance->merge_detect_messages_sent_before_timeout == 0) { + instance->send_merge_detect_message = 1; + } + + instance->merge_detect_messages_sent_before_timeout = 0; + + totemudpu_start_merge_detect_timeout(instance); +} + +static void totemudpu_start_merge_detect_timeout( + void *udpu_context) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; + + qb_loop_timer_add(instance->totemudpu_poll_handle, + QB_LOOP_MED, + instance->totem_config->merge_timeout * 2 * QB_TIME_NS_IN_MSEC, + (void *)instance, + timer_function_merge_detect_timeout, + &instance->timer_merge_detect_timeout); + +} + +static void totemudpu_stop_merge_detect_timeout( + void *udpu_context) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; + + qb_loop_timer_del(instance->totemudpu_poll_handle, + instance->timer_merge_detect_timeout); +} -- 1.7.1 
_______________________________________________ discuss mailing list discuss@xxxxxxxxxxxx http://lists.corosync.org/mailman/listinfo/discuss