A new cmap key holding the faulty status of each ring is created, named runtime.totem.pg.mrp.rrp.$ring_number.faulty. Signed-off-by: Jan Friesse <jfriesse@xxxxxxxxxx> --- exec/main.c | 7 ++++- exec/totemrrp.c | 50 ++++++++++++++++++++++++++++++++++++++- exec/totemrrp.h | 1 + exec/totemsrp.c | 1 + include/corosync/totem/totem.h | 2 + 5 files changed, 58 insertions(+), 3 deletions(-) diff --git a/exec/main.c b/exec/main.c index bb4d13f..6c1ea35 100644 --- a/exec/main.c +++ b/exec/main.c @@ -495,8 +495,9 @@ static void corosync_totem_stats_updater (void *data) uint32_t total_mtt_rx_token; uint32_t total_backlog_calc; uint32_t total_token_holdtime; - int t, prev; + int t, prev, i; int32_t token_count; + char key_name[ICMAP_KEYNAME_MAXLEN]; stats = api->totem_get_stats(); @@ -529,6 +530,10 @@ static void corosync_totem_stats_updater (void *data) icmap_set_uint8("runtime.totem.pg.mrp.srp.firewall_enabled_or_nic_failure", stats->mrp->srp->continuous_gather > MAX_NO_CONT_GATHER ? 1 : 0); + for (i = 0; i < stats->mrp->srp->rrp->interface_count; i++) { + snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "runtime.totem.pg.mrp.rrp.%u.faulty", i); + icmap_set_uint8(key_name, stats->mrp->srp->rrp->faulty[i]); + } total_mtt_rx_token = 0; total_token_holdtime = 0; total_backlog_calc = 0; diff --git a/exec/totemrrp.c b/exec/totemrrp.c index 89b9fd6..805763e 100644 --- a/exec/totemrrp.c +++ b/exec/totemrrp.c @@ -249,8 +249,13 @@ struct totemrrp_instance { void *deliver_fn_context[INTERFACE_MAX]; qb_loop_timer_handle timer_active_test_ring_timeout[INTERFACE_MAX]; + + totemrrp_stats_t stats; }; +static void stats_set_interface_faulty(struct totemrrp_instance *rrp_instance, + unsigned int iface_no, int is_faulty); + /* * None Replication Forward Declerations */ @@ -597,6 +602,12 @@ do { \ format, ##args); \ } while (0); +static void stats_set_interface_faulty(struct totemrrp_instance *rrp_instance, + unsigned int iface_no, int is_faulty) +{ + rrp_instance->stats.faulty[iface_no] = (is_faulty ? 
1 : 0); +} + static void test_active_msg_endian_convert(const struct message_header *in, struct message_header *out) { out->type = in->type; @@ -775,6 +786,7 @@ void *passive_instance_initialize ( int interface_count) { struct passive_instance *instance; + int i; instance = malloc (sizeof (struct passive_instance)); if (instance == 0) { @@ -790,6 +802,10 @@ void *passive_instance_initialize ( } memset (instance->faulty, 0, sizeof (int) * interface_count); + for (i = 0; i < interface_count; i++) { + stats_set_interface_faulty (rrp_instance, i, 0); + } + instance->token_recv_count = malloc (sizeof (int) * interface_count); if (instance->token_recv_count == 0) { free (instance->faulty); @@ -966,6 +982,7 @@ static void passive_monitor ( if ((passive_instance->faulty[i] == 0) && (max - recv_count[i] > threshold)) { passive_instance->faulty[i] = 1; + qb_loop_timer_add (rrp_instance->poll_handle, QB_LOOP_MED, rrp_instance->totem_config->rrp_autorecovery_check_timeout*QB_TIME_NS_IN_MSEC, @@ -973,6 +990,8 @@ static void passive_monitor ( timer_function_test_ring_timeout, &rrp_instance->timer_active_test_ring_timeout[i]); + stats_set_interface_faulty (rrp_instance, i, passive_instance->faulty[i]); + sprintf (rrp_instance->status[i], "Marking ringid %u interface %s FAULTY", i, @@ -1206,6 +1225,7 @@ static void passive_ring_reenable ( unsigned int iface_no) { struct passive_instance *rrp_algo_instance = (struct passive_instance *)instance->rrp_algo_instance; + int i; memset (rrp_algo_instance->mcast_recv_count, 0, sizeof (unsigned int) * instance->interface_count); @@ -1215,8 +1235,12 @@ static void passive_ring_reenable ( if (iface_no == instance->interface_count) { memset (rrp_algo_instance->faulty, 0, sizeof (unsigned int) * instance->interface_count); + for (i = 0; i < instance->interface_count; i++) { + stats_set_interface_faulty (instance, i, 0); + } } else { rrp_algo_instance->faulty[iface_no] = 0; + stats_set_interface_faulty (instance, iface_no, 0); } } @@ -1228,6 
+1252,7 @@ void *active_instance_initialize ( int interface_count) { struct active_instance *instance; + int i; instance = malloc (sizeof (struct active_instance)); if (instance == 0) { @@ -1243,6 +1268,10 @@ void *active_instance_initialize ( } memset (instance->faulty, 0, sizeof (unsigned int) * interface_count); + for (i = 0; i < interface_count; i++) { + stats_set_interface_faulty (rrp_instance, i, 0); + } + instance->last_token_recv = malloc (sizeof (int) * interface_count); if (instance->last_token_recv == 0) { free (instance->faulty); @@ -1333,9 +1362,10 @@ static void timer_function_active_token_expired (void *context) } } for (i = 0; i < rrp_instance->interface_count; i++) { - if (active_instance->counter_problems[i] >= rrp_instance->totem_config->rrp_problem_count_threshold) - { + if (active_instance->counter_problems[i] >= rrp_instance->totem_config->rrp_problem_count_threshold && + active_instance->faulty[i] == 0) { active_instance->faulty[i] = 1; + qb_loop_timer_add (rrp_instance->poll_handle, QB_LOOP_MED, rrp_instance->totem_config->rrp_autorecovery_check_timeout*QB_TIME_NS_IN_MSEC, @@ -1343,6 +1373,8 @@ static void timer_function_active_token_expired (void *context) timer_function_test_ring_timeout, &rrp_instance->timer_active_test_ring_timeout[i]); + stats_set_interface_faulty (rrp_instance, i, active_instance->faulty[i]); + sprintf (rrp_instance->status[i], "Marking seqid %d ringid %u interface %s FAULTY", active_instance->last_token_seq, @@ -1621,6 +1653,7 @@ static void active_ring_reenable ( unsigned int iface_no) { struct active_instance *rrp_algo_instance = (struct active_instance *)instance->rrp_algo_instance; + int i; if (iface_no == instance->interface_count) { memset (rrp_algo_instance->last_token_recv, 0, sizeof (unsigned int) * @@ -1629,10 +1662,16 @@ static void active_ring_reenable ( instance->interface_count); memset (rrp_algo_instance->counter_problems, 0, sizeof (unsigned int) * instance->interface_count); + + for (i = 0; i < 
instance->interface_count; i++) { + stats_set_interface_faulty (instance, i, 0); + } } else { rrp_algo_instance->last_token_recv[iface_no] = 0; rrp_algo_instance->faulty[iface_no] = 0; rrp_algo_instance->counter_problems[iface_no] = 0; + + stats_set_interface_faulty (instance, iface_no, 0); } } @@ -1805,6 +1844,7 @@ int totemrrp_initialize ( qb_loop_t *poll_handle, void **rrp_context, struct totem_config *totem_config, + totemsrp_stats_t *stats, void *context, void (*deliver_fn) ( @@ -1838,6 +1878,11 @@ int totemrrp_initialize ( totemrrp_instance_initialize (instance); instance->totem_config = totem_config; + stats->rrp = &instance->stats; + if (totem_config->interface_count > 1) { + instance->stats.interface_count = totem_config->interface_count; + instance->stats.faulty = calloc(instance->stats.interface_count, sizeof(uint8_t)); + } res = totemrrp_algorithm_set ( instance->totem_config, @@ -1879,6 +1924,7 @@ int totemrrp_initialize ( instance->poll_handle = poll_handle; + for (i = 0; i < totem_config->interface_count; i++) { struct deliver_fn_context *deliver_fn_context; diff --git a/exec/totemrrp.h b/exec/totemrrp.h index 5ed3c0a..4416eab 100644 --- a/exec/totemrrp.h +++ b/exec/totemrrp.h @@ -58,6 +58,7 @@ extern int totemrrp_initialize ( qb_loop_t *poll_handle, void **rrp_context, struct totem_config *totem_config, + totemsrp_stats_t *stats, void *context, void (*deliver_fn) ( diff --git a/exec/totemsrp.c b/exec/totemsrp.c index 9d3d32d..5ab5e8f 100644 --- a/exec/totemsrp.c +++ b/exec/totemsrp.c @@ -934,6 +934,7 @@ int totemsrp_initialize ( poll_handle, &instance->totemrrp_context, totem_config, + stats->srp, instance, main_deliver_fn, main_iface_change_fn, diff --git a/include/corosync/totem/totem.h b/include/corosync/totem/totem.h index b6cd6f6..075d5a8 100644 --- a/include/corosync/totem/totem.h +++ b/include/corosync/totem/totem.h @@ -217,6 +217,8 @@ typedef struct { totem_stats_header_t hdr; totemnet_stats_t *net; char *algo_name; + uint8_t *faulty; + 
uint32_t interface_count; } totemrrp_stats_t; -- 1.7.1 _______________________________________________ discuss mailing list discuss@xxxxxxxxxxxx http://lists.corosync.org/mailman/listinfo/discuss