[PATCH 2/2] Return back "Totem is unable to form..." message

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This patch restores the "Totem is unable to form..." warning named in the
subject. It relies on the fact that sendmsg will return an error; if that
error keeps being returned for a long time, the cause is probably a firewall.

Signed-off-by: Jan Friesse <jfriesse@xxxxxxxxxx>
---
 exec/main.c                    |    9 ++++++++-
 exec/totemiba.c                |    3 +++
 exec/totemiba.h                |    1 +
 exec/totemnet.c                |    4 +++-
 exec/totemnet.h                |    1 +
 exec/totemrrp.c                |    1 +
 exec/totemudp.c                |    8 ++++++++
 exec/totemudp.h                |    1 +
 exec/totemudpu.c               |    5 +++++
 exec/totemudpu.h               |    1 +
 include/corosync/totem/totem.h |    5 +++++
 11 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/exec/main.c b/exec/main.c
index fc7c8c3..3dac5fb 100644
--- a/exec/main.c
+++ b/exec/main.c
@@ -491,15 +491,22 @@ static void corosync_totem_stats_updater (void *data)
 	icmap_set_uint64("runtime.totem.pg.mrp.srp.consensus_timeouts", stats->mrp->srp->consensus_timeouts);
 	icmap_set_uint64("runtime.totem.pg.mrp.srp.rx_msg_dropped", stats->mrp->srp->rx_msg_dropped);
 	icmap_set_uint32("runtime.totem.pg.mrp.srp.continuous_gather", stats->mrp->srp->continuous_gather);
+	icmap_set_uint32("runtime.totem.pg.mrp.srp.continuous_sendmsg_failures",
+	    stats->mrp->srp->continuous_sendmsg_failures);
+
 	icmap_set_uint8("runtime.totem.pg.mrp.srp.firewall_enabled_or_nic_failure",
 		stats->mrp->srp->continuous_gather > MAX_NO_CONT_GATHER ? 1 : 0);
 
-	if (stats->mrp->srp->continuous_gather > MAX_NO_CONT_GATHER) {
+	if (stats->mrp->srp->continuous_gather > MAX_NO_CONT_GATHER ||
+	    stats->mrp->srp->continuous_sendmsg_failures > MAX_NO_CONT_SENDMSG_FAILURES) {
 		log_printf (LOGSYS_LEVEL_WARNING,
 			"Totem is unable to form a cluster because of an "
 			"operating system or network fault. The most common "
 			"cause of this message is that the local firewall is "
 			"configured improperly.");
+		icmap_set_uint8("runtime.totem.pg.mrp.srp.firewall_enabled_or_nic_failure", 1);
+	} else {
+		icmap_set_uint8("runtime.totem.pg.mrp.srp.firewall_enabled_or_nic_failure", 0);
 	}
 
 	for (i = 0; i < stats->mrp->srp->rrp->interface_count; i++) {
diff --git a/exec/totemiba.c b/exec/totemiba.c
index a419d1a..55f7924 100644
--- a/exec/totemiba.c
+++ b/exec/totemiba.c
@@ -98,6 +98,8 @@ struct totemiba_instance {
 
 	struct totem_config *totem_config;
 
+	totemsrp_stats_t *stats;
+
 	void (*totemiba_iface_change_fn) (
 		void *context,
 		const struct totem_ip_address *iface_address);
@@ -1320,6 +1322,7 @@ int totemiba_initialize (
 	instance->totemiba_iface_change_fn = iface_change_fn;
 
 	instance->totem_config = totem_config;
+	instance->stats = stats;
 
 	instance->rrp_context = context;
 
diff --git a/exec/totemiba.h b/exec/totemiba.h
index de19756..7e7a689 100644
--- a/exec/totemiba.h
+++ b/exec/totemiba.h
@@ -48,6 +48,7 @@ extern int totemiba_initialize (
 	qb_loop_t* qb_poll_handle,
 	void **iba_handle,
 	struct totem_config *totem_config,
+	totemsrp_stats_t *stats,
 	int interface_no,
 	void *context,
 
diff --git a/exec/totemnet.c b/exec/totemnet.c
index fd7c76e..2571d92 100644
--- a/exec/totemnet.c
+++ b/exec/totemnet.c
@@ -55,6 +55,7 @@ struct transport {
 		qb_loop_t *loop_pt,
 		void **transport_instance,
 		struct totem_config *totem_config,
+		totemsrp_stats_t *stats,
 		int interface_no,
 		void *context,
 
@@ -272,6 +273,7 @@ int totemnet_initialize (
 	qb_loop_t *loop_pt,
 	void **net_context,
 	struct totem_config *totem_config,
+	totemsrp_stats_t *stats,
 	int interface_no,
 	void *context,
 
@@ -297,7 +299,7 @@ int totemnet_initialize (
 	totemnet_instance_initialize (instance, totem_config);
 
 	res = instance->transport->initialize (loop_pt,
-		&instance->transport_context, totem_config,
+		&instance->transport_context, totem_config, stats,
 		interface_no, context, deliver_fn, iface_change_fn, target_set_completed);
 
 	if (res == -1) {
diff --git a/exec/totemnet.h b/exec/totemnet.h
index 232c5cf..0adc107 100644
--- a/exec/totemnet.h
+++ b/exec/totemnet.h
@@ -58,6 +58,7 @@ extern int totemnet_initialize (
 	qb_loop_t *poll_handle,
 	void **net_context,
 	struct totem_config *totem_config,
+	totemsrp_stats_t *stats,
 	int interface_no,
 	void *context,
 
diff --git a/exec/totemrrp.c b/exec/totemrrp.c
index de6cd39..0623892 100644
--- a/exec/totemrrp.c
+++ b/exec/totemrrp.c
@@ -1937,6 +1937,7 @@ int totemrrp_initialize (
 			poll_handle,
 			&instance->net_handles[i],
 			totem_config,
+			stats,
 			i,
 			(void *)deliver_fn_context,
 			rrp_deliver_fn,
diff --git a/exec/totemudp.c b/exec/totemudp.c
index e702a32..a5169c2 100644
--- a/exec/totemudp.c
+++ b/exec/totemudp.c
@@ -189,6 +189,8 @@ struct totemudp_instance {
 
 	struct totem_config *totem_config;
 
+	totemsrp_stats_t *stats;
+
 	struct totem_ip_address token_target;
 };
 
@@ -387,6 +389,9 @@ static inline void mcast_sendmsg (
 	if (res < 0) {
 		LOGSYS_PERROR (errno, instance->totemudp_log_level_debug,
 			"sendmsg(mcast) failed (non-critical)");
+		instance->stats->continuous_sendmsg_failures++;
+	} else {
+		instance->stats->continuous_sendmsg_failures = 0;
 	}
 
 	/*
@@ -1097,6 +1102,7 @@ int totemudp_initialize (
 	qb_loop_t *poll_handle,
 	void **udp_context,
 	struct totem_config *totem_config,
+	totemsrp_stats_t *stats,
 	int interface_no,
 	void *context,
 
@@ -1122,6 +1128,8 @@ int totemudp_initialize (
 	totemudp_instance_initialize (instance);
 
 	instance->totem_config = totem_config;
+	instance->stats = stats;
+
 	/*
 	* Configure logging
 	*/
diff --git a/exec/totemudp.h b/exec/totemudp.h
index ba22b4b..697307a 100644
--- a/exec/totemudp.h
+++ b/exec/totemudp.h
@@ -48,6 +48,7 @@ extern int totemudp_initialize (
 	qb_loop_t* poll_handle,
 	void **udp_context,
 	struct totem_config *totem_config,
+	totemsrp_stats_t *stats,
 	int interface_no,
 	void *context,
 
diff --git a/exec/totemudpu.c b/exec/totemudpu.c
index ed3fa60..12ec63c 100644
--- a/exec/totemudpu.c
+++ b/exec/totemudpu.c
@@ -172,6 +172,8 @@ struct totemudpu_instance {
 
 	struct totem_config *totem_config;
 
+	totemsrp_stats_t *stats;
+
 	struct totem_ip_address token_target;
 
 	int token_socket;
@@ -731,6 +733,7 @@ int totemudpu_initialize (
 	qb_loop_t *poll_handle,
 	void **udpu_context,
 	struct totem_config *totem_config,
+	totemsrp_stats_t *stats,
 	int interface_no,
 	void *context,
 
@@ -756,6 +759,8 @@ int totemudpu_initialize (
 	totemudpu_instance_initialize (instance);
 
 	instance->totem_config = totem_config;
+	instance->stats = stats;
+
 	/*
 	* Configure logging
 	*/
diff --git a/exec/totemudpu.h b/exec/totemudpu.h
index 136960c..7e80ed7 100644
--- a/exec/totemudpu.h
+++ b/exec/totemudpu.h
@@ -48,6 +48,7 @@ extern int totemudpu_initialize (
 	qb_loop_t *poll_handle,
 	void **udpu_context,
 	struct totem_config *totem_config,
+	totemsrp_stats_t *stats,
 	int interface_no,
 	void *context,
 
diff --git a/include/corosync/totem/totem.h b/include/corosync/totem/totem.h
index 11fb581..02a8a2c 100644
--- a/include/corosync/totem/totem.h
+++ b/include/corosync/totem/totem.h
@@ -56,6 +56,10 @@
  * Maximum number of continuous gather states
  */
 #define MAX_NO_CONT_GATHER	3
+/*
+ * Maximum number of consecutive failures returned by the sendmsg call
+ */
+#define MAX_NO_CONT_SENDMSG_FAILURES	30
 
 struct totem_interface {
 	struct totem_ip_address bindnet;
@@ -252,6 +256,7 @@ typedef struct {
 	uint64_t consensus_timeouts;
 	uint64_t rx_msg_dropped;
 	uint32_t continuous_gather;
+	uint32_t continuous_sendmsg_failures;
 
 	int earliest_token;
 	int latest_token;
-- 
1.7.1

_______________________________________________
discuss mailing list
discuss@xxxxxxxxxxxx
http://lists.corosync.org/mailman/listinfo/discuss


[Index of Archives]     [Linux Clusters]     [Corosync Project]     [Linux USB Devel]     [Linux Audio Users]     [Photo]     [Yosemite News]    [Yosemite Photos]    [Linux Kernel]     [Linux SCSI]     [X.Org]

  Powered by Linux