[PATCH 2/2] flatiron cpg: Process join list after downlists

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Consider the following situation:
- we have 3 nodes
- on the wire, the messages look like D1,J1,D2,J2,D3,J3 (D is downlist, J is
  joinlist)
- let's say D1 and D3 contain node 2
- this means that J2 is applied, but right after that D1 (or D3) is
  applied, which means node 2 is again considered down

This is solved by collecting the joinlists and applying them after the
downlist, so the order is:
- apply the best matching downlist
- apply all joinlists

Signed-off-by: Jan Friesse <jfriesse@xxxxxxxxxx>
---
 services/cpg.c |  116 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 files changed, 109 insertions(+), 7 deletions(-)

diff --git a/services/cpg.c b/services/cpg.c
index dbd61c5..7e62260 100644
--- a/services/cpg.c
+++ b/services/cpg.c
@@ -135,6 +135,7 @@ enum cpg_downlist_state_e {
 };
 static enum cpg_downlist_state_e downlist_state;
 static struct list_head downlist_messages_head;
+static struct list_head joinlist_messages_head;
 
 struct cpg_pd {
 	void *conn;
@@ -264,6 +265,10 @@ static void downlist_messages_delete (void);
 
 static void downlist_master_choose_and_send (void);
 
+static void joinlist_inform_clients (void);
+
+static void joinlist_messages_delete (void);
+
 static void cpg_sync_init_v2 (
 	const unsigned int *trans_list,
 	size_t trans_list_entries,
@@ -277,11 +282,20 @@ static void cpg_sync_activate (void);
 
 static void cpg_sync_abort (void);
 
+static void do_proc_join(
+	const mar_cpg_name_t *name,
+	uint32_t pid,
+	unsigned int nodeid,
+	int reason);
+
 static int notify_lib_totem_membership (
 	void *conn,
 	int member_list_entries,
 	const unsigned int *member_list);
 
+static char *cpg_print_group_name (
+	const mar_cpg_name_t *group);
+
 /*
  * Library Handler Definition
  */
@@ -460,8 +474,45 @@ struct downlist_msg {
 	struct list_head list;
 };
 
+struct joinlist_msg {
+	mar_uint32_t sender_nodeid;	/* node id the joinlist message was received from */
+	uint32_t pid;	/* pid copied from the join list entry */
+	mar_cpg_name_t group_name;	/* group name copied from the join list entry */
+	struct list_head list;	/* link in joinlist_messages_head */
+};
+
 static struct req_exec_cpg_downlist g_req_exec_cpg_downlist;
 
+/*
+ * Return group name escaped for logging ('\' doubled, other non-printable
+ * bytes as \xHH). Result is a static buffer, so it's not reentrant.
+ */
+static char *cpg_print_group_name(const mar_cpg_name_t *group)
+{
+	static char res[CPG_MAX_NAME_LENGTH * 4 + 1];
+	int dest_pos = 0;
+	unsigned char c;
+	int i;
+
+	/* Each input byte needs at most 4 output bytes; stop before overflow */
+	for (i = 0; i < group->length && dest_pos + 4 < (int)sizeof(res); i++) {
+		c = (unsigned char)group->value[i];
+		if (c >= ' ' && c < 0x7f && c != '\\') {
+			res[dest_pos++] = (char)c;
+		} else if (c == '\\') {
+			res[dest_pos++] = '\\';
+			res[dest_pos++] = '\\';
+		} else {
+			/* c is unsigned, so exactly two hex digits are written */
+			snprintf(res + dest_pos, sizeof(res) - dest_pos, "\\x%02X", c);
+			dest_pos += 4;
+		}
+	}
+	res[dest_pos] = 0;
+
+	return (res);
+}
+
 static void cpg_sync_init_v2 (
 	const unsigned int *trans_list,
 	size_t trans_list_entries,
@@ -531,8 +582,11 @@ static void cpg_sync_activate (void)
 		downlist_master_choose_and_send ();
 	}
 
+	joinlist_inform_clients ();
+
 	downlist_messages_delete ();
 	downlist_state = CPG_DOWNLIST_NONE;
+	joinlist_messages_delete ();
 
 	notify_lib_totem_membership (NULL, my_member_list_entries, my_member_list);
 }
@@ -541,6 +595,7 @@ static void cpg_sync_abort (void)
 {
 	downlist_state = CPG_DOWNLIST_NONE;
 	downlist_messages_delete ();
+	joinlist_messages_delete ();
 }
 
 static int notify_lib_totem_membership (
@@ -817,6 +872,34 @@ static void downlist_master_choose_and_send (void)
 	}
 }
 
+static void joinlist_inform_clients (void)
+{
+	struct list_head *pos = joinlist_messages_head.next;
+	struct joinlist_msg *msg;
+	unsigned int idx = 0;
+
+	/*
+	 * Walk every stored joinlist entry: log it, then apply the join
+	 * unless the entry was sent by this node.
+	 */
+	while (pos != &joinlist_messages_head) {
+		msg = list_entry(pos, struct joinlist_msg, list);
+
+		log_printf (LOG_DEBUG, "joinlist_messages[%u] group:%s, ip:%s, pid:%d",
+			idx++, cpg_print_group_name(&msg->group_name),
+			(char*)api->totem_ifaces_print(msg->sender_nodeid),
+			msg->pid);
+
+		/* Entries from the local node are only logged */
+		if (msg->sender_nodeid != api->totem_nodeid_get()) {
+			do_proc_join (&msg->group_name, msg->pid,
+				msg->sender_nodeid, CONFCHG_CPG_REASON_NODEUP);
+		}
+
+		pos = pos->next;
+	}
+}
+
 static void downlist_messages_delete (void)
 {
 	struct downlist_msg *stored_msg;
@@ -834,6 +917,23 @@ static void downlist_messages_delete (void)
 	}
 }
 
+static void joinlist_messages_delete (void)
+{
+	struct list_head *pos = joinlist_messages_head.next;
+	struct joinlist_msg *msg;
+
+	/* Unlink and free every stored entry, reading ->next before the free */
+	while (pos != &joinlist_messages_head) {
+		msg = list_entry(pos, struct joinlist_msg, list);
+		pos = pos->next;
+
+		list_del (&msg->list);
+		free (msg);
+	}
+
+	/* Re-initialize the list head once all entries are gone */
+	list_init (&joinlist_messages_head);
+}
 
 static int cpg_exec_init_fn (struct corosync_api_v1 *corosync_api)
 {
@@ -841,6 +941,7 @@ static int cpg_exec_init_fn (struct corosync_api_v1 *corosync_api)
 	logsys_subsys_init();
 #endif
 	list_init (&downlist_messages_head);
+	list_init (&joinlist_messages_head);
 	api = corosync_api;
 	return (0);
 }
@@ -1157,18 +1258,19 @@ static void message_handler_req_exec_cpg_joinlist (
 	const char *message = message_v;
 	const coroipc_response_header_t *res = (const coroipc_response_header_t *)message;
 	const struct join_list_entry *jle = (const struct join_list_entry *)(message + sizeof(coroipc_response_header_t));
+	struct joinlist_msg *stored_msg;
 
 	log_printf(LOGSYS_LEVEL_DEBUG, "got joinlist message from node %x\n",
 		nodeid);
 
-	/* Ignore our own messages */
-	if (nodeid == api->totem_nodeid_get()) {
-		return;
-	}
-
 	while ((const char*)jle < message + res->size) {
-		do_proc_join (&jle->group_name, jle->pid, nodeid,
-			CONFCHG_CPG_REASON_NODEUP);
+		stored_msg = malloc (sizeof (struct joinlist_msg));
+		memset(stored_msg, 0, sizeof (struct joinlist_msg));
+		stored_msg->sender_nodeid = nodeid;
+		stored_msg->pid = jle->pid;
+		memcpy(&stored_msg->group_name, &jle->group_name, sizeof(mar_cpg_name_t));
+		list_init (&stored_msg->list);
+		list_add (&stored_msg->list, &joinlist_messages_head);
 		jle++;
 	}
 }
-- 
1.7.1

_______________________________________________
discuss mailing list
discuss@xxxxxxxxxxxx
http://lists.corosync.org/mailman/listinfo/discuss


[Index of Archives]     [Linux Clusters]     [Corosync Project]     [Linux USB Devel]     [Linux Audio Users]     [Photo]     [Yosemite News]    [Yosemite Photos]    [Linux Kernel]     [Linux SCSI]     [X.Org]

  Powered by Linux