[PATCH v2] Send one confchg event per CPG group to CPG client

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



We found that sheepdog will receive more than one confchg msg when
network partition occur.  For example, suppose the cluster has 4
nodes: N1, N2, N3, N4,  and they form a single-ring initially. After a
while, network partition occur, the single-ring divide into two
sub-ring: ring(N1, N2, N3) and ring(N4). The sheepdog in the ring(N4)
will receive the following confchg messages in turn:
Memb: N2,N3,N4  Left:N1         Joined:null
memb: N3,N4     Left:N2         Joined:null
memb: N4        Left:N3         Joined:null

This patch will fixed this bug, and the client will only receive one
confchg event in this case:
memb: N4        Left:N1,N2,N3   Joined:null

Signed-off-by: Yunkai Zhang <qiushu.zyk@xxxxxxxxxx>
---
 services/cpg.c |   82 ++++++++++++++++++++++++++++++++++++++++++++++---------
 1 files changed, 68 insertions(+), 14 deletions(-)

diff --git a/services/cpg.c b/services/cpg.c
index caec097..f8a52b8 100644
--- a/services/cpg.c
+++ b/services/cpg.c
@@ -55,6 +55,7 @@
 #include <netinet/in.h>
 #include <arpa/inet.h>
 #include <sys/mman.h>
+#include <qb/qbmap.h>
 
 #include <corosync/corotypes.h>
 #include <qb/qbipc_common.h>
@@ -705,7 +706,6 @@ static int notify_lib_joinlist(
 		for (iter = cpg_pd_list_head.next; iter != &cpg_pd_list_head; iter = iter->next) {
 			struct cpg_pd *cpd = list_entry (iter, struct cpg_pd, list);
 			if (mar_name_compare (&cpd->group_name, group_name) == 0) {
-				assert (left_list_entries <= 1);
 				assert (joined_list_entries <= 1);
 				if (joined_list_entries) {
 					if (joined_list[0].pid == cpd->pid &&
@@ -798,8 +798,19 @@ static void downlist_master_choose_and_send (void)
 {
 	struct downlist_msg *stored_msg;
 	struct list_head *iter;
-	mar_cpg_address_t left_list;
-	int i;
+	struct process_info *left_pi;
+	struct qb_map_t *group_map;
+	struct cpg_name cpg_group;
+	mar_cpg_name_t group;
+	struct confchg_data{
+		struct cpg_name cpg_group;
+		mar_cpg_address_t left_list[CPG_MEMBERS_MAX];
+		int left_list_entries;
+		struct list_head  list;
+	} *pcd;
+	qb_map_iter_t *miter;
+	int i, size;
+	char *p;
 
 	downlist_state = CPG_DOWNLIST_APPLYING;
 
@@ -810,27 +821,70 @@ static void downlist_master_choose_and_send (void)
 	}
 	downlist_log("chosen downlist", stored_msg);
 
-	/* send events */
+	group_map = qb_skiplist_create();
+
+	/*
+	 * only the cpg groups included in left nodes should receive
+	 * confchg event, so we will collect these cpg groups and
+	 * relative left_lists here.
+	 */
 	for (iter = process_info_list_head.next; iter != &process_info_list_head; ) {
 		struct process_info *pi = list_entry(iter, struct process_info, list);
 		iter = iter->next;
 
+		left_pi = NULL;
 		for (i = 0; i < stored_msg->left_nodes; i++) {
+
 			if (pi->nodeid == stored_msg->nodeids[i]) {
-				left_list.nodeid = pi->nodeid;
-				left_list.pid = pi->pid;
-				left_list.reason = CONFCHG_CPG_REASON_NODEDOWN;
-
-				notify_lib_joinlist(&pi->group, NULL,
-					0, NULL,
-					1, &left_list,
-					MESSAGE_RES_CPG_CONFCHG_CALLBACK);
-				list_del (&pi->list);
-				free (pi);
+				left_pi = pi;
 				break;
 			}
 		}
+
+		if (left_pi) {
+			marshall_from_mar_cpg_name_t(&cpg_group, &left_pi->group);
+			cpg_group.value[cpg_group.length] = 0;
+
+			pcd = (struct confchg_data *)qb_map_get(group_map, cpg_group.value);
+			if (pcd == NULL) {
+				pcd = (struct confchg_data *)calloc(1, sizeof(struct confchg_data));
+				memcpy(&pcd->cpg_group, &cpg_group, sizeof(struct cpg_name));
+				qb_map_put(group_map, pcd->cpg_group.value, pcd);
+			}
+			size = pcd->left_list_entries;
+			pcd->left_list[size].nodeid = left_pi->nodeid;
+			pcd->left_list[size].pid = left_pi->pid;
+			pcd->left_list[size].reason = CONFCHG_CPG_REASON_NODEDOWN;
+			pcd->left_list_entries++;
+			list_del (&left_pi->list);
+			free (left_pi);
+		}
+	}
+
+	/* send only one confchg event per cpg group */
+	miter = qb_map_iter_create(group_map);
+	while (p = qb_map_iter_next(miter, (void **)&pcd)) {
+		marshall_to_mar_cpg_name_t(&group, &pcd->cpg_group);
+
+		log_printf (LOG_DEBUG, "left_list_entries:%d", pcd->left_list_entries);
+		for (i=0; i<pcd->left_list_entries; i++) {
+			log_printf (LOG_DEBUG, "left_list[%d] group:%d, ip:%s, pid:%d",
+				i, pcd->cpg_group.value,
+				(char*)api->totem_ifaces_print(pcd->left_list[i].nodeid),
+				pcd->left_list[i].pid);
+		}
+
+		/* send confchg event */
+		notify_lib_joinlist(&group, NULL,
+			0, NULL,
+			pcd->left_list_entries,
+			pcd->left_list,
+			MESSAGE_RES_CPG_CONFCHG_CALLBACK);
+
+		free(pcd);
 	}
+	qb_map_iter_free(miter);
+	qb_map_destroy(group_map);
 }
 
 static void downlist_messages_delete (void)
-- 
1.7.6.4

_______________________________________________
discuss mailing list
discuss@xxxxxxxxxxxx
http://lists.corosync.org/mailman/listinfo/discuss


[Index of Archives]     [Linux Clusters]     [Corosync Project]     [Linux USB Devel]     [Linux Audio Users]     [Photo]     [Yosemite News]    [Yosemite Photos]    [Linux Kernel]     [Linux SCSI]     [X.Org]

  Powered by Linux