Thanks for the patch! Some comments inline.

-Angus

On Fri, Oct 28, 2011 at 12:22:13AM +0800, Yunkai Zhang wrote:
> We found that sheepdog will receive more than one confchg msg when a
> network partition occurs. For example, suppose the cluster has 4
> nodes: N1, N2, N3, N4, and they form a single ring initially. After a
> while, a network partition occurs and the single ring splits into two
> sub-rings: ring(N1, N2, N3) and ring(N4). The sheepdog in ring(N4)
> will receive the following confchg messages in turn:
>   memb: N2,N3,N4  left: N1        joined: null
>   memb: N3,N4     left: N2        joined: null
>   memb: N4        left: N3        joined: null
>
> This patch fixes this bug; the client will receive only one confchg
> event in this case:
>   memb: N4        left: N1,N2,N3  joined: null
>
> Signed-off-by: Yunkai Zhang <qiushu.zyk@xxxxxxxxxx>
> ---
>  configure.ac           |    2 +-
>  include/corosync/cpg.h |    1 +
>  services/cpg.c         |   97 +++++++++++++++++++++++++++++++++++++++++-------
>  3 files changed, 85 insertions(+), 15 deletions(-)
>
> diff --git a/configure.ac b/configure.ac
> index 563f799..aaed478 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -88,7 +88,7 @@ AC_HEADER_SYS_WAIT
>  AC_CHECK_HEADERS([arpa/inet.h fcntl.h limits.h netdb.h netinet/in.h stdint.h \
>  		  stdlib.h string.h sys/ioctl.h sys/param.h sys/socket.h \
>  		  sys/time.h syslog.h unistd.h sys/types.h getopt.h malloc.h \
> -		  sys/sockio.h utmpx.h])
> +		  sys/sockio.h utmpx.h search.h])

Why this? It's not included anywhere in corosync.

>
>  # Checks for typedefs, structures, and compiler characteristics.
>  AC_C_CONST
> diff --git a/include/corosync/cpg.h b/include/corosync/cpg.h
> index f7956fa..9861c5a 100644
> --- a/include/corosync/cpg.h
> +++ b/include/corosync/cpg.h
> @@ -95,6 +95,7 @@ struct cpg_name {
>  };
>
>  #define CPG_MEMBERS_MAX 128
> +#define CPG_GROUPS_MAX 64

I think this is OK, though not 100% sure.

>
>  struct cpg_iteration_description_t {
>  	struct cpg_name group;
> diff --git a/services/cpg.c b/services/cpg.c
> index caec097..dc72c84 100644
> --- a/services/cpg.c
> +++ b/services/cpg.c
> @@ -55,6 +55,7 @@
>  #include <netinet/in.h>
>  #include <arpa/inet.h>
>  #include <sys/mman.h>
> +#include <qb/qbmap.h>
>
>  #include <corosync/corotypes.h>
>  #include <qb/qbipc_common.h>
> @@ -705,7 +706,6 @@ static int notify_lib_joinlist(
>  	for (iter = cpg_pd_list_head.next; iter != &cpg_pd_list_head; iter = iter->next) {
>  		struct cpg_pd *cpd = list_entry (iter, struct cpg_pd, list);
>  		if (mar_name_compare (&cpd->group_name, group_name) == 0) {
> -			assert (left_list_entries <= 1);
>  			assert (joined_list_entries <= 1);
>  			if (joined_list_entries) {
>  				if (joined_list[0].pid == cpd->pid &&
> @@ -798,8 +798,18 @@ static void downlist_master_choose_and_send (void)
>  {
>  	struct downlist_msg *stored_msg;
>  	struct list_head *iter;
> -	mar_cpg_address_t left_list;
> -	int i;
> +	struct process_info *left_pi;
> +	struct qb_map_t *group_htab;
> +	struct cpg_name cpg_group;
> +	mar_cpg_name_t group;
> +	struct confchg_data{
> +		struct cpg_name cpg_group;
> +		mar_cpg_address_t left_list[CPG_MEMBERS_MAX];
> +		int left_list_entries;
> +		struct list_head list;
> +	} *pcd;
> +	DECLARE_LIST_INIT(confchg_data_list_head);
> +	int i, size;

Why the list? You could use qb_skiplist_create() and use the iterator:

	miter = qb_map_iter_create(group_map);
	while ((p = qb_map_iter_next(miter, (void**)&pcd))) {
		...
	}

Though using libqb here is going to make it impossible to cherry-pick
the patch into the flatiron branch (Steven/Honzaf, would this be needed
there?).
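To make that concrete: with a skiplist the final "send events" loop can
walk the map directly, and confchg_data_list_head (plus the list member
of struct confchg_data) goes away entirely. A rough, untested sketch,
assuming the qbmap API in current libqb (qb_skiplist_create() /
qb_map_iter_next()) and reusing the names from the patch:

	qb_map_t *group_map = qb_skiplist_create();
	qb_map_iter_t *miter;
	struct confchg_data *pcd;

	/* ... the first loop fills group_map, keyed by group name ... */

	/* walk every group that lost at least one process and send its
	 * single merged confchg event */
	miter = qb_map_iter_create(group_map);
	while (qb_map_iter_next(miter, (void **)&pcd) != NULL) {
		marshall_to_mar_cpg_name_t(&group, &pcd->cpg_group);
		notify_lib_joinlist(&group, NULL,
				    0, NULL,
				    pcd->left_list_entries,
				    pcd->left_list,
				    MESSAGE_RES_CPG_CONFCHG_CALLBACK);
		free(pcd);
	}
	qb_map_iter_free(miter);
	qb_map_destroy(group_map);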
>
>  	downlist_state = CPG_DOWNLIST_APPLYING;
>
> @@ -810,27 +820,86 @@ static void downlist_master_choose_and_send (void)
>  	}
>  	downlist_log("chosen downlist", stored_msg);
>
> -	/* send events */
> +	group_htab = qb_hashtable_create(CPG_GROUPS_MAX);
> +
> +	/*
> +	 * only the cpg groups included in left nodes should receive
> +	 * confchg event, so we will collect these cpg groups and
> +	 * relative left_lists here
> +	 */
>  	for (iter = process_info_list_head.next; iter != &process_info_list_head; ) {
>  		struct process_info *pi = list_entry(iter, struct process_info, list);
>  		iter = iter->next;
>
> +		left_pi = NULL;
>  		for (i = 0; i < stored_msg->left_nodes; i++) {
> +
>  			if (pi->nodeid == stored_msg->nodeids[i]) {
> -				left_list.nodeid = pi->nodeid;
> -				left_list.pid = pi->pid;
> -				left_list.reason = CONFCHG_CPG_REASON_NODEDOWN;
> -
> -				notify_lib_joinlist(&pi->group, NULL,
> -						    0, NULL,
> -						    1, &left_list,
> -						    MESSAGE_RES_CPG_CONFCHG_CALLBACK);
> -				list_del (&pi->list);
> -				free (pi);
> +				left_pi = pi;
>  				break;
>  			}
>  		}
> +
> +		if (left_pi) {
> +			marshall_from_mar_cpg_name_t(&cpg_group, &left_pi->group);
> +			log_printf (LOG_DEBUG, "cpg group name:%s", cpg_group.value);
> +			cpg_group.value[cpg_group.length] = 0;
> +
> +			pcd = (struct confchg_data *)qb_map_get(group_htab,
> +								cpg_group.value);
> +			if (pcd != NULL) {
> +				size = pcd->left_list_entries;
> +				pcd->left_list[size].nodeid = left_pi->nodeid;
> +				pcd->left_list[size].pid = left_pi->pid;
> +				pcd->left_list[size].reason = CONFCHG_CPG_REASON_NODEDOWN;
> +				pcd->left_list_entries++;
> +			} else {
> +				pcd = (struct confchg_data *)malloc(sizeof(struct confchg_data));
> +				memset(pcd, 0, sizeof(struct confchg_data));
> +
> +				memcpy(&pcd->cpg_group, &cpg_group, sizeof(struct cpg_name));
> +				pcd->left_list[0].nodeid = left_pi->nodeid;
> +				pcd->left_list[0].pid = left_pi->pid;
> +				pcd->left_list[0].reason = CONFCHG_CPG_REASON_NODEDOWN;
> +				pcd->left_list_entries = 1;
> +
> +				qb_map_put(group_htab, pcd->cpg_group.value, pcd);
> +
> +				list_init(&pcd->list);
> +				list_add(&pcd->list, &confchg_data_list_head);
> +			}

This could be a bit simplified:

	pcd = (struct confchg_data *)qb_map_get(group_map, cpg_group.value);
	if (pcd == NULL) {
		pcd = (struct confchg_data *)calloc(1, sizeof(struct confchg_data));
		memcpy(&pcd->cpg_group, &cpg_group, sizeof(struct cpg_name));
		qb_map_put(group_map, pcd->cpg_group.value, pcd);
	}
	size = pcd->left_list_entries;
	pcd->left_list[size].nodeid = left_pi->nodeid;
	pcd->left_list[size].pid = left_pi->pid;
	pcd->left_list[size].reason = CONFCHG_CPG_REASON_NODEDOWN;
	pcd->left_list_entries++;
	list_del (&left_pi->list);
	free (left_pi);

> +
> +			list_del (&left_pi->list);
> +			free (left_pi);
> +		}
> +	}
> +
> +	/* send only one confchg event per cpg group */
> +	for (iter = confchg_data_list_head.next; iter != &confchg_data_list_head; ) {
> +		pcd = list_entry(iter, struct confchg_data, list);
> +		iter = iter->next;
> +
> +		marshall_to_mar_cpg_name_t(&group, &pcd->cpg_group);
> +
> +		log_printf (LOG_DEBUG, "left_list_entries:%d", pcd->left_list_entries);
> +		for (i = 0; i < pcd->left_list_entries; i++) {
> +			log_printf (LOG_DEBUG, "left_list[%d] group:%s, ip:%s, pid:%d",
> +				    i, pcd->cpg_group.value,
> +				    (char*)api->totem_ifaces_print(pcd->left_list[i].nodeid),
> +				    pcd->left_list[i].pid);
> +		}
> +
> +		/* send confchg event */
> +		notify_lib_joinlist(&group, NULL,
> +				    0, NULL,
> +				    pcd->left_list_entries,
> +				    pcd->left_list,
> +				    MESSAGE_RES_CPG_CONFCHG_CALLBACK);
> +
> +		free(pcd);
>  	}
> +
> +	qb_map_destroy(group_htab);
>  }
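Since sheepdog was the motivation, one way to sanity-check the result
is from the client side: the merged event arrives through the
application's cpg_confchg_fn_t callback with all departed processes in
one left_list. A minimal, hypothetical handler (illustration only, not
part of this patch):

	#include <stdio.h>
	#include <corosync/cpg.h>

	/* With the patch applied, a partition that removes several
	 * nodes is delivered as one callback whose left_list carries
	 * every departed (nodeid, pid) pair, instead of one callback
	 * per node. */
	static void example_confchg_fn (
		cpg_handle_t handle,
		const struct cpg_name *group_name,
		const struct cpg_address *member_list, size_t member_list_entries,
		const struct cpg_address *left_list, size_t left_list_entries,
		const struct cpg_address *joined_list, size_t joined_list_entries)
	{
		size_t i;

		for (i = 0; i < left_list_entries; i++) {
			printf ("left: nodeid=%u pid=%u reason=%u\n",
				left_list[i].nodeid, left_list[i].pid,
				left_list[i].reason);
		}
	}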
>
>  static void
>  downlist_messages_delete (void)
> --
> 1.7.6.4

_______________________________________________
discuss mailing list
discuss@xxxxxxxxxxxx
http://lists.corosync.org/mailman/listinfo/discuss