Re: [PATCH] Send one confchg event per CPG group to CPG client

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Thanks for the patch! Some comments inline.

-Angus

On Fri, Oct 28, 2011 at 12:22:13AM +0800, Yunkai Zhang wrote:
> We found that sheepdog will receive more than one confchg msg when
> network partition occur.  For example, suppose the cluster has 4
> nodes: N1, N2, N3, N4,  and they form a single-ring initially. After a
> while, network partition occur, the single-ring divide into two
> sub-ring: ring(N1, N2, N3) and ring(N4). The sheepdog in the ring(N4)
> will receive the following confchg messages in turn:
> Memb: N2,N3,N4  Left:N1         Joined:null
> memb: N3,N4     Left:N2         Joined:null
> memb: N4        Left:N3         Joined:null
> 
> This patch will fixed this bug, and the client will only receive one
> confchg event in this case:
> memb: N4        Left:N2,N3,N4   Joined:null
> 
> Signed-off-by: Yunkai Zhang <qiushu.zyk@xxxxxxxxxx>
> ---
>  configure.ac           |    2 +-
>  include/corosync/cpg.h |    1 +
>  services/cpg.c         |   97 +++++++++++++++++++++++++++++++++++++++++-------
>  3 files changed, 85 insertions(+), 15 deletions(-)
> 
> diff --git a/configure.ac b/configure.ac
> index 563f799..aaed478 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -88,7 +88,7 @@ AC_HEADER_SYS_WAIT
>  AC_CHECK_HEADERS([arpa/inet.h fcntl.h limits.h netdb.h netinet/in.h stdint.h \
>  		  stdlib.h string.h sys/ioctl.h sys/param.h sys/socket.h \
>  		  sys/time.h syslog.h unistd.h sys/types.h getopt.h malloc.h \
> -		  sys/sockio.h utmpx.h])
> +		  sys/sockio.h utmpx.h search.h])

Why this? it's not included anywhere in corosync.

>  
>  # Checks for typedefs, structures, and compiler characteristics.
>  AC_C_CONST
> diff --git a/include/corosync/cpg.h b/include/corosync/cpg.h
> index f7956fa..9861c5a 100644
> --- a/include/corosync/cpg.h
> +++ b/include/corosync/cpg.h
> @@ -95,6 +95,7 @@ struct cpg_name {
>  };
>  
>  #define CPG_MEMBERS_MAX 128
> +#define CPG_GROUPS_MAX 64

I think this is ok, though not 100% sure.

>  
>  struct cpg_iteration_description_t {
>  	struct cpg_name group;
> diff --git a/services/cpg.c b/services/cpg.c
> index caec097..dc72c84 100644
> --- a/services/cpg.c
> +++ b/services/cpg.c
> @@ -55,6 +55,7 @@
>  #include <netinet/in.h>
>  #include <arpa/inet.h>
>  #include <sys/mman.h>
> +#include <qb/qbmap.h>
>  
>  #include <corosync/corotypes.h>
>  #include <qb/qbipc_common.h>
> @@ -705,7 +706,6 @@ static int notify_lib_joinlist(
>  		for (iter = cpg_pd_list_head.next; iter != &cpg_pd_list_head; iter = iter->next) {
>  			struct cpg_pd *cpd = list_entry (iter, struct cpg_pd, list);
>  			if (mar_name_compare (&cpd->group_name, group_name) == 0) {
> -				assert (left_list_entries <= 1);
>  				assert (joined_list_entries <= 1);
>  				if (joined_list_entries) {
>  					if (joined_list[0].pid == cpd->pid &&
> @@ -798,8 +798,18 @@ static void downlist_master_choose_and_send (void)
>  {
>  	struct downlist_msg *stored_msg;
>  	struct list_head *iter;
> -	mar_cpg_address_t left_list;
> -	int i;
> +	struct process_info *left_pi;
> +	struct qb_map_t *group_htab;
> +	struct cpg_name cpg_group;
> +	mar_cpg_name_t group;
> +	struct confchg_data{
> +		struct cpg_name cpg_group;
> +		mar_cpg_address_t left_list[CPG_MEMBERS_MAX];
> +		int left_list_entries;
> +		struct list_head  list;
> +	} *pcd;
> +	DECLARE_LIST_INIT(confchg_data_list_head);
> +	int i, size;

Why the list? You could use qb_skiplist_create() and use the
iterator.

miter = qb_map_iter_create(group_map);
while ((p = qb_map_iter_next(miter, (void**)&pcd))) {
 ...
}

Though using libqb here is going to make it impossible to cherry-pick
the patch into the flatiron branch (Steven/Honzaf would this be needed
there?).

>  
>  	downlist_state = CPG_DOWNLIST_APPLYING;
>  
> @@ -810,27 +820,86 @@ static void downlist_master_choose_and_send (void)
>  	}
>  	downlist_log("chosen downlist", stored_msg);
>  
> -	/* send events */
> +	group_htab = qb_hashtable_create(CPG_GROUPS_MAX);
> +
> +        /* 
> +	 * only the cpg groups included in left nodes should receive 
> +	 * confchg event, so we will collect these cpg groups and 
> +	 * relative left_lists here
> +	 */
>  	for (iter = process_info_list_head.next; iter != &process_info_list_head; ) {
>  		struct process_info *pi = list_entry(iter, struct process_info, list);
>  		iter = iter->next;
>  
> +		left_pi = NULL;
>  		for (i = 0; i < stored_msg->left_nodes; i++) {
> +
>  			if (pi->nodeid == stored_msg->nodeids[i]) {
> -				left_list.nodeid = pi->nodeid;
> -				left_list.pid = pi->pid;
> -				left_list.reason = CONFCHG_CPG_REASON_NODEDOWN;
> -
> -				notify_lib_joinlist(&pi->group, NULL,
> -					0, NULL,
> -					1, &left_list,
> -					MESSAGE_RES_CPG_CONFCHG_CALLBACK);
> -				list_del (&pi->list);
> -				free (pi);
> +				left_pi = pi;
>  				break;
>  			}
>  		}
> +
> +		if (left_pi) {
> +			marshall_from_mar_cpg_name_t(&cpg_group, &left_pi->group);
> +			log_printf (LOG_DEBUG, "cpg group name:%s", cpg_group.value);
> +			cpg_group.value[cpg_group.length] = 0;
> +
> +			pcd = (struct confchg_data *)qb_map_get(group_htab, 
> +					cpg_group.value);
> +			if (pcd != NULL) {
> +				size = pcd->left_list_entries;
> +				pcd->left_list[size].nodeid = left_pi->nodeid;
> +				pcd->left_list[size].pid = left_pi->pid;
> +				pcd->left_list[size].reason = CONFCHG_CPG_REASON_NODEDOWN;
> +				pcd->left_list_entries++;
> +			}else {
> +				pcd = (struct confchg_data *)malloc(sizeof(struct confchg_data));
> +				memset(pcd, 0, sizeof(struct confchg_data));
> +
> +				memcpy(&pcd->cpg_group, &cpg_group, sizeof(struct cpg_name));
> +				pcd->left_list[0].nodeid = left_pi->nodeid;
> +				pcd->left_list[0].pid = left_pi->pid;
> +				pcd->left_list[0].reason = CONFCHG_CPG_REASON_NODEDOWN; 
> +				pcd->left_list_entries = 1;
> +
> +				qb_map_put(group_htab, pcd->cpg_group.value, pcd);
> +
> +				list_init(&pcd->list);
> +				list_add(&pcd->list, &confchg_data_list_head);
> +			}

This could be a bit simplified:

			pcd = (struct confchg_data *)qb_map_get(group_map,
					cpg_group.value);
			if (pcd == NULL) {
				pcd = (struct confchg_data *)calloc(1, sizeof(struct confchg_data));
				memcpy(&pcd->cpg_group, &cpg_group, sizeof(struct cpg_name));
				qb_map_put(group_map, pcd->cpg_group.value, pcd);
			}
			size = pcd->left_list_entries;
			pcd->left_list[size].nodeid = left_pi->nodeid;
			pcd->left_list[size].pid = left_pi->pid;
			pcd->left_list[size].reason = CONFCHG_CPG_REASON_NODEDOWN;
			pcd->left_list_entries++;
			list_del (&left_pi->list);
			free (left_pi);


> +
> +			list_del (&left_pi->list);
> +			free (left_pi);
> +		}
> +	}
> +
> +	/* send only one confchg event per cpg group */
> +	for (iter = confchg_data_list_head.next; iter != &confchg_data_list_head; ) {
> +		pcd = list_entry(iter, struct confchg_data, list);
> +		iter = iter->next;
> +
> +		marshall_to_mar_cpg_name_t(&group, &pcd->cpg_group);
> +
> +		log_printf (LOG_DEBUG, "left_list_entries:%d", pcd->left_list_entries);
> +		for (i=0; i<pcd->left_list_entries; i++) {
> +			log_printf (LOG_DEBUG, "left_list[%d] group:%d, ip:%s, pid:%d", 
> +				i, pcd->cpg_group.value,
> +				(char*)api->totem_ifaces_print(pcd->left_list[i].nodeid), 
> +				pcd->left_list[i].pid);
> +		}
> +		
> +		/* send confchg event */
> +		notify_lib_joinlist(&group, NULL,
> +			0, NULL,
> +			pcd->left_list_entries, 
> +			pcd->left_list,
> +			MESSAGE_RES_CPG_CONFCHG_CALLBACK);
> +
> +		free(pcd);
>  	}
> +
> +	qb_map_destroy(group_htab);
>  }
>  
>  static void downlist_messages_delete (void)
> -- 
> 1.7.6.4
_______________________________________________
discuss mailing list
discuss@xxxxxxxxxxxx
http://lists.corosync.org/mailman/listinfo/discuss


[Index of Archives]     [Linux Clusters]     [Corosync Project]     [Linux USB Devel]     [Linux Audio Users]     [Photo]     [Yosemite News]    [Yosemite Photos]    [Linux Kernel]     [Linux SCSI]     [X.Org]

  Powered by Linux