On Sun, Apr 26, 2020 at 07:24:33AM +0000, Gang He wrote: > Hello List, > > In libdlm code, cluster_ringid_seq variable is defined with uint32_t in dlm_controld/dlm_daemon.h, > the corosync API returns uint64_t ring_id, in the current code, we use type cast to get the low-32bit ring-id. > But, in some cases, the corosync returns a huge ring-id (greater than 32bit), the DLM daemon does not work normally (looks stuck). > Then, I want to know if we can change cluster_ringid_seq in libdlm from uint32_t to uint64_t? That looks ok, please try the attached patch. Dave
diff --git a/dlm_controld/cpg.c b/dlm_controld/cpg.c index e4b27fac3cd5..5b5c52fc09bc 100644 --- a/dlm_controld/cpg.c +++ b/dlm_controld/cpg.c @@ -450,21 +450,24 @@ static int check_ringid_done(struct lockspace *ls) but that's probably not guaranteed.) */ if (ls->cpg_ringid_wait) { - log_group(ls, "check_ringid wait cluster %u cpg %u:%llu", - cluster_ringid_seq, ls->cpg_ringid.nodeid, + log_group(ls, "check_ringid wait cluster %llu cpg %u:%llu", + (unsigned long long)cluster_ringid_seq, + ls->cpg_ringid.nodeid, (unsigned long long)ls->cpg_ringid.seq); return 0; } - if (cluster_ringid_seq != (uint32_t)ls->cpg_ringid.seq) { - log_group(ls, "check_ringid cluster %u cpg %u:%llu", - cluster_ringid_seq, ls->cpg_ringid.nodeid, + if (cluster_ringid_seq != ls->cpg_ringid.seq) { + log_group(ls, "check_ringid cluster %llu cpg %u:%llu", + (unsigned long long)cluster_ringid_seq, + ls->cpg_ringid.nodeid, (unsigned long long)ls->cpg_ringid.seq); return 0; } - log_limit(ls, "check_ringid done cluster %u cpg %u:%llu", - cluster_ringid_seq, ls->cpg_ringid.nodeid, + log_limit(ls, "check_ringid done cluster %llu cpg %u:%llu", + (unsigned long long)cluster_ringid_seq, + ls->cpg_ringid.nodeid, (unsigned long long)ls->cpg_ringid.seq); return 1; diff --git a/dlm_controld/dlm_daemon.h b/dlm_controld/dlm_daemon.h index 3221e19cb7be..5b9a52dae750 100644 --- a/dlm_controld/dlm_daemon.h +++ b/dlm_controld/dlm_daemon.h @@ -179,7 +179,7 @@ EXTERN uint64_t fence_delay_begin; EXTERN uint64_t cluster_quorate_monotime; EXTERN uint64_t cluster_joined_monotime; EXTERN uint64_t cluster_joined_walltime; -EXTERN uint32_t cluster_ringid_seq; +EXTERN uint64_t cluster_ringid_seq; EXTERN char cluster_name[DLM_LOCKSPACE_LEN+1]; EXTERN int our_nodeid; EXTERN uint32_t control_minor; diff --git a/dlm_controld/member.c b/dlm_controld/member.c index 10351ec41d6d..da3a1f5b0e90 100644 --- a/dlm_controld/member.c +++ b/dlm_controld/member.c @@ -122,10 +122,10 @@ static void quorum_callback(quorum_handle_t h, uint32_t 
quorate, cluster_quorate_monotime = now; cluster_quorate = quorate; - cluster_ringid_seq = (uint32_t)ring_seq; + cluster_ringid_seq = ring_seq; - log_debug("cluster quorum %u seq %u nodes %u", - cluster_quorate, cluster_ringid_seq, node_list_entries); + log_debug("cluster quorum %u seq %llu nodes %u", + cluster_quorate, (unsigned long long)cluster_ringid_seq, node_list_entries); old_node_count = quorum_node_count; memcpy(&old_nodes, &quorum_nodes, sizeof(old_nodes)); @@ -139,8 +139,8 @@ static void quorum_callback(quorum_handle_t h, uint32_t quorate, for (i = 0; i < old_node_count; i++) { if (!is_cluster_member(old_nodes[i])) { - log_debug("cluster node %u removed seq %u", - old_nodes[i], cluster_ringid_seq); + log_debug("cluster node %u removed seq %llu", + old_nodes[i], (unsigned long long)cluster_ringid_seq); rem_cluster_node(old_nodes[i], now); del_configfs_node(old_nodes[i]); } @@ -148,8 +148,8 @@ static void quorum_callback(quorum_handle_t h, uint32_t quorate, for (i = 0; i < quorum_node_count; i++) { if (!is_old_member(quorum_nodes[i])) { - log_debug("cluster node %u added seq %u", - quorum_nodes[i], cluster_ringid_seq); + log_debug("cluster node %u added seq %llu", + quorum_nodes[i], (unsigned long long)cluster_ringid_seq); add_cluster_node(quorum_nodes[i], now); fence_delay_begin = now;
_______________________________________________ linux-lvm mailing list linux-lvm@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/linux-lvm read the LVM HOW-TO at http://tldp.org/HOWTO/LVM-HOWTO/