Hi all!
I'm playing with the latest version of corosync on three nodes (ha01, ha02, ha03). All three nodes are in two redundant rings (see config below). I simulate a network failure on node ha03 by enabling a firewall rule:
[root@ha03 ~]# iptables -A INPUT -j REJECT
As I expected, nodes ha01 and ha02 lost their connection to ha03, but quorum is still present:
[root@ha01 ~]# corosync-quorumtool
BUT node ha03 also reports quorum with all the other nodes!!!
[root@ha01 ~]# corosync-quorumtool
Quorum information
------------------
Date: Fri Oct 4 18:21:56 2013
Quorum provider: corosync_votequorum
Nodes: 2
Node ID: 1
Ring ID: 30160
Quorate: Yes
Votequorum information
----------------------
Expected votes: 3
Highest expected: 3
Total votes: 2
Quorum: 2
Flags: Quorate
Membership information
----------------------
Nodeid Votes Name
1 1 10.0.0.29 (local)
2 1 10.0.0.31
[root@ha03 ~]# corosync-quorumtool
Quorum information
------------------
Date: Fri Oct 4 18:06:57 2013
Quorum provider: corosync_votequorum
Nodes: 3
Node ID: 3
Ring ID: 2220
Quorate: Yes
Votequorum information
----------------------
Expected votes: 3
Highest expected: 3
Total votes: 3
Quorum: 2
Flags: Quorate
Membership information
----------------------
Nodeid Votes Name
1 1 10.0.0.29
2 1 10.0.0.31
3 1 10.0.0.32 (local)
In the log files I see these lines:
Oct 04 18:06:01 [731] ha03 corosync warning [MAIN ] Totem is unable to form a cluster because of an operating system or network fault. The most common cause of this message is that the local firewall is configured improperly.
Oct 04 18:06:02 [731] ha03 corosync warning [MAIN ] Totem is unable to form a cluster because of an operating system or network fault. The most common cause of this message is that the local firewall is configured improperly.
Oct 04 18:06:04 [731] ha03 corosync warning [MAIN ] Totem is unable to form a cluster because of an operating system or network fault. The most common cause of this message is that the local firewall is configured improperly.
As I understand it, there is no cluster on node ha03, yet this node still reports quorum with the other nodes. How is this possible? Why didn't ha03 lose quorum? Is this the expected behavior? Can someone explain this situation to me?
Thanks in advance
==================[config & other logs]====================
# rpm -qa | grep corosync
corosync-2.3.2-1.fc19.x86_64
corosynclib-2.3.2-1.fc19.x86_64
# /etc/corosync/corosync.conf
totem {
version: 2
crypto_cipher: none
crypto_hash: none
rrp_mode: passive
interface {
ringnumber: 0
bindnetaddr: 10.0.0.0
mcastport: 5405
ttl: 1
}
interface {
ringnumber: 1
bindnetaddr: 192.168.20.0
mcastport: 5405
ttl: 1
}
transport: udpu
}
logging {
fileline: off
to_logfile: yes
to_syslog: yes
logfile: /var/log/cluster/corosync.log
debug: off
timestamp: on
logger_subsys {
subsys: QUORUM
debug: off
}
}
nodelist {
node {
ring0_addr: 10.0.0.31
ring1_addr: 192.168.20.31
nodeid: 2
}
node {
ring0_addr: 10.0.0.29
ring1_addr: 192.168.20.29
nodeid: 1
}
node {
ring0_addr: 10.0.0.32
ring1_addr: 192.168.20.32
nodeid: 3
}
}
quorum {
provider: corosync_votequorum
}
[root@ha03 ~]# corosync-cmapctl
internal_configuration.service.0.name (str) = corosync_cmap
internal_configuration.service.0.ver (u32) = 0
internal_configuration.service.1.name (str) = corosync_cfg
internal_configuration.service.1.ver (u32) = 0
internal_configuration.service.2.name (str) = corosync_cpg
internal_configuration.service.2.ver (u32) = 0
internal_configuration.service.3.name (str) = corosync_quorum
internal_configuration.service.3.ver (u32) = 0
internal_configuration.service.4.name (str) = corosync_pload
internal_configuration.service.4.ver (u32) = 0
internal_configuration.service.5.name (str) = corosync_votequorum
internal_configuration.service.5.ver (u32) = 0
logging.debug (str) = off
logging.fileline (str) = off
logging.logfile (str) = /var/log/cluster/corosync.log
logging.logger_subsys.QUORUM.debug (str) = off
logging.logger_subsys.QUORUM.subsys (str) = QUORUM
logging.timestamp (str) = on
logging.to_logfile (str) = yes
logging.to_syslog (str) = yes
nodelist.local_node_pos (u32) = 2
nodelist.node.0.nodeid (u32) = 2
nodelist.node.0.ring0_addr (str) = 10.0.0.31
nodelist.node.0.ring1_addr (str) = 192.168.20.31
nodelist.node.1.nodeid (u32) = 1
nodelist.node.1.ring0_addr (str) = 10.0.0.29
nodelist.node.1.ring1_addr (str) = 192.168.20.29
nodelist.node.2.nodeid (u32) = 3
nodelist.node.2.ring0_addr (str) = 10.0.0.32
nodelist.node.2.ring1_addr (str) = 192.168.20.32
quorum.provider (str) = corosync_votequorum
runtime.blackbox.dump_flight_data (str) = no
runtime.blackbox.dump_state (str) = no
runtime.connections.active (u64) = 1
runtime.connections.closed (u64) = 416
runtime.connections.corosync-cmapct:1019:0x7fe8fb6ed320.client_pid (u32) = 1019
runtime.connections.corosync-cmapct:1019:0x7fe8fb6ed320.dispatched (u64) = 0
runtime.connections.corosync-cmapct:1019:0x7fe8fb6ed320.flow_control (u32) = 0
runtime.connections.corosync-cmapct:1019:0x7fe8fb6ed320.flow_control_count (u64) = 0
runtime.connections.corosync-cmapct:1019:0x7fe8fb6ed320.invalid_request (u64) = 0
runtime.connections.corosync-cmapct:1019:0x7fe8fb6ed320.name (str) = corosync-cmapct
runtime.connections.corosync-cmapct:1019:0x7fe8fb6ed320.overload (u64) = 0
runtime.connections.corosync-cmapct:1019:0x7fe8fb6ed320.queue_size (u32) = 0
runtime.connections.corosync-cmapct:1019:0x7fe8fb6ed320.recv_retries (u64) = 0
runtime.connections.corosync-cmapct:1019:0x7fe8fb6ed320.requests (u64) = 0
runtime.connections.corosync-cmapct:1019:0x7fe8fb6ed320.responses (u64) = 0
runtime.connections.corosync-cmapct:1019:0x7fe8fb6ed320.send_retries (u64) = 0
runtime.connections.corosync-cmapct:1019:0x7fe8fb6ed320.service_id (u32) = 0
runtime.services.cfg.0.rx (u64) = 0
runtime.services.cfg.0.tx (u64) = 0
runtime.services.cfg.1.rx (u64) = 0
runtime.services.cfg.1.tx (u64) = 0
runtime.services.cfg.2.rx (u64) = 0
runtime.services.cfg.2.tx (u64) = 0
runtime.services.cfg.3.rx (u64) = 0
runtime.services.cfg.3.tx (u64) = 0
runtime.services.cfg.service_id (u16) = 1
runtime.services.cmap.0.rx (u64) = 3
runtime.services.cmap.0.tx (u64) = 1
runtime.services.cmap.service_id (u16) = 0
runtime.services.cpg.0.rx (u64) = 0
runtime.services.cpg.0.tx (u64) = 0
runtime.services.cpg.1.rx (u64) = 0
runtime.services.cpg.1.tx (u64) = 0
runtime.services.cpg.2.rx (u64) = 0
runtime.services.cpg.2.tx (u64) = 0
runtime.services.cpg.3.rx (u64) = 0
runtime.services.cpg.3.tx (u64) = 0
runtime.services.cpg.4.rx (u64) = 0
runtime.services.cpg.4.tx (u64) = 0
runtime.services.cpg.5.rx (u64) = 3
runtime.services.cpg.5.tx (u64) = 1
runtime.services.cpg.service_id (u16) = 2
runtime.services.pload.0.rx (u64) = 0
runtime.services.pload.0.tx (u64) = 0
runtime.services.pload.1.rx (u64) = 0
runtime.services.pload.1.tx (u64) = 0
runtime.services.pload.service_id (u16) = 4
runtime.services.quorum.service_id (u16) = 3
runtime.services.votequorum.0.rx (u64) = 7
runtime.services.votequorum.0.tx (u64) = 2
runtime.services.votequorum.1.rx (u64) = 0
runtime.services.votequorum.1.tx (u64) = 0
runtime.services.votequorum.2.rx (u64) = 0
runtime.services.votequorum.2.tx (u64) = 0
runtime.services.votequorum.3.rx (u64) = 0
runtime.services.votequorum.3.tx (u64) = 0
runtime.services.votequorum.service_id (u16) = 5
runtime.totem.pg.mrp.rrp.0.faulty (u8) = 0
runtime.totem.pg.mrp.rrp.1.faulty (u8) = 0
runtime.totem.pg.mrp.srp.avg_backlog_calc (u32) = 0
runtime.totem.pg.mrp.srp.avg_token_workload (u32) = 0
runtime.totem.pg.mrp.srp.commit_entered (u64) = 2
runtime.totem.pg.mrp.srp.commit_token_lost (u64) = 0
runtime.totem.pg.mrp.srp.consensus_timeouts (u64) = 543
runtime.totem.pg.mrp.srp.continuous_gather (u32) = 271
runtime.totem.pg.mrp.srp.continuous_sendmsg_failures (u32) = 0
runtime.totem.pg.mrp.srp.firewall_enabled_or_nic_failure (u8) = 1
runtime.totem.pg.mrp.srp.gather_entered (u64) = 275
runtime.totem.pg.mrp.srp.gather_token_lost (u64) = 271
runtime.totem.pg.mrp.srp.mcast_retx (u64) = 0
runtime.totem.pg.mrp.srp.mcast_rx (u64) = 17
runtime.totem.pg.mrp.srp.mcast_tx (u64) = 7
runtime.totem.pg.mrp.srp.memb_commit_token_rx (u64) = 6
runtime.totem.pg.mrp.srp.memb_commit_token_tx (u64) = 6
runtime.totem.pg.mrp.srp.memb_join_rx (u64) = 4
runtime.totem.pg.mrp.srp.memb_join_tx (u64) = 7988
runtime.totem.pg.mrp.srp.memb_merge_detect_rx (u64) = 1014
runtime.totem.pg.mrp.srp.memb_merge_detect_tx (u64) = 0
runtime.totem.pg.mrp.srp.members.1.config_version (u64) = 0
runtime.totem.pg.mrp.srp.members.1.ip (str) = r(0) ip(10.0.0.29) r(1) ip(192.168.20.29)
runtime.totem.pg.mrp.srp.members.1.join_count (u32) = 1
runtime.totem.pg.mrp.srp.members.1.status (str) = joined
runtime.totem.pg.mrp.srp.members.2.config_version (u64) = 0
runtime.totem.pg.mrp.srp.members.2.ip (str) = r(0) ip(10.0.0.31) r(1) ip(192.168.20.31)
runtime.totem.pg.mrp.srp.members.2.join_count (u32) = 1
runtime.totem.pg.mrp.srp.members.2.status (str) = joined
runtime.totem.pg.mrp.srp.members.3.config_version (u64) = 0
runtime.totem.pg.mrp.srp.members.3.ip (str) = r(0) ip(10.0.0.32) r(1) ip(192.168.20.32)
runtime.totem.pg.mrp.srp.members.3.join_count (u32) = 1
runtime.totem.pg.mrp.srp.members.3.status (str) = joined
runtime.totem.pg.mrp.srp.mtt_rx_token (u32) = 188
runtime.totem.pg.mrp.srp.operational_entered (u64) = 2
runtime.totem.pg.mrp.srp.operational_token_lost (u64) = 1
runtime.totem.pg.mrp.srp.orf_token_rx (u64) = 2067
runtime.totem.pg.mrp.srp.orf_token_tx (u64) = 1
runtime.totem.pg.mrp.srp.recovery_entered (u64) = 2
runtime.totem.pg.mrp.srp.recovery_token_lost (u64) = 0
runtime.totem.pg.mrp.srp.rx_msg_dropped (u64) = 0
runtime.totem.pg.mrp.srp.token_hold_cancel_rx (u64) = 0
runtime.totem.pg.mrp.srp.token_hold_cancel_tx (u64) = 0
runtime.totem.pg.msg_queue_avail (u32) = 0
runtime.totem.pg.msg_reserved (u32) = 1
runtime.votequorum.ev_barrier (u32) = 3
runtime.votequorum.lowest_node_id (u32) = 1
runtime.votequorum.this_node_id (u32) = 3
runtime.votequorum.two_node (u8) = 0
totem.crypto_cipher (str) = none
totem.crypto_hash (str) = none
totem.interface.0.bindnetaddr (str) = 10.0.0.0
totem.interface.0.mcastport (u16) = 5405
totem.interface.0.ttl (u8) = 1
totem.interface.1.bindnetaddr (str) = 192.168.20.0
totem.interface.1.mcastport (u16) = 5405
totem.interface.1.ttl (u8) = 1
totem.rrp_mode (str) = passive
totem.transport (str) = udpu
totem.version (u32) = 2
_______________________________________________ discuss mailing list discuss@xxxxxxxxxxxx http://lists.corosync.org/mailman/listinfo/discuss