Hello,
I've set up a 3-node cluster and seem to be having problems with
some of my GFS2 mounts. All servers have two GFS2 mounts on iSCSI LUNs:
/var/lib/libvirt/sanlock and /etc/libvirt/qemu.
/dev/mapper/iscsi_cluster_qemu on /etc/libvirt/qemu type gfs2 (rw,relatime,hostdata=jid=0)
/dev/mapper/iscsi_cluster_sanlock on /var/lib/libvirt/sanlock type gfs2 (rw,relatime,hostdata=jid=0)
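For reference, the fstab entries behind these mounts are along these lines (device paths and options taken from the mount output above; the _netdev flag is the one part I'm reconstructing from memory, so treat it as an assumption):

/dev/mapper/iscsi_cluster_qemu     /etc/libvirt/qemu         gfs2  _netdev,rw  0 0
/dev/mapper/iscsi_cluster_sanlock  /var/lib/libvirt/sanlock  gfs2  _netdev,rw  0 0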
Currently, on vm01-test, I cannot access /var/lib/libvirt/sanlock; an ls simply hangs:
root@vm01-test:~# ls /var/lib/libvirt/sanlock
^C^C^C
^C
^C
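If a kernel-side trace would help, I can capture the stack of a blocked task like this (PID 14621 is the stuck mount.gfs2 visible in the ps output further down; a hung ls can be dumped the same way):

# cat /proc/14621/stack
# echo w > /proc/sysrq-trigger    # logs all D-state tasks to the kernel log
# dmesg | tail -n 100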
The same command on vm02-test (and vm03-test) returns immediately:
root@vm02-test:~# ls /var/lib/libvirt/sanlock/
42f8374d2c9513171301d94ab3f4c921 e193ecac416d5d6a4b7433ca80e201c5
f97ab2f33af3dc0f3fc38a9921aa3711 __LIBVIRT__DISKS__
I have tried rebooting the whole cluster, rebooting individual nodes,
restarting cman (roughly the sequence sketched below), and so on, but it
never fully works. If it's not happening on vm01, it happens on one of the
other nodes; at any given time, both GFS2 volumes have been stuck like
this on one of the three nodes.
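For clarity, the restart sequence I've been using looks roughly like this (service names as shipped on Ubuntu 12.04; treat it as a sketch of what I tried rather than an exact transcript):

# service rgmanager stop
# umount /etc/libvirt/qemu /var/lib/libvirt/sanlock
# service cman stop
# service cman start
# mount -a -t gfs2
# service rgmanager start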
I've included as much info as possible below to help you get to the
bottom of this; if I have forgotten something, please let me know! I
would really like to know what I'm missing here.
The cluster contains the following components:
cman 3.1.7-0ubuntu2.1
gfs2-cluster 3.1.3-0ubuntu1
corosync 1.4.2-2
lvm2 2.02.95-4ppa1
sanlock 2.2-1
libvirt-bin 0.9.13-1ppa1
rgmanager 3.1.7-0ubuntu2.1
The main configuration for the cluster is as follows:
<cluster name="kvm" config_version="11">
  <logging debug="on"/>
  <clusternodes>
    <clusternode name="vm01-test" nodeid="1">
      <fence>
        <method name="apc">
          <device name="apc01" port="1" action="off"/>
          <device name="apc02" port="1" action="off"/>
          <device name="apc01" port="1" action="on"/>
          <device name="apc02" port="1" action="on"/>
        </method>
      </fence>
    </clusternode>
    <clusternode name="vm02-test" nodeid="2">
      <fence>
        <method name="apc">
          <device name="apc01" port="8" action="off"/>
          <device name="apc02" port="8" action="off"/>
          <device name="apc01" port="8" action="on"/>
          <device name="apc02" port="8" action="on"/>
        </method>
      </fence>
    </clusternode>
    <clusternode name="vm03-test" nodeid="3">
      <fence>
        <method name="apc">
          <device name="apc01" port="2" action="off"/>
          <device name="apc02" port="2" action="off"/>
          <device name="apc01" port="2" action="on"/>
          <device name="apc02" port="2" action="on"/>
        </method>
      </fence>
    </clusternode>
  </clusternodes>
  <fencedevices>
    <fencedevice agent="fence_apc" ipaddr="apc01" secure="on" login="device" name="apc01" passwd="xxx"/>
    <fencedevice agent="fence_apc" ipaddr="apc02" secure="on" login="device" name="apc02" passwd="xxx"/>
  </fencedevices>
  <rm log_level="5">
    <failoverdomains>
      <failoverdomain name="any_node" nofailback="1" ordered="0" restricted="0"/>
    </failoverdomains>
    <vm domain="any_node" max_restarts="2" migrate="live" name="cloudstack" path="/etc/libvirt/qemu/" recovery="restart" restart_expire_time="600"/>
    <vm domain="any_node" max_restarts="2" migrate="live" name="test" path="/etc/libvirt/qemu/" recovery="restart" restart_expire_time="600"/>
  </rm>
  <totem rrp_mode="none" secauth="off"/>
  <quorumd device="/dev/mapper/iscsi_cluster_quorum"/>
</cluster>
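In case the XML itself is suspect, it can be sanity-checked with the stock tools (ccs_config_validate comes with the cman package):

# xmllint --noout /etc/cluster/cluster.conf
# ccs_config_validate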
Output from various commands:
root@vm01-test:~# dlm_tool ls
dlm lockspaces
name rgmanager
id 0x5231f3eb
flags 0x00000000
change member 3 joined 1 remove 0 failed 0 seq 1,1
members 1 2 3
name sanlock
id 0x3c282c0a
flags 0x00000008 fs_reg
change member 3 joined 1 remove 0 failed 0 seq 3,3
members 1 2 3
name qemu
id 0xb061106c
flags 0x00000008 fs_reg
change member 3 joined 1 remove 0 failed 0 seq 5,5
members 1 2 3
name clvmd
id 0x4104eefa
flags 0x00000000
change member 1 joined 1 remove 0 failed 0 seq 1,1
members 1
root@vm02-test:~# dlm_tool ls
dlm lockspaces
name clvmd
id 0x4104eefa
flags 0x00000000
change member 2 joined 1 remove 0 failed 0 seq 1,1
members 1 2
name qemu
id 0xb061106c
flags 0x00000008 fs_reg
change member 3 joined 1 remove 0 failed 0 seq 1,1
members 1 2 3
name rgmanager
id 0x5231f3eb
flags 0x00000000
change member 3 joined 1 remove 0 failed 0 seq 3,3
members 1 2 3
name sanlock
id 0x3c282c0a
flags 0x00000008 fs_reg
change member 3 joined 1 remove 0 failed 0 seq 2,2
members 1 2 3
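If lock-level detail would help, I'm happy to post dumps of the affected lockspaces as well; I'd gather them like this (both subcommands from dlm_tool as shipped with cman 3.x):

# dlm_tool lockdebug sanlock
# dlm_tool lockdebug qemu
# dlm_tool dump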
root@vm02-test:~# clustat
Cluster Status for kvm @ Wed Aug 15 17:19:24 2012
Member Status: Quorate
 Member Name                          ID   Status
 ------ ----                          ---- ------
 vm01-test                               1 Online
 vm02-test                               2 Online, Local, rgmanager
 vm03-test                               3 Online, rgmanager
 /dev/mapper/iscsi_cluster_quorum        0 Online, Quorum Disk

 Service Name              Owner (Last)              State
 ------- ----              ----- ------              -----
 vm:cloudstack             (vm03-test)               stopped
 vm:test                   (vm02-test)               disabled
root@vm02-test:~# sanlock client status
daemon 806a79ee-ef22-4296-abf4-5f2d531063a1.vm02-test
p -1 listener
p -1 status
s __LIBVIRT__DISKS__:2:/var/lib/libvirt/sanlock/__LIBVIRT__DISKS__:0
root@vm01-test:~# cman_tool status
Version: 6.2.0
Config Version: 11
Cluster Name: kvm
Cluster Id: 773
Cluster Member: Yes
Cluster Generation: 1220
Membership state: Cluster-Member
Nodes: 3
Expected votes: 3
Quorum device votes: 2
Total votes: 5
Node votes: 1
Quorum: 3
Active subsystems: 9
Flags:
Ports Bound: 0 11 178
Node name: vm01-test
Node ID: 1
Multicast addresses: 239.192.3.8
Node addresses: 10.254.128.240
root@vm02-test:~# cman_tool status
Version: 6.2.0
Config Version: 11
Cluster Name: kvm
Cluster Id: 773
Cluster Member: Yes
Cluster Generation: 1220
Membership state: Cluster-Member
Nodes: 3
Expected votes: 3
Quorum device votes: 2
Total votes: 5
Node votes: 1
Quorum: 3
Active subsystems: 9
Flags:
Ports Bound: 0 11 177 178
Node name: vm02-test
Node ID: 2
Multicast addresses: 239.192.3.8
Node addresses: 10.254.128.65
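For completeness, I can also include the fencing and group membership state; both of these tools ship with cman:

# fence_tool ls
# group_tool ls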
root@vm01-test:~# ps aux | grep gfs
root     11686  0.0  0.0 140080  1996 ?      Ssl  13:20   0:00 /usr/sbin/gfs_controld
root     14172  0.0  0.0      0     0 ?      S<   13:21   0:00 [gfs_recovery]
root     14183  0.0  0.0      0     0 ?      S    13:21   0:00 [gfs2_logd]
root     14184  0.0  0.0      0     0 ?      S    13:21   0:00 [gfs2_quotad]
root     14388  0.0  0.0      0     0 ?      S    13:21   0:00 [gfs2_logd]
root     14389  0.0  0.0      0     0 ?      S    13:21   0:00 [gfs2_quotad]
root     14621  0.0  0.0   4316   540 ?      D    13:25   0:00 /sbin/mount.gfs2 /dev/mapper/iscsi_cluster_sanlock /var/lib/libvirt/sanlock -o rw
root     20438  0.0  0.0   9380   944 pts/7  S+   17:25   0:00 grep --color=auto gfs
root@vm01-test:~# ps aux | grep dlm
root      7430  0.0  0.0      0     0 ?      S<   12:23   0:00 [user_dlm]
root     11606  0.0  0.0 223096  2076 ?      Ssl  13:20   0:00 dlm_controld
root     13614  0.0  0.0      0     0 ?      S    13:20   0:00 [dlm_scand]
root     13615  0.0  0.0      0     0 ?      S<   13:20   0:00 [dlm_recv]
root     13616  0.0  0.0      0     0 ?      S<   13:20   0:00 [dlm_send]
root     13617  0.0  0.0      0     0 ?      S    13:20   0:00 [dlm_recoverd]
root     14174  0.0  0.0      0     0 ?      S<   13:21   0:00 [dlm_callback]
root     14175  0.0  0.0      0     0 ?      S    13:21   0:00 [dlm_recoverd]
root     14382  0.0  0.0      0     0 ?      S<   13:21   0:00 [dlm_callback]
root     14383  0.0  0.0      0     0 ?      S    13:21   0:00 [dlm_recoverd]
root     15525  0.0  0.0      0     0 ?      S    13:35   0:00 [dlm_recoverd]
root     20442  0.0  0.0   9380   940 pts/7  S+   17:25   0:00 grep --color=auto dlm
root@vm02-test:~# ps aux | grep gfs
root      8433  0.0  0.0 140080  2016 ?      Ssl  13:31   0:00 /usr/sbin/gfs_controld
root      8465  0.0  0.0      0     0 ?      S<   13:31   0:00 [gfs_recovery]
root      8493  0.0  0.0      0     0 ?      S    13:31   0:00 [gfs2_logd]
root      8494  0.0  0.0      0     0 ?      S    13:31   0:00 [gfs2_quotad]
root      9860  0.0  0.0      0     0 ?      S    13:34   0:00 [gfs2_logd]
root      9861  0.0  0.0      0     0 ?      S    13:34   0:00 [gfs2_quotad]
root     12818  0.0  0.0   9380   940 pts/0  S+   17:25   0:00 grep --color=auto gfs
root@vm02-test:~# ps aux | grep dlm
root      8012  0.0  0.0 223096  2064 ?      Ssl  12:04   0:00 dlm_controld
root      8467  0.0  0.0      0     0 ?      S    13:31   0:00 [dlm_scand]
root      8468  0.0  0.0      0     0 ?      S<   13:31   0:00 [dlm_recv]
root      8469  0.0  0.0      0     0 ?      S<   13:31   0:00 [dlm_send]
root      8485  0.0  0.0      0     0 ?      S<   13:31   0:00 [dlm_callback]
root      8486  0.0  0.0      0     0 ?      S    13:31   0:00 [dlm_recoverd]
root      8560  0.0  0.0      0     0 ?      S    13:31   0:00 [dlm_recoverd]
root      9851  0.0  0.0      0     0 ?      S<   13:34   0:00 [dlm_callback]
root      9852  0.0  0.0      0     0 ?      S    13:34   0:00 [dlm_recoverd]
root     12603  0.0  0.0      0     0 ?      S    17:18   0:00 [dlm_recoverd]
root     12820  0.0  0.0   9380   940 pts/0  S+   17:25   0:00 grep --color=auto dlm
root@vm02-test:~# gfs2_tool journals /var/lib/libvirt/sanlock
journal2 - 8MB
journal3 - 8MB
journal1 - 8MB
journal0 - 8MB
4 journal(s) found.
root@vm02-test:~# gfs2_tool journals /etc/libvirt/qemu
journal2 - 8MB
journal3 - 8MB
journal1 - 8MB
journal0 - 8MB
4 journal(s) found.
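I can also pull GFS2 glock dumps from debugfs if that would narrow things down; I'd collect them like this (the directory is named <clustername>:<fsname>, so kvm:sanlock is my assumption for the sanlock volume's locktable name):

# mount -t debugfs none /sys/kernel/debug
# cat /sys/kernel/debug/gfs2/kvm:sanlock/glocks > /tmp/glocks.sanlock.txt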
# lsb_release -a
No LSB modules are available.
Distributor ID: Ubuntu
Description: Ubuntu 12.04 LTS
Release: 12.04
Codename: precise