Hi list,
When running clustat, rgmanager does not respond and the cluster resource
group is not shown (even after a long timeout).
A reboot solved the problem, but the system couldn't even restart because
of rgmanager error messages on the screen -> only a hard reset helped.
Sorry, I haven't collected any more useful information; please ask for
whatever additional output you need. The cluster.conf is attached.
How can I diagnose/understand the message "Node ID:0000000000000002 stuck
with lock usrm::rg="db""? (A small script to pull these warnings out of
the logs is included after the excerpts below.)
on the secondary node:
Jun 15 18:19:24 oracle09 clurgmgrd[4432]: <warning> Node
ID:0000000000000002 stuck with lock usrm::rg="db"
Jun 15 18:19:54 oracle09 clurgmgrd[4432]: <warning> Node
ID:0000000000000002 stuck with lock usrm::rg="db"
Jun 15 18:20:26 oracle09 clurgmgrd[4432]: <warning> Node
ID:0000000000000002 stuck with lock usrm::rg="db"
on the primary node:
Jun 15 10:17:25 oracle08 kernel: rh_lkid 11300b8
Jun 15 10:17:25 oracle08 kernel: lockstate 0
Jun 15 10:17:25 oracle08 kernel: nodeid 2
Jun 15 10:17:25 oracle08 kernel: status 4294967279
Jun 15 10:17:25 oracle08 kernel: lkid f569ff84
Jun 15 10:17:25 oracle08 kernel: dlm: Magma: reply from 1 no lock
Jun 15 10:17:25 oracle08 kernel: dlm: reply
Jun 15 10:17:25 oracle08 kernel: rh_cmd 5
Jun 15 10:17:25 oracle08 kernel: rh_lkid eb01c5
Jun 15 10:17:25 oracle08 kernel: lockstate 0
Jun 15 10:17:25 oracle08 kernel: nodeid 2
Jun 15 10:17:25 oracle08 kernel: status 4294967279
Jun 15 10:17:25 oracle08 kernel: lkid f569ff84
Jun 15 10:17:25 oracle08 kernel: dlm: Magma: reply from 1 no lock
Jun 15 10:17:25 oracle08 kernel: dlm: reply
Jun 15 10:17:25 oracle08 kernel: rh_cmd 5
Jun 15 10:17:25 oracle08 kernel: rh_lkid 11e027c
Jun 15 10:17:25 oracle08 kernel: lockstate 0
Jun 15 10:17:26 oracle08 kernel: nodeid 2
Jun 15 10:17:26 oracle08 kernel: status 4294967279
Jun 15 10:17:26 oracle08 kernel: lkid f569ff84
Jun 15 10:17:26 oracle08 kernel: dlm: Magma: reply from 1 no lock
Jun 15 10:17:26 oracle08 kernel: dlm: reply
Jun 15 10:17:26 oracle08 kernel: rh_cmd 5
Jun 15 10:17:26 oracle08 kernel: rh_lkid 122025f
Jun 15 10:17:26 oracle08 kernel: lockstate 0
Jun 15 10:17:26 oracle08 kernel: nodeid 2
Jun 15 10:17:26 oracle08 kernel: status 4294967279
Jun 15 10:17:26 oracle08 kernel: lkid f569ff84
Jun 15 10:17:26 oracle08 kernel: dlm: Magma: reply from 1 no lock
Jun 15 10:17:26 oracle08 kernel: dlm: reply
Jun 15 10:17:26 oracle08 kernel: rh_cmd 5
Jun 15 10:17:26 oracle08 kernel: rh_lkid 12e0185
Jun 15 10:17:26 oracle08 kernel: lockstate 0
Jun 15 10:17:26 oracle08 kernel: nodeid 2
Jun 15 10:17:26 oracle08 kernel: status 4294967279
Jun 15 10:17:26 oracle08 kernel: lkid f569ff84
after initiating shutdown:
Jun 15 18:20:54 oracle08 fenced: Stopping fence domain:
Jun 15 18:20:54 oracle08 fenced: shutdown succeeded
Jun 15 18:20:54 oracle08 rc: Stopping fenced: succeeded
Jun 15 18:20:54 oracle08 lock_gulmd: Stopping lock_gulmd:
Jun 15 18:20:54 oracle08 lock_gulmd: shutdown succeeded
Jun 15 18:20:54 oracle08 rc: Stopping lock_gulmd: succeeded
Jun 15 18:20:54 oracle08 cman: Stopping cman:
Jun 15 18:20:58 oracle08 cman: failed to stop cman failed
Jun 15 18:20:58 oracle08 rc: Stopping cman: failed
Jun 15 18:20:58 oracle08 ccsd: Stopping ccsd:
Jun 15 18:20:58 oracle08 ccsd[2276]: Stopping ccsd, SIGTERM received.
Jun 15 18:20:59 oracle08 ccsd: shutdown succeeded
Jun 15 18:20:59 oracle08 rc: Stopping ccsd: succeeded
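
Regarding the question above: in case a summary helps, here is a minimal
sketch (Python; the /var/log/messages path and the exact message layout are
my assumptions, adjust as needed) that pulls the clurgmgrd "stuck with lock"
warnings out of syslog and counts them per node and lock name:

#!/usr/bin/env python
# Minimal sketch: summarize rgmanager "stuck with lock" warnings from syslog.
# Assumes the warnings land in /var/log/messages and look like the lines
# quoted above; adjust LOG or the regex if your setup differs.
import re
import sys
from collections import defaultdict

LOG = sys.argv[1] if len(sys.argv) > 1 else "/var/log/messages"

# Matches e.g.:
#   clurgmgrd[4432]: <warning> Node ID:0000000000000002 stuck with lock usrm::rg="db"
pattern = re.compile(r'Node\s+ID:(\S+) stuck with lock (\S+)')

counts = defaultdict(int)   # (node id, lock name) -> number of warnings
first_seen = {}             # (node id, lock name) -> first syslog timestamp
last_seen = {}              # (node id, lock name) -> last syslog timestamp

with open(LOG) as f:
    for line in f:
        m = pattern.search(line)
        if not m:
            continue
        key = (m.group(1), m.group(2))
        counts[key] += 1
        stamp = " ".join(line.split()[:3])  # e.g. "Jun 15 18:19:24"
        first_seen.setdefault(key, stamp)
        last_seen[key] = stamp

for (node, lock), n in sorted(counts.items()):
    print("node %s lock %s: %d warnings (%s .. %s)"
          % (node, lock, n, first_seen[(node, lock)], last_seen[(node, lock)]))

On oracle09 that should show roughly how long node 2 has been reported stuck
on usrm::rg="db"; I can post the output here if it is useful.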
--
XMPP: sjolle@xxxxxxxxxxxxxxx
<?xml version="1.0"?>
<cluster config_version="16" name="oracle_cluster">
  <fence_daemon post_fail_delay="10" post_join_delay="15" />
  <clusternodes>
    <clusternode name="oracle09" votes="1">
      <fence>
        <method name="1">
          <device name="manual" nodename="oracle09" />
        </method>
      </fence>
    </clusternode>
    <clusternode name="oracle08" votes="1">
      <fence>
        <method name="1">
          <device name="manual" nodename="oracle08" />
        </method>
      </fence>
    </clusternode>
  </clusternodes>
  <cman expected_votes="1" two_node="1" />
  <fencedevices>
    <fencedevice agent="fence_manual" name="manual" />
  </fencedevices>
  <rm>
    <failoverdomains>
      <failoverdomain name="oracle_cluster" ordered="0" restricted="0">
        <failoverdomainnode name="oracle09" priority="1" />
        <failoverdomainnode name="oracle08" priority="1" />
      </failoverdomain>
    </failoverdomains>
    <resources />
    <service autostart="1" domain="OGCSBB" name="db">
      <fs device="/dev/sdb1" force_fsck="0" force_unmount="1" fsid="" fstype="ext3" mountpoint="/u00" name="u00" options="" self_fence="1">
        <fs device="/dev/sdb2" force_fsck="0" force_unmount="1" fsid="" fstype="ext3" mountpoint="/u01" name="u01" options="" self_fence="1" />
        <fs device="/dev/sdb3" force_fsck="0" force_unmount="1" fsid="" fstype="ext3" mountpoint="/u02" name="u02" options="" self_fence="1" />
        <fs device="/dev/sdb5" force_fsck="0" force_unmount="1" fsid="" fstype="ext3" mountpoint="/u03" name="u03" options="" self_fence="1" />
        <fs device="/dev/sdb6" force_fsck="0" force_unmount="1" fsid="" fstype="ext3" mountpoint="/u04" name="u04" options="" self_fence="1" />
        <fs device="/dev/sdc1" force_fsck="0" force_unmount="1" fsid="33145" fstype="ext3" mountpoint="/u05" name="u05" options="" self_fence="1" />
        <fs device="/dev/sdc2" force_fsck="0" force_unmount="1" fsid="53443" fstype="ext3" mountpoint="/u06" name="u06" options="" self_fence="1" />
      </fs>
      <ip address="192.168.89.39" monitor_link="1" />
      <script file="/opt/cluster/scripts/dbora_clu" name="dbora_clu" />
    </service>
  </rm>
</cluster>