Hi All,
Once again I seem to be having a blonde moment with our dev cluster.
After a full nuke of it I'm getting it going again, and I've come across
an issue when trying to mount a GFS volume:
root@asimov:~# mount -t gfs /dev/gnbd/shared /shared/
Lock_Harness <CVS> (built Jan 25 2006 17:22:25) installed
GFS <CVS> (built Jan 25 2006 17:22:54) installed
GFS: Trying to join cluster "lock_dlm", "ubernet:shared"
Lock_DLM (built Jan 25 2006 17:22:29) installed
lock_dlm: fence domain not found; check fenced
GFS: can't mount proto = lock_dlm, table = ubernet:shared, hostdata =
mount: permission denied
When I try to run fence from init.d, I get this:
fence_tool: waiting for fence domain run state
Here is the output of cat /proc/cluster/services:
root@asimov:~# cat /proc/cluster/services
Service Name GID LID State Code
Fence Domain: "default" 0 3 join
S-1,80,2
[]
User: "usrm::manager" 1 1 run -
[1 2]
Here's an strace of fence_tool join -D:
execve("/usr/sbin/fence_tool", ["fence_tool", "join"], [/* 16 vars */]) = 0
uname({sys="Linux", node="asimov", ...}) = 0
brk(0) = 0x804d000
old_mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
-1, 0) = 0xb7faa000
access("/etc/ld.so.nohwcap", F_OK) = -1 ENOENT (No such file or
directory)
old_mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
-1, 0) = 0xb7fa8000
access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or
directory)
open("/etc/ld.so.cache", O_RDONLY) = 3
fstat64(3, {st_mode=S_IFREG|0644, st_size=9831, ...}) = 0
old_mmap(NULL, 9831, PROT_READ, MAP_PRIVATE, 3, 0) = 0xb7fa5000
close(3) = 0
access("/etc/ld.so.nohwcap", F_OK) = -1 ENOENT (No such file or
directory)
open("/lib/libc.so.6", O_RDONLY) = 3
read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\306S\1"...,
512) = 512
fstat64(3, {st_mode=S_IFREG|0644, st_size=1131932, ...}) = 0
old_mmap(NULL, 1141908, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE,
3, 0) = 0xb7e8e000
old_mmap(0xb7f9f000, 16384, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x110000) = 0xb7f9f000
old_mmap(0xb7fa3000, 7316, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0xb7fa3000
close(3) = 0
munmap(0xb7fa5000, 9831) = 0
socket(0x1e /* PF_??? */, SOCK_DGRAM, 3) = 3
ioctl(3, 0x780b, 0) = 1
ioctl(3, 0x7805, 0) = 1
ioctl(3, 0xc1187890, 0xbfebeec0) = 0
close(3) = 0
brk(0) = 0x804d000
brk(0x806e000) = 0x806e000
getpid() = 5019
socket(PF_FILE, SOCK_STREAM, 0) = 3
connect(3, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
write(3, "\1\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 20) = 20
read(3, "\1\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 20) = 20
close(3) = 0
getpid() = 5019
socket(PF_FILE, SOCK_STREAM, 0) = 3
connect(3, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
write(3, "\2\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 20) = 20
read(3, "\2\0\0\0\0\0\0\0\377\377\377\377\0\0\0\0\0\0\0\0", 20) = 20
close(3) = 0
execve("/usr/local/sbin/fenced", ["fenced"], [/* 16 vars */]) = -1
ENOENT (No such file or directory)
execve("/usr/local/bin/fenced", ["fenced"], [/* 16 vars */]) = -1 ENOENT
(No such file or directory)
execve("/usr/sbin/fenced", ["fenced"], [/* 16 vars */]) = 0
uname({sys="Linux", node="asimov", ...}) = 0
brk(0) = 0x8050000
old_mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
-1, 0) = 0xb7f76000
access("/etc/ld.so.nohwcap", F_OK) = -1 ENOENT (No such file or
directory)
old_mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
-1, 0) = 0xb7f74000
access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or
directory)
open("/etc/ld.so.cache", O_RDONLY) = 3
fstat64(3, {st_mode=S_IFREG|0644, st_size=9831, ...}) = 0
old_mmap(NULL, 9831, PROT_READ, MAP_PRIVATE, 3, 0) = 0xb7f71000
close(3) = 0
access("/etc/ld.so.nohwcap", F_OK) = -1 ENOENT (No such file or
directory)
open("/lib/libpthread.so.0", O_RDONLY) = 3
read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\0@\0\000"...,
512) = 512
fstat64(3, {st_mode=S_IFREG|0644, st_size=84199, ...}) = 0
old_mmap(NULL, 331364, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE,
3, 0) = 0xb7f20000
old_mmap(0xb7f2e000, 4096, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0xe000) = 0xb7f2e000
old_mmap(0xb7f2f000, 269924, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0xb7f2f000
close(3) = 0
access("/etc/ld.so.nohwcap", F_OK) = -1 ENOENT (No such file or
directory)
open("/lib/libc.so.6", O_RDONLY) = 3
read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\306S\1"...,
512) = 512
fstat64(3, {st_mode=S_IFREG|0644, st_size=1131932, ...}) = 0
old_mmap(NULL, 1141908, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE,
3, 0) = 0xb7e09000
old_mmap(0xb7f1a000, 16384, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x110000) = 0xb7f1a000
old_mmap(0xb7f1e000, 7316, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0xb7f1e000
close(3) = 0
old_mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
-1, 0) = 0xb7e08000
munmap(0xb7f71000, 9831) = 0
getrlimit(RLIMIT_STACK, {rlim_cur=8192*1024, rlim_max=RLIM_INFINITY}) = 0
setrlimit(RLIMIT_STACK, {rlim_cur=2044*1024, rlim_max=RLIM_INFINITY}) = 0
getpid() = 5019
rt_sigaction(SIGRTMIN, {0xb7f27f85, [], 0}, NULL, 8) = 0
rt_sigaction(SIGRT_1, {0xb7f2800e, [RTMIN], 0}, NULL, 8) = 0
rt_sigaction(SIGRT_2, {0xb7f28120, [], 0}, NULL, 8) = 0
rt_sigprocmask(SIG_BLOCK, [RTMIN], NULL, 8) = 0
rt_sigprocmask(SIG_UNBLOCK, [RT_1], NULL, 8) = 0
_sysctl({{CTL_KERN, KERN_VERSION, 0, 20c61, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 2, 0xbfb8ab88,
32, (nil), 0}) = 0
socket(PF_FILE, SOCK_DGRAM, 0) = 3
brk(0) = 0x8050000
brk(0x8071000) = 0x8071000
socket(0x1e /* PF_??? */, SOCK_DGRAM, 3) = 4
ioctl(4, 0x780b, 0) = 1
ioctl(4, 0xc1187890, 0xbfb8ad90) = 0
time(NULL) = 1138694819
sendto(3, "1138694819 our name from cman \"a"..., 39, 0,
{sa_family=AF_FILE, path=@fenced_socket}, 16) = -1 ECONNREFUSED
(Connection refused)
close(4) = 0
getpid() = 5019
socket(PF_FILE, SOCK_STREAM, 0) = 4
connect(4, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
write(4, "\1\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 20) = 20
read(4, "\1\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 20) = 20
close(4) = 0
getpid() = 5019
socket(PF_FILE, SOCK_STREAM, 0) = 4
connect(4, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
write(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0008\0\0\0/cluster/clu"..., 76)
= 76
read(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\7\0\0\0", 20) = 20
read(4, "asimov\0", 7) = 7
close(4) = 0
getpid() = 5019
socket(PF_FILE, SOCK_STREAM, 0) = 4
connect(4, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
write(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0#\0\0\0/cluster/fen"..., 55) = 55
read(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\2\0\0\0", 20) = 20
read(4, "0\0", 2) = 2
close(4) = 0
getpid() = 5019
socket(PF_FILE, SOCK_STREAM, 0) = 4
connect(4, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
write(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\'\0\0\0/cluster/fen"..., 59) = 59
read(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\2\0\0\0", 20) = 20
read(4, "3\0", 2) = 2
close(4) = 0
getpid() = 5019
socket(PF_FILE, SOCK_STREAM, 0) = 4
connect(4, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
write(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\'\0\0\0/cluster/fen"..., 59) = 59
read(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\2\0\0\0", 20) = 20
read(4, "0\0", 2) = 2
close(4) = 0
time(NULL) = 1138694819
sendto(3, "1138694819 delay post_join 3s po"..., 43, 0,
{sa_family=AF_FILE, path=@fenced_socket}, 16) = -1 ECONNREFUSED
(Connection refused)
getpid() = 5019
socket(PF_FILE, SOCK_STREAM, 0) = 4
connect(4, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
write(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0+\0\0\0/cluster/clu"..., 63) = 63
read(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\7\0\0\0", 20) = 20
read(4, "asimov\0", 7) = 7
close(4) = 0
getpid() = 5019
socket(PF_FILE, SOCK_STREAM, 0) = 4
connect(4, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
write(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0+\0\0\0/cluster/clu"..., 63) = 63
read(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\6\0\0\0", 20) = 20
read(4, "abdul\0", 6) = 6
close(4) = 0
getpid() = 5019
socket(PF_FILE, SOCK_STREAM, 0) = 4
connect(4, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
write(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0+\0\0\0/cluster/clu"..., 63) = 63
read(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\5\0\0\0", 20) = 20
read(4, "faye\0", 5) = 5
close(4) = 0
getpid() = 5019
socket(PF_FILE, SOCK_STREAM, 0) = 4
connect(4, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
write(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0+\0\0\0/cluster/clu"..., 63) = 63
read(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\6\0\0\0", 20) = 20
read(4, "maria\0", 6) = 6
close(4) = 0
getpid() = 5019
socket(PF_FILE, SOCK_STREAM, 0) = 4
connect(4, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
write(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0+\0\0\0/cluster/clu"..., 63) = 63
read(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\4\0\0\0", 20) = 20
read(4, "mao\0", 4) = 4
close(4) = 0
getpid() = 5019
socket(PF_FILE, SOCK_STREAM, 0) = 4
connect(4, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
write(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0+\0\0\0/cluster/clu"..., 63) = 63
read(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\10\0\0\0", 20) = 20
read(4, "vincent\0", 8) = 8
close(4) = 0
getpid() = 5019
socket(PF_FILE, SOCK_STREAM, 0) = 4
connect(4, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
write(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0+\0\0\0/cluster/clu"..., 63) = 63
read(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\5\0\0\0", 20) = 20
read(4, "roco\0", 5) = 5
close(4) = 0
getpid() = 5019
socket(PF_FILE, SOCK_STREAM, 0) = 4
connect(4, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
write(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0+\0\0\0/cluster/clu"..., 63) = 63
read(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\t\0\0\0", 20) = 20
read(4, "piccarro\0", 9) = 9
close(4) = 0
getpid() = 5019
socket(PF_FILE, SOCK_STREAM, 0) = 4
connect(4, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
write(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0+\0\0\0/cluster/clu"..., 63) = 63
read(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\7\0\0\0", 20) = 20
read(4, "edward\0", 7) = 7
close(4) = 0
getpid() = 5019
socket(PF_FILE, SOCK_STREAM, 0) = 4
connect(4, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
write(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0,\0\0\0/cluster/clu"..., 64) = 64
read(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\6\0\0\0", 20) = 20
read(4, "baker\0", 6) = 6
close(4) = 0
getpid() = 5019
socket(PF_FILE, SOCK_STREAM, 0) = 4
connect(4, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
write(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0,\0\0\0/cluster/clu"..., 64) = 64
read(4, "\3\0\0\0\0\0\0\0\0\0\0\0\303\377\377\377\0\0\0\0", 20) = 20
close(4) = 0
time(NULL) = 1138694819
sendto(3, "1138694819 added 10 nodes from c"..., 35, 0,
{sa_family=AF_FILE, path=@fenced_socket}, 16) = -1 ECONNREFUSED
(Connection refused)
getpid() = 5019
socket(PF_FILE, SOCK_STREAM, 0) = 4
connect(4, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
write(4, "\2\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 20) = 20
read(4, "\2\0\0\0\0\0\0\0\377\377\377\377\0\0\0\0\0\0\0\0", 20) = 20
close(4) = 0
fork() = 5020
--- SIGCHLD (Child exited) @ 0 (0) ---
exit_group(0) = ?
FYI, this is running in a Xen 3.0 environment.
Does anyone have any pointers on what this could be?
Cheers
Tristram
--
Linux-cluster@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/linux-cluster