I got a slightly different error this time, which may be a clue, and I found
this similar thread:
http://lists.centos.org/pipermail/centos/2008-July/060274.html
This line may be the clue:
getsockname(3, 0x7fff7866e9e0, [128]) = -1 ENOTSOCK (Socket operation on
non-socket)
[root@frodo9 torque-2.3.2-snap.200807081528]# strace -f pbs_mom
.
.
.
fcntl(4, F_SETLK, {type=F_UNLCK, whence=SEEK_SET, start=0, len=0}) = 0
clone(Process 26346 attached
child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD,
child_tidptr=0x2aae326a1db0) = 26346
[pid 26345] exit_group(0) = ?
getsockname(3, 0x7fff7866e9e0, [128]) = -1 ENOTSOCK (Socket operation on
non-socket)
fcntl(3, F_GETFD) = 0
dup(3) = 7
fcntl(7, F_SETFD, 0) = 0
socket(PF_INET, SOCK_STREAM, IPPROTO_IP) = 8
close(3) = 0
fcntl(8, F_GETFD) = 0
dup2(8, 3) = 3
fcntl(3, F_SETFD, 0) = 0
close(8) = 0
write(3, "\25\3\1\0
\307\276jJ\207v\7\3473A\355\16\340\232\347\35\\\311\307\3472i)\5L\t\22"..., 37)
= -1 EPIPE (Broken pipe)
--- SIGPIPE (Broken pipe) @ 0 (0) ---
Process 26346 detached
[root@frodo9 torque-2.3.2-snap.200807081528]#
Daniel Andrzejewski
--
Daniel Andrzejewski wrote:
Hi All,
I have a problem with torque (openPBS) on x86_64 CentOS 5.2. I should add that
there is no problem on 32-bit CentOS 5.2 or on 64-bit Ubuntu 8.04.
The problem is that pbs_mom's child quits without giving any error logs.
[root@frodo9 torque-2.3.3]# strace -f pbs_mom
.
.
.
bind(6, {sa_family=AF_INET, sin_port=htons(15002),
sin_addr=inet_addr("0.0.0.0")}, 16) = 0
time(NULL) = 1222785330
listen(6, 512) = 0
socket(PF_INET, SOCK_STREAM, IPPROTO_IP) = 7
setsockopt(7, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
bind(7, {sa_family=AF_INET, sin_port=htons(15003),
sin_addr=inet_addr("0.0.0.0")}, 16) = 0
time(NULL) = 1222785330
listen(7, 512) = 0
fcntl(5, F_SETLK, {type=F_UNLCK, whence=SEEK_SET, start=0, len=0}) = 0
clone(Process 18441 attached
child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD,
child_tidptr=0x2b276258fdb0) = 18441
[pid 18441] getsockname(3, {sa_family=AF_INET, sin_port=htons(56711),
sin_addr=inet_addr("172.16.0.9")}, [16]) = 0
[pid 18441] getpeername(3, {sa_family=AF_INET, sin_port=htons(389),
sin_addr=inet_addr("172.16.2.24")}, [68719476752]) = 0
[pid 18441] fcntl(3, F_GETFD) = 0x1 (flags FD_CLOEXEC)
[pid 18441] dup(3) = 8
[pid 18441] fcntl(8, F_SETFD, FD_CLOEXEC) = 0
[pid 18441] socket(PF_INET, SOCK_STREAM, IPPROTO_IP) = 9
[pid 18441] close(3) = 0
[pid 18441] fcntl(9, F_GETFD) = 0
[pid 18441] dup2(9, 3) = 3
[pid 18441] fcntl(3, F_SETFD, 0) = 0
[pid 18441] close(9) = 0
[pid 18441] write(3, "\25\3\1\0
\232\17\205\301f<O0\352\246\357\344Z\31&\243\361\356\2128\242\377\7O{\267\333"...,
37 <unfinished ...>
[pid 18440] exit_group(0) = ?
[pid 18441] <... write resumed> ) = -1 EPIPE (Broken pipe)
[pid 18441] --- SIGPIPE (Broken pipe) @ 0 (0) ---
Process 18441 detached
[root@frodo9 torque-2.3.3]#
I have asked the torque users mailing list, but since the problem is specific
to 64-bit CentOS 5.2, I thought I would go ahead and ask here.
Any help is appreciated.
Thanks,
Daniel
_______________________________________________
CentOS mailing list
CentOS@xxxxxxxxxx
http://lists.centos.org/mailman/listinfo/centos