Hi All, I have been having a persistent problem shutting down a cluster node. I have a two-node cluster. If Node A starts first, there is no problem rebooting Node B at any time. But if I try to reboot Node A, it hangs while trying to kill "clurgmgrd": > ps -eaf |grep clurgmgrd 116:root 25824 1 0 10:45 ? 00:00:00 clurgmgrd > strace -f kill -TERM 25824 execve("/bin/kill", ["kill", "-TERM", "25824"], [/* 28 vars */]) = 0 uname({sys="Linux", node="mix", ...}) = 0 brk(0) = 0x503000 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2a95556000 access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory) open("/etc/ld.so.cache", O_RDONLY) = 3 fstat(3, {st_mode=S_IFREG|0644, st_size=114663, ...}) = 0 mmap(NULL, 114663, PROT_READ, MAP_PRIVATE, 3, 0) = 0x2a95557000 close(3) = 0 open("/lib64/tls/libc.so.6", O_RDONLY) = 3 read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0`\305\21"..., 832) = 832 fstat(3, {st_mode=S_IFREG|0755, st_size=1493186, ...}) = 0 mmap(0x3e10100000, 2310056, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x3e10100000 mprotect(0x3e1022b000, 1085352, PROT_NONE) = 0 mmap(0x3e1032a000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x12a000) = 0x3e1032a000 mmap(0x3e10330000, 16296, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x3e10330000 close(3) = 0 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2a95573000 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2a95574000 mprotect(0x3e1032a000, 12288, PROT_READ) = 0 arch_prctl(ARCH_SET_FS, 0x2a95573b00) = 0 munmap(0x2a95557000, 114663) = 0 open("/usr/lib/locale/locale-archive", O_RDONLY) = 3 fstat(3, {st_mode=S_IFREG|0644, st_size=48516832, ...}) = 0 mmap(NULL, 48516832, PROT_READ, MAP_PRIVATE, 3, 0) = 0x2a95575000 close(3) = 0 brk(0) = 0x503000 brk(0x524000) = 0x524000 kill(25824, SIGTERM) = 0 exit_group(0) = ? 
Process 28578 detached > ps -eaf |grep clurgmgrd 116:root 25824 1 0 10:45 ? 00:00:00 clurgmgrd > strace -p 25824 Process 25824 attached - interrupt to quit select(7, [4 5 6], NULL, NULL, {7, 735000}) = 0 (Timeout) socket(PF_FILE, SOCK_STREAM, 0) = 9 connect(9, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0 write(9, "\1\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 20) = 20 read(9, "\1\0\0\0\0\0\0\0\210\35\0\0\0\0\0\0\0\0\0\0", 20) = 20 close(9) = 0 socket(PF_FILE, SOCK_STREAM, 0) = 9 connect(9, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0 write(9, "\3\0\0\0\0\0\0\0\210\35\0\0\0\0\0\0\31\0\0\0/cluster/@"..., 45) = 45 read(9, "\3\0\0\0\0\0\0\0\210\35\0\0\0\0\0\0\3\0\0\0", 20) = 20 read(9, "30\0", 3) = 3 close(9) = 0 socket(PF_FILE, SOCK_STREAM, 0) = 9 connect(9, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0 write(9, "\2\0\0\0\0\0\0\0\210\35\0\0\0\0\0\0\0\0\0\0", 20) = 20 read(9, "\2\0\0\0\0\0\0\0\377\377\377\377\0\0\0\0\0\0\0\0", 20) = 20 close(9) = 0 select(7, [6], [6], NULL, {0, 0}) = 0 (Timeout) select(6, [5], [5], NULL, {0, 0}) = 0 (Timeout) select(7, [4 5 6], NULL, NULL, {10, 0}) = 0 (Timeout) socket(PF_FILE, SOCK_STREAM, 0) = 9 connect(9, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0 write(9, "\1\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 20) = 20 read(9, "\1\0\0\0\0\0\0\0\246\35\0\0\0\0\0\0\0\0\0\0", 20) = 20 close(9) = 0 socket(PF_FILE, SOCK_STREAM, 0) = 9 connect(9, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0 write(9, "\3\0\0\0\0\0\0\0\246\35\0\0\0\0\0\0\31\0\0\0/cluster/@"..., 45) = 45 read(9, "\3\0\0\0\0\0\0\0\246\35\0\0\0\0\0\0\3\0\0\0", 20) = 20 read(9, "30\0", 3) = 3 close(9) = 0 socket(PF_FILE, SOCK_STREAM, 0) = 9 connect(9, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0 write(9, "\2\0\0\0\0\0\0\0\246\35\0\0\0\0\0\0\0\0\0\0", 20) = 20 read(9, "\2\0\0\0\0\0\0\0\377\377\377\377\0\0\0\0\0\0\0\0", 20) = 20 close(9) = 0 select(7, [6], [6], NULL, {0, 0}) 
= 0 (Timeout) select(6, [5], [5], NULL, {0, 0}) = 0 (Timeout) select(7, [4 5 6], NULL, NULL, {10, 0}) = 0 (Timeout) socket(PF_FILE, SOCK_STREAM, 0) = 9 connect(9, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0 write(9, "\1\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 20) = 20 read(9, "\1\0\0\0\0\0\0\0\304\35\0\0\0\0\0\0\0\0\0\0", 20) = 20 close(9) = 0 socket(PF_FILE, SOCK_STREAM, 0) = 9 connect(9, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0 write(9, "\3\0\0\0\0\0\0\0\304\35\0\0\0\0\0\0\31\0\0\0/cluster/@"..., 45) = 45 read(9, "\3\0\0\0\0\0\0\0\304\35\0\0\0\0\0\0\3\0\0\0", 20) = 20 read(9, "30\0", 3) = 3 close(9) = 0 socket(PF_FILE, SOCK_STREAM, 0) = 9 connect(9, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0 write(9, "\2\0\0\0\0\0\0\0\304\35\0\0\0\0\0\0\0\0\0\0", 20) = 20 read(9, "\2\0\0\0\0\0\0\0\377\377\377\377\0\0\0\0\0\0\0\0", 20) = 20 close(9) = 0 ... What exactly is clurgmgrd trying to do, and why does it not exit after receiving SIGTERM? Regards, Jie -- Linux-cluster@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/linux-cluster