Re: volume not working after yum update - gluster 3.6.3

On Mon, 2015-08-10 at 22:22 +0530, Atin Mukherjee wrote:
[snip]
> strace output claims the command exited successfully. Are you sure ls got hung?

Not sure, but this one definitely hung. 'mkdir("test", 0777' was the last line of output, and it's been stuck here for about 7 minutes now:

[root@voicemail1b-1 14391.broken]# strace mkdir test
execve("/usr/bin/mkdir", ["mkdir", "test"], [/* 27 vars */]) = 0
brk(0)                                  = 0x8db000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f3468a89000
access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or directory)
open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=31874, ...}) = 0
mmap(NULL, 31874, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f3468a81000
close(3)                                = 0
open("/lib64/libselinux.so.1", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\240d\0\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=147120, ...}) = 0
mmap(NULL, 2246784, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f3468644000
mprotect(0x7f3468665000, 2097152, PROT_NONE) = 0
mmap(0x7f3468865000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x21000) = 0x7f3468865000
mmap(0x7f3468867000, 6272, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f3468867000
close(3)                                = 0
open("/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\0\34\2\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=2107760, ...}) = 0
mmap(NULL, 3932736, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f3468283000
mprotect(0x7f3468439000, 2097152, PROT_NONE) = 0
mmap(0x7f3468639000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1b6000) = 0x7f3468639000
mmap(0x7f346863f000, 16960, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f346863f000
close(3)                                = 0
open("/lib64/libpcre.so.1", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\360\25\0\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=398272, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f3468a80000
mmap(NULL, 2490888, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f3468022000
mprotect(0x7f3468081000, 2097152, PROT_NONE) = 0
mmap(0x7f3468281000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x5f000) = 0x7f3468281000
close(3)                                = 0
open("/lib64/liblzma.so.5", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0000/\0\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=153184, ...}) = 0
mmap(NULL, 2245240, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f3467dfd000
mprotect(0x7f3467e21000, 2093056, PROT_NONE) = 0
mmap(0x7f3468020000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x23000) = 0x7f3468020000
close(3)                                = 0
open("/lib64/libdl.so.2", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\320\16\0\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=19512, ...}) = 0
mmap(NULL, 2109744, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f3467bf9000
mprotect(0x7f3467bfc000, 2093056, PROT_NONE) = 0
mmap(0x7f3467dfb000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x7f3467dfb000
close(3)                                = 0
open("/lib64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\240l\0\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=141616, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f3468a7f000
mmap(NULL, 2208864, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f34679dd000
mprotect(0x7f34679f3000, 2097152, PROT_NONE) = 0
mmap(0x7f3467bf3000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x16000) = 0x7f3467bf3000
mmap(0x7f3467bf5000, 13408, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f3467bf5000
close(3)                                = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f3468a7e000
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f3468a7c000
arch_prctl(ARCH_SET_FS, 0x7f3468a7c800) = 0
mprotect(0x7f3468639000, 16384, PROT_READ) = 0
mprotect(0x7f3467bf3000, 4096, PROT_READ) = 0
mprotect(0x7f3467dfb000, 4096, PROT_READ) = 0
mprotect(0x7f3468020000, 4096, PROT_READ) = 0
mprotect(0x7f3468281000, 4096, PROT_READ) = 0
mprotect(0x7f3468865000, 4096, PROT_READ) = 0
mprotect(0x611000, 4096, PROT_READ)     = 0
mprotect(0x7f3468a8a000, 4096, PROT_READ) = 0
munmap(0x7f3468a81000, 31874)           = 0
set_tid_address(0x7f3468a7cad0)         = 24942
set_robust_list(0x7f3468a7cae0, 24)     = 0
rt_sigaction(SIGRTMIN, {0x7f34679e3780, [], SA_RESTORER|SA_SIGINFO, 0x7f34679ec130}, NULL, 8) = 0
rt_sigaction(SIGRT_1, {0x7f34679e3810, [], SA_RESTORER|SA_RESTART|SA_SIGINFO, 0x7f34679ec130}, NULL, 8) = 0
rt_sigprocmask(SIG_UNBLOCK, [RTMIN RT_1], NULL, 8) = 0
getrlimit(RLIMIT_STACK, {rlim_cur=8192*1024, rlim_max=RLIM64_INFINITY}) = 0
statfs("/sys/fs/selinux", {f_type=0xf97cff8c, f_bsize=4096, f_blocks=0, f_bfree=0, f_bavail=0, f_files=0, f_ffree=0, f_fsid={0, 0}, f_namelen=255, f_frsize=4096}) = 0
statfs("/sys/fs/selinux", {f_type=0xf97cff8c, f_bsize=4096, f_blocks=0, f_bfree=0, f_bavail=0, f_files=0, f_ffree=0, f_fsid={0, 0}, f_namelen=255, f_frsize=4096}) = 0
stat("/sys/fs/selinux", {st_mode=S_IFDIR|0755, st_size=0, ...}) = 0
brk(0)                                  = 0x8db000
brk(0x8fc000)                           = 0x8fc000
mkdir("test", 0777 

>> > Then ... do I need to run something on one of the bricks while strace is running?
>> >
>> > Cheers,
>> > Kingsley.
>> >
>> >
>> > > >
>> > > > [root@gluster1b-1 ~]# gluster volume heal callrec info
>> > > > Brick gluster1a-1.dns99.co.uk:/data/brick/callrec/
>> > > > <gfid:164f888f-2049-49e6-ad26-c758ee091863>
>> > > > /recordings/834723/14391 - Possibly undergoing heal
>> > > >
>> > > > <gfid:e280b40c-d8b7-43c5-9da7-4737054d7a7f>
>> > > > <gfid:b1fbda4a-732f-4f5d-b5a1-8355d786073e>
>> > > > <gfid:edb74524-b4b7-4190-85e7-4aad002f6e7c>
>> > > > <gfid:9b8b8446-1e27-4113-93c2-6727b1f457eb>
>> > > > <gfid:650efeca-b45c-413b-acc3-f0a5853ccebd>
>> > > > Number of entries: 7
>> > > >
>> > > > Brick gluster1b-1.dns99.co.uk:/data/brick/callrec/
>> > > > Number of entries: 0
>> > > >
>> > > > Brick gluster2a-1.dns99.co.uk:/data/brick/callrec/
>> > > > <gfid:e280b40c-d8b7-43c5-9da7-4737054d7a7f>
>> > > > <gfid:164f888f-2049-49e6-ad26-c758ee091863>
>> > > > <gfid:650efeca-b45c-413b-acc3-f0a5853ccebd>
>> > > > <gfid:b1fbda4a-732f-4f5d-b5a1-8355d786073e>
>> > > > /recordings/834723/14391 - Possibly undergoing heal
>> > > >
>> > > > <gfid:edb74524-b4b7-4190-85e7-4aad002f6e7c>
>> > > > <gfid:9b8b8446-1e27-4113-93c2-6727b1f457eb>
>> > > > Number of entries: 7
>> > > >
>> > > > Brick gluster2b-1.dns99.co.uk:/data/brick/callrec/
>> > > > Number of entries: 0
>> > > >
>> > > >
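As an aside, the gfid-only entries above can be mapped back to real paths on a brick via the .glusterfs directory. A rough sketch I could run on one of the bricks (assuming the usual .glusterfs/<xx>/<yy>/<gfid> hard-link layout for regular files; for directories that entry is a symlink, so readlink would show the parent instead):

  BRICK=/data/brick/callrec
  GFID=164f888f-2049-49e6-ad26-c758ee091863
  # print every path on the brick that shares an inode with the gfid hard link
  find "$BRICK" -path "$BRICK/.glusterfs" -prune -o \
       -samefile "$BRICK/.glusterfs/${GFID:0:2}/${GFID:2:2}/$GFID" -print
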
>> > > > If I query each brick directly for the number of files/directories
>> > > > within that, I get 1731 on gluster1a-1 and gluster2a-1, but 1737 on
>> > > > the other two, using this command:
>> > > >
>> > > > # find /data/brick/callrec/recordings/834723/14391 -print | wc -l
>> > > >
>> > > > Cheers,
>> > > > Kingsley.
>> > > >
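Regarding the per-brick counts above (1731 vs 1737), I can also pull the listing from each brick and diff them to see exactly which entries differ. A quick sketch, assuming root ssh access from here to each brick host:

  for h in gluster1a-1 gluster1b-1 gluster2a-1 gluster2b-1; do
      ssh "$h" 'cd /data/brick/callrec/recordings/834723/14391 && find . | sort' \
          > "/tmp/$h.list"
  done
  diff /tmp/gluster1a-1.list /tmp/gluster1b-1.list    # repeat for the other pairs
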
>> > > > On Mon, 2015-08-10 at 11:05 +0100, Kingsley wrote:
>> > > > > Sorry for the blind panic - restarting the volume seems to have
>> > > > > fixed it.
>> > > > >
>> > > > > But then my next question - why is this necessary? Surely it
>> > > > > undermines the whole point of a high availability system?
>> > > > >
>> > > > > Cheers,
>> > > > > Kingsley.
>> > > > >
>> > > > > On Mon, 2015-08-10 at 10:53 +0100, Kingsley wrote:
>> > > > > > Hi,
>> > > > > >
>> > > > > > We have a 4-way replicated volume using gluster 3.6.3 on CentOS 7.
>> > > > > >
>> > > > > > Over the weekend I did a yum update on each of the bricks in turn,
>> > > > > > but now when clients (using fuse mounts) try to access the volume,
>> > > > > > it hangs. Gluster itself wasn't updated (we've disabled that repo
>> > > > > > so that we keep to 3.6.3 for now).
>> > > > > > This was what I did:
>> > > > > >
>> > > > > >       * on first brick, "yum update"
>> > > > > >       * reboot brick
>> > > > > >       * watch "gluster volume status" on another brick and wait
>> > > > > >         for it to say all 4 bricks are online before proceeding
>> > > > > >         to update the next brick
>> > > > > >
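For what it's worth, that "wait until all 4 bricks are online" step was done by eye; a rough scripted equivalent would be something like the loop below (and in hindsight it would probably have been worth waiting for the heal queue to drain as well before moving on):

  # poll "gluster volume status" until all 4 brick processes report online
  until [ "$(gluster volume status callrec \
             | awk '/^Brick/ && $(NF-1) == "Y"' | wc -l)" -eq 4 ]
  do
      sleep 10
  done
  gluster volume heal callrec info   # ideally wait for 0 entries on every brick too
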
>> > > > > > I was expecting the clients might pause 30 seconds while they
>> > > > > > notice a brick is offline, but then recover.
>> > > > > >
>> > > > > > I've tried re-mounting clients, but that hasn't helped.
>> > > > > >
>> > > > > > I can't see much data in any of the log files.
>> > > > > >
>> > > > > > I've tried "gluster volume heal callrec" but it doesn't seem to
>> > > > > > have helped.
>> > > > > >
>> > > > > > What shall I do next?
>> > > > > >
>> > > > > > I've pasted some stuff below in case any of it helps.
>> > > > > >
>> > > > > > Cheers,
>> > > > > > Kingsley.
>> > > > > >
>> > > > > > [root@gluster1b-1 ~]# gluster volume info callrec
>> > > > > >
>> > > > > > Volume Name: callrec
>> > > > > > Type: Replicate
>> > > > > > Volume ID: a39830b7-eddb-4061-b381-39411274131a
>> > > > > > Status: Started
>> > > > > > Number of Bricks: 1 x 4 = 4
>> > > > > > Transport-type: tcp
>> > > > > > Bricks:
>> > > > > > Brick1: gluster1a-1:/data/brick/callrec
>> > > > > > Brick2: gluster1b-1:/data/brick/callrec
>> > > > > > Brick3: gluster2a-1:/data/brick/callrec
>> > > > > > Brick4: gluster2b-1:/data/brick/callrec
>> > > > > > Options Reconfigured:
>> > > > > > performance.flush-behind: off
>> > > > > > [root@gluster1b-1 ~]#
>> > > > > >
>> > > > > >
>> > > > > > [root@gluster1b-1 ~]# gluster volume status callrec
>> > > > > > Status of volume: callrec
>> > > > > > Gluster process                                  Port    Online  Pid
>> > > > > > ------------------------------------------------------------------------------
>> > > > > > Brick gluster1a-1:/data/brick/callrec            49153   Y       6803
>> > > > > > Brick gluster1b-1:/data/brick/callrec            49153   Y       2614
>> > > > > > Brick gluster2a-1:/data/brick/callrec            49153   Y       2645
>> > > > > > Brick gluster2b-1:/data/brick/callrec            49153   Y       4325
>> > > > > > NFS Server on localhost                          2049    Y       2769
>> > > > > > Self-heal Daemon on localhost                    N/A     Y       2789
>> > > > > > NFS Server on gluster2a-1                        2049    Y       2857
>> > > > > > Self-heal Daemon on gluster2a-1                  N/A     Y       2814
>> > > > > > NFS Server on 88.151.41.100                      2049    Y       6833
>> > > > > > Self-heal Daemon on 88.151.41.100                N/A     Y       6824
>> > > > > > NFS Server on gluster2b-1                        2049    Y       4428
>> > > > > > Self-heal Daemon on gluster2b-1                  N/A     Y       4387
>> > > > > >
>> > > > > > Task Status of Volume callrec
>> > > > > > ------------------------------------------------------------------------------
>> > > > > > There are no active volume tasks
>> > > > > >
>> > > > > > [root@gluster1b-1 ~]#
>> > > > > >
>> > > > > >
>> > > > > > [root@gluster1b-1 ~]# gluster volume heal callrec info
>> > > > > > Brick gluster1a-1.dns99.co.uk:/data/brick/callrec/
>> > > > > > /to_process - Possibly undergoing heal
>> > > > > >
>> > > > > > Number of entries: 1
>> > > > > >
>> > > > > > Brick gluster1b-1.dns99.co.uk:/data/brick/callrec/
>> > > > > > Number of entries: 0
>> > > > > >
>> > > > > > Brick gluster2a-1.dns99.co.uk:/data/brick/callrec/
>> > > > > > /to_process - Possibly undergoing heal
>> > > > > >
>> > > > > > Number of entries: 1
>> > > > > >
>> > > > > > Brick gluster2b-1.dns99.co.uk:/data/brick/callrec/
>> > > > > > Number of entries: 0
>> > > > > >
>> > > > > > [root@gluster1b-1 ~]#
>> > > > > >
>> > > > > >


_______________________________________________
Gluster-users mailing list
Gluster-users@xxxxxxxxxxx
http://www.gluster.org/mailman/listinfo/gluster-users
