Re: crash with 1.4.0tla846

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Since adding "option self-heal off", I have been unable to reproduce the problem. I re-ran my test (which was rsync) 3 times and couldn't break anything or produce any error messages. So, I hope it's fixed for good :)

The initial population was done with rsync also, using the vol files you have seen.

The only (currently) remaining oddity is this line in the server error log file:

2009-01-14 12:05:32 C [posix.c:2841:ensure_file_type] posix-unify-switch-ns: entry /distfs-storage-space/glusterfs/unify-switch-ns/bio/db/fasta/blastp-nr is a different type of file than expected

That file, blastp-nr, is actually a symlink to a file named blastp-nr.huge. My users need that file to stay named "blastp-nr", but I needed to give it an extension so the switch scheduler would match it to my stripe volume; because the file is so gigantic, I wanted the benefit of it being served from all 4 gluster servers. So I renamed the file to blastp-nr.huge (*.huge is in my switch scheduler config line) and made a symlink from the original name to it.

Do you think this is a problem?

Dan Parsons


On Jan 14, 2009, at 11:42 AM, Anand Avati wrote:

Dan,
 for the purpose of debugging, can you do 'option self-heal off' in
unify and try again (with a fresh log file)?

thanks,
avati

2009/1/15 Dan Parsons <dparsons@xxxxxxxx>:
Just to clarify, the problems below occurred on tla 846, which is, to my
knowledge, the latest.


Dan Parsons


On Jan 14, 2009, at 11:24 AM, Dan Parsons wrote:

Every time I try to do a big rsync operation, the glusterfs client is
crashing on me. First there are a ton of self-heal messages, one per
directory that rsync is examining, and then eventually a segfault. I tried using tla814 (the officially released rc7) but that broke even faster and
with different error messages.

ANY help on this would be appreciated, I thought I was nearing the end of
this project :-(

OS: CentOS 5.2
Kernel: 2.6.23.14
Fuse: 2.7.3glfs10

Errors:
(imagine a million more of the below line)
2009-01-14 11:20:11 W [unify-self-heal.c:593:unify_sh_checksum_cbk] unify: Self-heal triggered on directory /bio/db/hmm/panther/books/PTHR10210/SF3
pending frames:
frame : type(1) op(LOOKUP)
frame : type(1) op(LOOKUP)
frame : type(1) op(LOOKUP)
frame : type(1) op(LOOKUP)
frame : type(1) op(LOOKUP)
frame : type(1) op(LOOKUP)
frame : type(1) op(LOOKUP)
frame : type(1) op(LOOKUP)
frame : type(1) op(LOOKUP)
frame : type(1) op(LOOKUP)

patchset: glusterfs--mainline--3.0--patch-846
signal received: 11
configuration details:
argp 1
backtrace 1
db.h 1
dlfcn 1
fdatasync 1
libpthread 1
llistxattr 1
setfsid 1
spinlock 1
epoll.h 1
xattr.h 1
st_atim.tv_nsec 1
package-string: glusterfs 1.4.0tla846
/lib64/libc.so.6[0x359ba301b0]
/lib64/libc.so.6[0x359ba71865]
/lib64/libc.so.6(__libc_malloc+0x7d)[0x359ba72efd]
/lib64/libc.so.6(__strdup+0x22)[0x359ba78092]
/usr/local/lib/libglusterfs.so.0(loc_copy+0x40)[0x2b63d6dbebc0]
/usr/local/lib/glusterfs/1.4.0tla846/xlator/cluster/unify.so(unify_lookup+0x85)[0x2b63d76b0bb5]
/usr/local/lib/glusterfs/1.4.0tla846/xlator/performance/io-cache.so(ioc_lookup+0xc0)[0x2b63d78bd0e0]
/usr/local/lib/glusterfs/1.4.0tla846/xlator/features/filter.so(filter_lookup+0xb5)[0x2b63d7ac5135]
/usr/local/lib/glusterfs/1.4.0tla846/xlator/mount/fuse.so[0x2b63d7cd2175]
/usr/local/lib/glusterfs/1.4.0tla846/xlator/mount/fuse.so[0x2b63d7cd4c91]
/lib64/libpthread.so.0[0x359ce06307]
/lib64/libc.so.6(clone+0x6d)[0x359bad1ded]
---------

Client config:

volume unify-switch-ns
type protocol/client
option transport-type tcp
option remote-host 10.8.101.51
option remote-subvolume posix-unify-switch-ns
end-volume

#volume distfs01-ns-readahead
#   type performance/read-ahead
#   option page-size 1MB
#   option page-count 8
#   subvolumes distfs01-ns-brick
#end-volume

#volume unify-switch-ns
#   type performance/write-behind
#   option block-size 1MB
#   option cache-size 3MB
#   subvolumes distfs01-ns-readahead
#end-volume

volume distfs01-unify
type protocol/client
option transport-type tcp
option remote-host 10.8.101.51
option remote-subvolume posix-unify
end-volume

volume distfs02-unify
type protocol/client
option transport-type tcp
option remote-host 10.8.101.52
option remote-subvolume posix-unify
end-volume

volume distfs03-unify
type protocol/client
option transport-type tcp
option remote-host 10.8.101.53
option remote-subvolume posix-unify
end-volume

volume distfs04-unify
type protocol/client
option transport-type tcp
option remote-host 10.8.101.54
option remote-subvolume posix-unify
end-volume

volume distfs01-stripe
type protocol/client
option transport-type tcp
option remote-host 10.8.101.51
option remote-subvolume posix-stripe
end-volume

volume distfs02-stripe
type protocol/client
option transport-type tcp
option remote-host 10.8.101.52
option remote-subvolume posix-stripe
end-volume

volume distfs03-stripe
type protocol/client
option transport-type tcp
option remote-host 10.8.101.53
option remote-subvolume posix-stripe
end-volume

volume distfs04-stripe
type protocol/client
option transport-type tcp
option remote-host 10.8.101.54
option remote-subvolume posix-stripe
end-volume

volume stripe0
      type cluster/stripe
      option block-size *.jar,*.pin:1MB,*:2MB
      subvolumes distfs01-stripe distfs02-stripe distfs03-stripe distfs04-stripe
end-volume

volume dht0
      type cluster/dht
      subvolumes distfs01-unify distfs02-unify distfs03-unify distfs04-unify
end-volume

volume unify
      type cluster/unify
      option namespace unify-switch-ns
      option scheduler switch
# send *.phr/psq/pnd etc to stripe0, send the rest to hash
# extensions have to be *.foo* and not simply *.foo or rsync's tmp file
# naming will prevent files from being matched
      option scheduler.switch.case *.phr*:stripe0;*.psq*:stripe0;*.pnd*:stripe0;*.psd*:stripe0;*.pin*:stripe0;*.nsi*:stripe0;*.nin*:stripe0;*.nsd*:stripe0;*.nhr*:stripe0;*.nsq*:stripe0;*.tar*:stripe0;*.tar.gz*:stripe0;*.jar*:stripe0;*.img*:stripe0;*.perf*:stripe0;*.tgz*:stripe0;*.fasta*:stripe0;*.huge*:stripe0
      subvolumes stripe0 dht0
end-volume

volume ioc
type performance/io-cache
subvolumes unify
option cache-size 1500MB
end-volume

volume filter
type features/filter
option fixed-uid 0
option fixed-gid 900
subvolumes ioc
end-volume



Server config:

volume posix-unify
      type storage/posix
      option directory /distfs-storage-space/glusterfs/unify
# the below line is here to make the output of 'df' accurate, as both
# volumes are served from the same local drive
      option export-statfs-size off
end-volume

volume posix-stripe
     type storage/posix
     option directory /distfs-storage-space/glusterfs/stripe
end-volume

volume posix-unify-switch-ns
     type storage/posix
     option directory /distfs-storage-space/glusterfs/unify-switch-ns
end-volume

volume server
      type protocol/server
      option transport-type tcp
      option auth.addr.posix-unify.allow 10.8.101.*
      option auth.addr.posix-stripe.allow 10.8.101.*
      option auth.addr.posix-unify-switch-ns.allow 10.8.101.*
      subvolumes posix-unify posix-stripe posix-unify-switch-ns
end-volume

Dan Parsons




_______________________________________________
Gluster-devel mailing list
Gluster-devel@xxxxxxxxxx
http://lists.nongnu.org/mailman/listinfo/gluster-devel




_______________________________________________
Gluster-devel mailing list
Gluster-devel@xxxxxxxxxx
http://lists.nongnu.org/mailman/listinfo/gluster-devel







[Index of Archives]     [Gluster Users]     [Ceph Users]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Announce]     [Security]     [Bugtraq]     [Linux]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux