I had this on the gluster-users list, but it really seems to be more of a development topic.

On Fri, Feb 26, 2010 at 5:22 PM, Mike Terzo <mterzo@xxxxxxxxx> wrote:
> I found this in the archives:
> http://gluster.org/pipermail/gluster-users/20081027/000523.html
>
> I don't see any follow-ups with any sort of solution. I'm running
> gluster 3.0.2 with patch 2659 with a 64-bit client and 64-bit server
> without any issues. I turned up another box running a 32-bit client
> pointing to 2 different 64-bit servers and hit the same issues listed
> above. The kernels on both client boxes are 2.6.30; neither has any
> patches applied.
>
> Here's my client output:
>
> pending frames:
> frame : type(1) op(LOOKUP)
> frame : type(1) op(STAT)
> frame : type(1) op(STAT)
>
> patchset: v3.0.2
> signal received: 11
> time of crash: 2010-02-26 16:01:08
> configuration details:
> argp 1
> backtrace 1
> dlfcn 1
> fdatasync 1
> libpthread 1
> llistxattr 1
> setfsid 1
> spinlock 1
> epoll.h 1
> xattr.h 1
> st_atim.tv_nsec 1
> package-string: glusterfs 3.0.2
> [0xffffe400]
> /usr/lib/libglusterfs.so.0[0xb7ffa8a4]
> /usr/lib/libglusterfs.so.0(inode_unref+0x39)[0xb7ffb571]
> /usr/lib/libglusterfs.so.0(loc_wipe+0x25)[0xb7feec52]
> /usr/lib/libglusterfs.so.0(call_stub_destroy+0x786)[0xb800208e]
> /usr/lib/libglusterfs.so.0(call_resume+0x73)[0xb80022a8]
> /usr/lib/glusterfs/3.0.2/xlator/performance/io-threads.so(iot_worker_unordered+0x20)[0xb75e0ae3]
> /lib/tls/libpthread.so.0[0xb7fcd1ce]
> /lib/tls/libc.so.6(__clone+0x5e)[0xb7f6290e]
> ---------
> Segmentation fault (core dumped)
>
> I've turned off io-threads and the process hasn't cored again. Is it
> general practice not to mix a 32-bit client with 64-bit servers?
>
> Thanks,
> --mike terzo

With io-threads out of the graph, I got a much nicer-looking stack trace:
pending frames:
frame : type(1) op(LOOKUP)
frame : type(1) op(STAT)
frame : type(1) op(STAT)

patchset: v3.0.2
signal received: 11
time of crash: 2010-02-26 18:28:58
configuration details:
argp 1
backtrace 1
dlfcn 1
fdatasync 1
libpthread 1
llistxattr 1
setfsid 1
spinlock 1
epoll.h 1
xattr.h 1
st_atim.tv_nsec 1
package-string: glusterfs 3.0.2
[0xffffe400]
/usr/lib/libglusterfs.so.0[0xb7f0b8a4]
/usr/lib/libglusterfs.so.0(inode_unref+0x39)[0xb7f0c571]
/usr/lib/libglusterfs.so.0(loc_wipe+0x25)[0xb7effc52]
/usr/lib/glusterfs/3.0.2/xlator/mount/fuse.so[0xb74db3b0]
/usr/lib/glusterfs/3.0.2/xlator/mount/fuse.so[0xb74e4c19]
/usr/lib/libglusterfs.so.0[0xb7f0261e]
/usr/lib/glusterfs/3.0.2/xlator/performance/write-behind.so(wb_stat_cbk+0x179)[0xb74fe035]
/usr/lib/libglusterfs.so.0[0xb7f0261e]
/usr/lib/glusterfs/3.0.2/xlator/cluster/replicate.so(afr_stat_cbk+0xb9)[0xb751f82a]
/usr/lib/glusterfs/3.0.2/xlator/protocol/client.so(client_stat_cbk+0x1bd)[0xb755b525]
/usr/lib/glusterfs/3.0.2/xlator/protocol/client.so(protocol_client_interpret+0x1e1)[0xb754b85d]
/usr/lib/glusterfs/3.0.2/xlator/protocol/client.so(protocol_client_pollin+0xbe)[0xb754c0af]
/usr/lib/glusterfs/3.0.2/xlator/protocol/client.so(notify+0x204)[0xb754f918]
/usr/lib/libglusterfs.so.0(xlator_notify+0x39)[0xb7effae4]
/usr/lib/glusterfs/3.0.2/transport/socket.so(socket_event_poll_in+0x39)[0xb6cd1f96]
/usr/lib/glusterfs/3.0.2/transport/socket.so(socket_event_handler+0x52)[0xb6cd3c6e]
/usr/lib/libglusterfs.so.0[0xb7f1962e]
/usr/lib/libglusterfs.so.0(event_dispatch+0x21)[0xb7f199ae]
glusterfs(main+0xc92)[0x804bcc7]
/lib/tls/libc.so.6(__libc_start_main+0xd4)[0xb7dbeea4]
glusterfs[0x8049e21]

Here's the backtrace of the core:

(gdb) bt
#0  __inode_invalidate (inode=0x805dd14) at inode.c:993
#1  0xb7f0b8a4 in inode_table_prune (table=0x805dd18) at inode.c:1022
#2  0xb7f0c571 in inode_unref (inode=0x805d858) at inode.c:399
#3  0xb7effc52 in loc_wipe (loc=0xb4889aa4) at xlator.c:995
#4  0xb74db3b0 in free_state (state=0xb4889a98) at fuse-bridge.c:182
#5  0xb74e4c19 in fuse_attr_cbk (frame=0xb4892b7c, cookie=0xb04162e0, this=0x8052248, op_ret=0, op_errno=0, buf=0xbfd15c9c) at fuse-bridge.c:731
#6  0xb7f0261e in default_stat_cbk (frame=0xb04162e0, cookie=0xb0667c48, this=0x8058310, op_ret=0, op_errno=0, buf=0x0) at defaults.c:88
#7  0xb74fe035 in wb_stat_cbk (frame=0xb0667c48, cookie=0xb046bf00, this=0x8057dd8, op_ret=0, op_errno=0, buf=0x0) at write-behind.c:543
#8  0xb7f0261e in default_stat_cbk (frame=0xb046bf00, cookie=0xb06bdd28, this=0x8057828, op_ret=0, op_errno=0, buf=0x0) at defaults.c:88
#9  0xb751f82a in afr_stat_cbk (frame=0xb06bdd28, cookie=0x0, this=0x0, op_ret=0, op_errno=0, buf=0xbfd15c9c) at afr-inode-read.c:227
#10 0xb755b525 in client_stat_cbk (frame=0xb04b0b58, hdr=0xb04c5f00, hdrlen=188, iobuf=0x0) at client-protocol.c:4105
#11 0xb754b85d in protocol_client_interpret (this=0x0, trans=0x805a1d8, hdr_p=0xb04c5f00 "", hdrlen=188, iobuf=0x0) at client-protocol.c:6511
#12 0xb754c0af in protocol_client_pollin (this=0x0, trans=0x805a1d8) at client-protocol.c:6809
#13 0xb754f918 in notify (this=0x8056d18, event=2, data=0x805a1d8) at client-protocol.c:6928
#14 0xb7effae4 in xlator_notify (xl=0x8056d18, event=0, data=0x0) at xlator.c:928
#15 0xb6cd1f96 in socket_event_poll_in (this=0x805a1d8) at socket.c:729
#16 0xb6cd3c6e in socket_event_handler (fd=8, idx=0, data=0x805a1d8, poll_in=1, poll_out=0, poll_err=0) at socket.c:829
#17 0xb7f1962e in event_dispatch_epoll (event_pool=0x8051e48) at event.c:804
#18 0xb7f199ae in event_dispatch (event_pool=0x1ece) at event.c:975
#19 0x0804bcc7 in main (argc=5, argv=0xbfd16b54) at glusterfsd.c:1413

The inode's table pointer is way off:

(gdb) p *inode
$4 = {table = 0x78, lock = 1, nlookup = 33870112096256, generation = 4294967296, in_attic = 0, ref = 14057,
  ino = 578108920368060400, st_mode = 134554184, fd_list = {next = 0x539, prev = 0x805deb8}, dentry_list = {
    next = 0x8079608, prev = 0xb489c36c}, hash = {next = 0x805db44, prev = 0x24bd9}, list = {
    next = 0x805dd58, prev = 0x805dd58}, _ctx = 0xfffffac4}
(gdb) up
#1  0xb7f0b8a4 in inode_table_prune (table=0x805dd18) at inode.c:1022
1022    in inode.c
(gdb) p entry
$5 = (inode_t *) 0x1ece
(gdb) p *entry
Cannot access memory at address 0x1ece
(gdb) p table
$6 = (inode_table_t *) 0x805dd18
(gdb) p *table
$7 = {lock = {__m_reserved = 1, __m_count = 0, __m_owner = 0x1ece, __m_kind = 0, __m_lock = {__status = 1,
      __spinlock = 0}}, hashsize = 14057, name = 0x805d7f0 "fuse/inode", root = 0x805db00, xl = 0x8052248,
  lru_limit = 1337, inode_hash = 0x805deb8, name_hash = 0x8079608, active = {next = 0xb489c36c, prev = 0x805db44},
  active_size = 150489, lru = {next = 0x805dd58, prev = 0x805dd58}, lru_size = 4294965956,
  lru_callback = 0xb7f0c855 <__inode_invalidate>, purge = {next = 0x805dd68, prev = 0x805dd68}, purge_size = 0,
  inval = {next = 0xb489c864, prev = 0xb04c7234}, inval_size = 301000, attic = {next = 0xb633bf44, prev = 0x8095a64},
  attic_size = 16}

Looking at inode.c, there's a macro that gets called right before this:

#define list_entry(ptr, type, member)                                   \
        ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))

The cast through (unsigned long) is what looks bad to me. Casting the address the lru list points at (0x805dd58) to an inode_t gives nothing but garbage:

(gdb) p *((inode_t *) 0x805dd58)
$20 = {table = 0x805dd58, lock = 134602072, nlookup = 13254313975044111044, generation = 578111566067916136,
  in_attic = 0, ref = 3028928612, ino = 1292788113895988, st_mode = 3056844612, fd_list = {next = 0x8095a64,
    prev = 0x10}, dentry_list = {next = 0x21, prev = 0xb42dff0c}, hash = {next = 0xb42dff0c, prev = 0xb41bcdf8},
  list = {next = 0xb5b7f850, prev = 0xb42dfed8}, _ctx = 0x805ddb0}

A quick check of the type used in that cast:

    printf("size of: %zu\n", sizeof(unsigned long));

on my 32-bit system:  size of: 4
on a 64-bit system:   size of: 8

It looks like a type conversion is breaking everything.

--mike terzo
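P.S. In case it helps anyone poke at this outside of glusterfs, here's a small stand-alone program I used to convince myself what that list_entry() macro is doing. struct fake_inode and its fields are made up for the illustration (they're not the real inode_t); the macro body is the same shape as the one quoted above, and the program also prints the sizeof(unsigned long) difference between my 32-bit client and the 64-bit servers.

/* Stand-alone sketch, not glusterfs code: fake_inode is invented for
 * illustration only. */
#include <stdio.h>
#include <stddef.h>

struct list_head {
        struct list_head *next;
        struct list_head *prev;
};

/* Same shape as the list_entry() macro in inode.c: subtract the member's
 * offset (computed via the cast through unsigned long) from the member's
 * address to get back to the containing struct. */
#define list_entry(ptr, type, member) \
        ((type *)((char *)(ptr) - (unsigned long)(&((type *)0)->member)))

struct fake_inode {
        unsigned long    nlookup;
        struct list_head list;   /* embedded at a non-zero offset */
};

int main(void)
{
        struct fake_inode ino = { .nlookup = 42 };
        struct list_head *ptr = &ino.list;

        /* Recover the containing struct from its embedded list node, which
         * is roughly how inode_table_prune() appears to get `entry` from
         * the lru list. */
        struct fake_inode *entry = list_entry(ptr, struct fake_inode, list);

        printf("sizeof(unsigned long): %zu\n", sizeof(unsigned long));
        printf("offsetof(list):        %zu\n", offsetof(struct fake_inode, list));
        printf("entry->nlookup:        %lu\n", entry->nlookup);
        return 0;
}

Built with gcc -m32 it prints 4 for sizeof(unsigned long); the regular 64-bit build prints 8, matching the numbers above.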