If you look at the source code of cleanup_and_exit(), we are getting the SIGSEGV crash when 'exit(0)' is called.
On Mon, Sep 24, 2018 at 1:41 PM Pranith Kumar Karampuri <pkarampu@xxxxxxxxxx> wrote:
On Mon, Sep 24, 2018 at 1:36 PM ABHISHEK PALIWAL <abhishpaliwal@xxxxxxxxx> wrote:
Hi Sanju,
Do you have any update on this?
This seems to happen while the process is dying, in libc. I am not completely sure if there is anything glusterfs is contributing to it from the bt at the moment. Do you have any steps to re-create this problem? It is probably better to run the steps with valgrind/address-sanitizer and see if it points to the problem in glusterfs.
Regards,
Abhishek
On Fri, Sep 21, 2018 at 4:07 PM ABHISHEK PALIWAL <abhishpaliwal@xxxxxxxxx> wrote:
Hi Sanju,
Output of 't a a bt full':
(gdb) t a a bt full
Thread 7 (LWP 1743):
#0 0x00003fffa3ea7e88 in __lll_lock_wait (futex=0x0, private=0) at lowlevellock.c:43
r4 = 128
r7 = 0
arg2 = 128
r5 = 2
r8 = 1
r0 = 221
r3 = 0
r6 = 0
arg1 = 0
__err = 221
__ret = 0
#1 0x00003fffa3e9ef64 in __GI___pthread_mutex_lock (mutex=0x100272a8) at ../nptl/pthread_mutex_lock.c:81
__futex = 0x100272a8
__PRETTY_FUNCTION__ = "__pthread_mutex_lock"
type = <optimized out>
id = <optimized out>
#2 0x00003fffa3f6ce8c in _gf_msg (domain=0x3fff98006c90 "c_glusterfs-client-0", file=0x3fff9fb34de0 "client.c", function=0x3fff9fb34cd8 <__FUNCTION__.18849> "notify",
line=<optimized out>, level=<optimized out>, errnum=<optimized out>, trace=<optimized out>, msgid=114020,
fmt=0x3fff9fb35350 "parent translators are ready, attempting connect on transport") at logging.c:2058
ret = <optimized out>
msgstr = <optimized out>
ap = <optimized out>
this = 0x3fff980061f0
ctx = 0x10027010
callstr = '\000' <repeats 4095 times>
passcallstr = 0
log_inited = 0
__PRETTY_FUNCTION__ = "_gf_msg"
#3 0x00003fff9fb084ac in notify (this=0x3fff980061f0, event=<optimized out>, data=<optimized out>) at client.c:2116
conf = 0x3fff98056dd0
__FUNCTION__ = "notify"
#4 0x00003fffa3f68ca0 in xlator_notify (xl=0x3fff980061f0, event=<optimized out>, data=<optimized out>) at xlator.c:491
old_THIS = 0x3fff98008c50
ret = 0
#5 0x00003fffa3f87700 in default_notify (this=0x3fff98008c50, event=<optimized out>, data=<optimized out>) at defaults.c:2302
list = 0x3fff9800a340
#6 0x00003fff9fac922c in afr_notify (this=0x3fff98008c50, event=1, data=<optimized out>, data2=<optimized out>) at afr-common.c:3967
priv = 0x3fff98010050
i = <optimized out>
up_children = <optimized out>
down_children = <optimized out>
propagate = 1
had_heard_from_all = <optimized out>
---Type <return> to continue, or q <return> to quit---
have_heard_from_all = 0
idx = <optimized out>
ret = 0
call_psh = <optimized out>
input = 0x0
output = 0x0
had_quorum = <optimized out>
has_quorum = <optimized out>
__FUNCTION__ = "afr_notify"
#7 0x00003fff9fad4994 in notify (this=<optimized out>, event=<optimized out>, data=<optimized out>) at afr.c:38
ret = -1
ap = 0x3fffa034cc58 ""
data2 = <optimized out>
#8 0x00003fffa3f68ca0 in xlator_notify (xl=0x3fff98008c50, event=<optimized out>, data=<optimized out>) at xlator.c:491
old_THIS = 0x3fff9800a4c0
ret = 0
#9 0x00003fffa3f87700 in default_notify (this=0x3fff9800a4c0, event=<optimized out>, data=<optimized out>) at defaults.c:2302
list = 0x3fff9800b710
#10 0x00003fff9fa6b1e4 in notify (this=<optimized out>, event=<optimized out>, data=<optimized out>) at io-stats.c:3064
ret = 0
args = {type = IOS_DUMP_TYPE_NONE, u = {logfp = 0x0, dict = 0x0}}
op = 0
list_cnt = 0
throughput = 0
time = 0
is_peek = _gf_false
ap = 0x3fffa034ce68 ""
__FUNCTION__ = "notify"
#11 0x00003fffa3f68ca0 in xlator_notify (xl=0x3fff9800a4c0, event=<optimized out>, data=<optimized out>) at xlator.c:491
old_THIS = 0x3fffa402d290 <global_xlator>
ret = 0
#12 0x00003fffa3fbd560 in glusterfs_graph_parent_up (graph=<optimized out>) at graph.c:440
trav = 0x3fff9800a4c0
ret = <optimized out>
#13 0x00003fffa3fbdb90 in glusterfs_graph_activate (graph=0x3fff98000af0, ctx=0x10027010) at graph.c:688
ret = <optimized out>
__FUNCTION__ = "glusterfs_graph_activate"
#14 0x000000001000a49c in glusterfs_process_volfp (ctx=0x10027010, fp=0x3fff98001cd0) at glusterfsd.c:2221
graph = 0x3fff98000af0
ret = <optimized out>
trav = <optimized out>
__FUNCTION__ = <error reading variable __FUNCTION__ (Cannot access memory at address 0x10010ec0)>
#15 0x000000001000fd08 in mgmt_getspec_cbk (req=<optimized out>, iov=<optimized out>, count=<optimized out>, myframe=0x3fffa2bea06c) at glusterfsd-mgmt.c:1561
rsp = {op_ret = 1059, op_errno = 0,
spec = 0x3fff980018a0 "volume c_glusterfs-client-0\n type protocol/client\n option password 5fd8d83d-99f3-4630-97c6-965d7a8ead62\n option username e65687aa-e135-445e-8778-48bb8fb19640\n option transport-type tcp\n "..., xdata = {xdata_len = 0, xdata_val = 0x0}}
---Type <return> to continue, or q <return> to quit---
frame = 0x3fffa2bea06c
ctx = 0x10027010
ret = <optimized out>
size = 1059
tmpfp = 0x3fff98001cd0
volfilebuf = 0x0
__FUNCTION__ = <error reading variable __FUNCTION__ (Cannot access memory at address 0x10013570)>
#16 0x00003fffa3f21ec4 in rpc_clnt_handle_reply (clnt=0x10089020, pollin=0x3fff98001760) at rpc-clnt.c:775
conn = 0x10089050
saved_frame = <optimized out>
ret = <optimized out>
req = 0x1008931c
xid = 1
__FUNCTION__ = "rpc_clnt_handle_reply"
#17 0x00003fffa3f223d0 in rpc_clnt_notify (trans=<optimized out>, mydata=0x10089050, event=<optimized out>, data=<optimized out>) at rpc-clnt.c:933
conn = 0x10089050
clnt = <optimized out>
ret = -1
req_info = 0x0
pollin = <optimized out>
clnt_mydata = 0x0
old_THIS = 0x3fffa402d290 <global_xlator>
__FUNCTION__ = "rpc_clnt_notify"
#18 0x00003fffa3f1d4fc in rpc_transport_notify (this=<optimized out>, event=<optimized out>, data=<optimized out>) at rpc-transport.c:546
ret = -1
__FUNCTION__ = "rpc_transport_notify"
#19 0x00003fffa0401d44 in socket_event_poll_in (this=this@entry=0x1008ab80) at socket.c:2236
ret = <optimized out>
pollin = 0x3fff98001760
priv = 0x1008b820
#20 0x00003fffa040489c in socket_event_handler (fd=<optimized out>, idx=<optimized out>, data=<optimized out>, poll_in=<optimized out>, poll_out=<optimized out>, poll_err=<optimized out>)
at socket.c:2349
this = 0x1008ab80
priv = 0x1008b820
ret = <optimized out>
__FUNCTION__ = "socket_event_handler"
#21 0x00003fffa3fe2874 in event_dispatch_epoll_handler (event=0x3fffa034d6a0, event_pool=0x10045bc0) at event-epoll.c:575
handler = @0x3fffa041f620: 0x3fffa04046f0 <socket_event_handler>
gen = 1
slot = 0x1007cd80
data = <optimized out>
ret = -1
fd = 9
ev_data = 0x3fffa034d6a8
idx = 1
#22 event_dispatch_epoll_worker (data=<optimized out>) at event-epoll.c:678
---Type <return> to continue, or q <return> to quit---
event = {events = 1, data = {ptr = 0x100000001, fd = 1, u32 = 1, u64 = 4294967297}}
ret = <optimized out>
ev_data = 0x1008bd50
event_pool = 0x10045bc0
myindex = <optimized out>
timetodie = 0
__FUNCTION__ = "event_dispatch_epoll_worker"
#23 0x00003fffa3e9bb30 in start_thread (arg=0x3fffa034e160) at pthread_create.c:462
pd = 0x3fffa034e160
now = <optimized out>
unwind_buf = {cancel_jmp_buf = {{jmp_buf = {2380233324717718430, 70367199403008, 2380233324703897146, 0, 0, 70367128645632, 70367137030688, 8388608, 70367199363104, 269008208,
70368094386592, 70367199388632, 70367200825640, 3, 0, 70367199388648, 70368094386240, 70368094386296, 4001536, 70367199364120, 70367137027904, -3187653596,
0 <repeats 42 times>}, mask_was_saved = 0}}, priv = {pad = {0x0, 0x0, 0x0, 0x0}, data = {prev = 0x0, cleanup = 0x0, canceltype = 0}}}
not_first_call = <optimized out>
pagesize_m1 = <optimized out>
sp = <optimized out>
freesize = <optimized out>
__PRETTY_FUNCTION__ = "start_thread"
#24 0x00003fffa3de60fc in .__clone () at ../sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S:96
No locals.
Thread 6 (LWP 1735):
#0 0x00003fffa3ea3ccc in __pthread_cond_timedwait (cond=0x10059a98, mutex=0x10059a70, abstime=0x3fffa141f670) at pthread_cond_timedwait.c:198
r4 = 393
r7 = 0
arg5 = 0
arg2 = <optimized out>
r5 = 2
r8 = 4294967295
arg6 = 4294967295
arg3 = 2
r0 = 221
r3 = 516
r6 = 70367154665072
arg4 = 70367154665072
arg1 = 268802716
__err = <optimized out>
__ret = <optimized out>
futex_val = 2
buffer = {__routine = @0x3fffa3ec0b50: 0x3fffa3ea3400 <__condvar_cleanup>, __arg = 0x3fffa141f540, __canceltype = 0, __prev = 0x0}
cbuffer = {oldtype = 0, cond = 0x10059a98, mutex = 0x10059a70, bc_seq = 0}
result = 0
pshared = 0
pi_flag = 0
err = <optimized out>
val = <optimized out>
---Type <return> to continue, or q <return> to quit---
seq = 0
#1 0x00003fffa3fc0e74 in syncenv_task (proc=0x10053eb0) at syncop.c:607
env = 0x10053eb0
task = 0x0
sleep_till = {tv_sec = 1536845230, tv_nsec = 0}
ret = <optimized out>
#2 0x00003fffa3fc1cdc in syncenv_processor (thdata=0x10053eb0) at syncop.c:699
env = 0x10053eb0
proc = 0x10053eb0
task = <optimized out>
#3 0x00003fffa3e9bb30 in start_thread (arg=0x3fffa1420160) at pthread_create.c:462
pd = 0x3fffa1420160
now = <optimized out>
unwind_buf = {cancel_jmp_buf = {{jmp_buf = {2380233324731669406, 70367199403008, 2380233324703897146, 0, 0, 70367146283008, 70367154668064, 8388608, 70367199363104, 268779184,
268779184, 70367199388632, 70367200820192, 3, 0, 70367199388648, 70368094386080, 70368094386136, 4001536, 70367199364120, 70367154665280, -3187653564,
0 <repeats 42 times>}, mask_was_saved = 0}}, priv = {pad = {0x0, 0x0, 0x0, 0x0}, data = {prev = 0x0, cleanup = 0x0, canceltype = 0}}}
not_first_call = <optimized out>
pagesize_m1 = <optimized out>
sp = <optimized out>
freesize = <optimized out>
__PRETTY_FUNCTION__ = "start_thread"
#4 0x00003fffa3de60fc in .__clone () at ../sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S:96
No locals.
Thread 5 (LWP 1746):
#0 0x00003fffa3ea7e38 in __lll_lock_wait (futex=0x100272a8, private=<optimized out>) at lowlevellock.c:46
r4 = 128
r7 = 0
r5 = 2
r8 = 1
arg3 = 2
r0 = 221
r3 = 512
r6 = 0
arg4 = 0
arg1 = 268595880
__err = <optimized out>
__ret = <optimized out>
#1 0x00003fffa3e9ef64 in __GI___pthread_mutex_lock (mutex=0x100272a8) at ../nptl/pthread_mutex_lock.c:81
__futex = 0x100272a8
__PRETTY_FUNCTION__ = "__pthread_mutex_lock"
type = <optimized out>
id = <optimized out>
#2 0x00003fffa3f6ce8c in _gf_msg (domain=0x3fffa4009e38 "epoll", file=0x3fffa4009e28 "event-epoll.c", function=0x3fffa4009db8 <__FUNCTION__.8510> "event_dispatch_epoll_worker",
line=<optimized out>, level=<optimized out>, errnum=<optimized out>, trace=<optimized out>, msgid=101190, fmt=0x3fffa4009f48 "Started thread with index %d") at logging.c:2058
ret = <optimized out>
---Type <return> to continue, or q <return> to quit---
msgstr = <optimized out>
ap = <optimized out>
this = 0x3fffa402d290 <global_xlator>
ctx = 0x10027010
callstr = '\000' <repeats 4095 times>
passcallstr = 0
log_inited = 0
__PRETTY_FUNCTION__ = "_gf_msg"
#3 0x00003fffa3fe265c in event_dispatch_epoll_worker (data=<optimized out>) at event-epoll.c:631
event = {events = 0, data = {ptr = 0x0, fd = 0, u32 = 0, u64 = 0}}
ret = -1
ev_data = 0x3fff9802ffd0
event_pool = 0x10045bc0
myindex = <optimized out>
timetodie = 0
__FUNCTION__ = "event_dispatch_epoll_worker"
#4 0x00003fffa3e9bb30 in start_thread (arg=0x3fff9f37d160) at pthread_create.c:462
pd = 0x3fff9f37d160
now = <optimized out>
unwind_buf = {cancel_jmp_buf = {{jmp_buf = {2380233325439331230, 70367199403008, 2380233324703897146, 0, 269006880, 70367112060928, 70367120445984, 8388608, 70367199363104,
70366999543760, 70367137025008, 70367199388632, 70367200825640, 3, 0, 70367199388648, 70367137024656, 70367137024712, 4001536, 70367199364120, 70367120443200, -3183328188,
0 <repeats 42 times>}, mask_was_saved = 0}}, priv = {pad = {0x0, 0x0, 0x0, 0x0}, data = {prev = 0x0, cleanup = 0x0, canceltype = 0}}}
not_first_call = <optimized out>
pagesize_m1 = <optimized out>
sp = <optimized out>
freesize = <optimized out>
__PRETTY_FUNCTION__ = "start_thread"
#5 0x00003fffa3de60fc in .__clone () at ../sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S:96
No locals.
Thread 4 (LWP 1733):
#0 0x00003fffa3ea9150 in .__nanosleep () at ../sysdeps/unix/syscall-template.S:84
No locals.
#1 0x00003fffa3f93f28 in gf_timer_proc (ctx=0x10027010) at timer.c:205
now = 20528921491
now_ts = {tv_sec = 20, tv_nsec = 528921491}
reg = 0x100533c0
sleepts = {tv_sec = 0, tv_nsec = 2000000}
event = 0x1008d280
old_THIS = <optimized out>
__FUNCTION__ = "gf_timer_proc"
#2 0x00003fffa3e9bb30 in start_thread (arg=0x3fffa2420160) at pthread_create.c:462
pd = 0x3fffa2420160
now = <optimized out>
unwind_buf = {cancel_jmp_buf = {{jmp_buf = {2380233324681337758, 70367199403008, 2380233324703897146, 0, 0, 70367163060224, 70367171445280, 8388608, 70367199363104, 268595216,
268776384, 70367199388632, 70367200807088, 3, 0, 70367199388648, 70368094385584, 70368094385640, 4001536, 70367199364120, 70367171442496, -3187653564,
---Type <return> to continue, or q <return> to quit---
0 <repeats 42 times>}, mask_was_saved = 0}}, priv = {pad = {0x0, 0x0, 0x0, 0x0}, data = {prev = 0x0, cleanup = 0x0, canceltype = 0}}}
not_first_call = <optimized out>
pagesize_m1 = <optimized out>
sp = <optimized out>
freesize = <optimized out>
__PRETTY_FUNCTION__ = "start_thread"
#3 0x00003fffa3de60fc in .__clone () at ../sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S:96
No locals.
Thread 3 (LWP 1732):
#0 0x00003fffa3e9d084 in pthread_join (threadid=70367137030496, thread_return=0x0) at pthread_join.c:90
r4 = 0
r7 = 2
arg2 = 0
r5 = 1743
r8 = 2
arg3 = 1743
r0 = 221
r3 = 512
r6 = 0
arg4 = 0
arg1 = 70367137030704
__err = <optimized out>
__ret = <optimized out>
__tid = 1743
_buffer = {__routine = @0x3fffa3ec0478: 0x3fffa3e9cf70 <cleanup>, __arg = 0x3fffa034e588, __canceltype = 16383, __prev = 0x0}
oldtype = 0
self = 0x3fffa405e000
result = 0
#1 0x00003fffa3fe2ca0 in event_dispatch_epoll (event_pool=0x10045bc0) at event-epoll.c:762
i = <optimized out>
t_id = 70367137030496
pollercount = 1
ret = 0
ev_data = <optimized out>
__FUNCTION__ = "event_dispatch_epoll"
#2 0x00003fffa3fab7e4 in event_dispatch (event_pool=<optimized out>) at event.c:128
ret = -1
__FUNCTION__ = "event_dispatch"
#3 0x0000000010005ea8 in main (argc=<optimized out>, argv=<optimized out>) at glusterfsd.c:2380
ctx = 0x10027010
ret = 0
cmdlinestr = "/usr/sbin/glusterfs -s localhost --volfile-id gluster/glustershd -p /system/glusterd/glustershd/run/glustershd.pid -l /var/log/glusterfs/glustershd.log -S /var/run/gluster/868ec0f3795bfb5abd176dadece9"...
cmd = 0x10027010
__FUNCTION__ = <error reading variable __FUNCTION__ (Cannot access memory at address 0x10010f00)>
---Type <return> to continue, or q <return> to quit---
#4 0x00003fffa3cfe318 in generic_start_main (main=0x10025718 <main>, argc=<optimized out>, argv=0x3fffd9450fd8, auxvec=0x3fffd9451068, init=<optimized out>, rtld_fini=<optimized out>,
stack_end=<optimized out>, fini=<optimized out>) at ../csu/libc-start.c:289
self = 0x3fffa405e000
result = <optimized out>
unwind_buf = {cancel_jmp_buf = {{jmp_buf = {2380233326476224334, 70367199177216, 2380233324705844766, 0 <repeats 13 times>, 70367201122640, 0, 70367201122648, 0, 268589712,
-3724539774, 0 <repeats 42 times>}, mask_was_saved = 0}}, priv = {pad = {0x0, 0x0, 0x3fffd9450ed0, 0x0}, data = {prev = 0x0, cleanup = 0x0, canceltype = 16383}}}
not_first_call = <optimized out>
#5 0x00003fffa3cfe54c in __libc_start_main (argc=<optimized out>, argv=<optimized out>, ev=<optimized out>, auxvec=<optimized out>, rtld_fini=<optimized out>, stinfo=<optimized out>,
stack_on_entry=<optimized out>) at ../sysdeps/unix/sysv/linux/powerpc/libc-start.c:93
No locals.
#6 0x0000000000000000 in ?? ()
No symbol table info available.
Thread 2 (LWP 1736):
#0 0x00003fffa3ea3ccc in __pthread_cond_timedwait (cond=0x10059a98, mutex=0x10059a70, abstime=0x3fffa0c1f670) at pthread_cond_timedwait.c:198
r4 = 393
r7 = 0
arg5 = 0
arg2 = <optimized out>
r5 = 2
r8 = 4294967295
arg6 = 4294967295
arg3 = 2
r0 = 221
r3 = 516
r6 = 70367146276464
arg4 = 70367146276464
arg1 = 268802716
__err = <optimized out>
__ret = <optimized out>
futex_val = 2
buffer = {__routine = @0x3fffa3ec0b50: 0x3fffa3ea3400 <__condvar_cleanup>, __arg = 0x3fffa0c1f540, __canceltype = 0, __prev = 0x0}
cbuffer = {oldtype = 0, cond = 0x10059a98, mutex = 0x10059a70, bc_seq = 0}
result = 0
pshared = 0
pi_flag = 0
err = <optimized out>
val = <optimized out>
seq = 0
#1 0x00003fffa3fc0e74 in syncenv_task (proc=0x10054468) at syncop.c:607
env = 0x10053eb0
task = 0x0
sleep_till = {tv_sec = 1536845230, tv_nsec = 0}
ret = <optimized out>
#2 0x00003fffa3fc1cdc in syncenv_processor (thdata=0x10054468) at syncop.c:699
env = 0x10053eb0
---Type <return> to continue, or q <return> to quit---
proc = 0x10054468
task = <optimized out>
#3 0x00003fffa3e9bb30 in start_thread (arg=0x3fffa0c20160) at pthread_create.c:462
pd = 0x3fffa0c20160
now = <optimized out>
unwind_buf = {cancel_jmp_buf = {{jmp_buf = {2380233324723280798, 70367199403008, 2380233324703897146, 0, 0, 70367137894400, 70367146279456, 8388608, 70367199363104, 268780648,
268780648, 70367199388632, 70367200820192, 3, 0, 70367199388648, 70368094386080, 70368094386136, 4001536, 70367199364120, 70367146276672, -3187653564,
0 <repeats 42 times>}, mask_was_saved = 0}}, priv = {pad = {0x0, 0x0, 0x0, 0x0}, data = {prev = 0x0, cleanup = 0x0, canceltype = 0}}}
not_first_call = <optimized out>
pagesize_m1 = <optimized out>
sp = <optimized out>
freesize = <optimized out>
__PRETTY_FUNCTION__ = "start_thread"
#4 0x00003fffa3de60fc in .__clone () at ../sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S:96
No locals.
Thread 1 (LWP 1734):
#0 0x00003fffa3d643b0 in _IO_unbuffer_all () at genops.c:960
__result = 0
__self = 0x3fffa1c20160
cnt = 1
fp = 0x3fff98001f00
#1 _IO_cleanup () at genops.c:1020
result = 0
#2 0x00003fffa3d1ed00 in __run_exit_handlers (status=<optimized out>, listp=<optimized out>, run_list_atexit=run_list_atexit@entry=true) at exit.c:95
ptr = 0x3fffa3e75000 <__elf_set___libc_atexit_element__IO_cleanup__>
#3 0x00003fffa3d1ee1c in __GI_exit (status=<optimized out>) at exit.c:104
No locals.
#4 0x000000001000984c in cleanup_and_exit (signum=<optimized out>) at glusterfsd.c:1295
ctx = <optimized out>
trav = <optimized out>
__FUNCTION__ = <error reading variable __FUNCTION__ (Cannot access memory at address 0x10010e38)>
#5 0x0000000010009a64 in glusterfs_sigwaiter (arg=<optimized out>) at glusterfsd.c:2016
set = {__val = {18947, 0 <repeats 15 times>}}
ret = <optimized out>
sig = 15
#6 0x00003fffa3e9bb30 in start_thread (arg=0x3fffa1c20160) at pthread_create.c:462
<p class="m_-2182742917493080152m_552285968122610423m_3610462426325886337gmail-x_MsoNormal" style="margin:0cm 0cm 0.0001pt;font-size:11pt;font-fam
--
_______________________________________________
Gluster-devel mailing list
Gluster-devel@xxxxxxxxxxx
https://lists.gluster.org/mailman/listinfo/gluster-devel