Justin, I only got a chance to look into this yesterday, as last week I was
in RHEL 7 training. I have some interesting findings to share:

When I execute the script which sets options for two different volumes (one
of them in the background) in a loop, after a few iterations the memory
consumption of glusterd jumps to more than 1 GB. The iteration at which this
happens is not fixed, but memory consumption increases with every iteration,
which shows there is certainly a memory leak here (a rough sketch of the kind
of loop I am running is at the bottom of this mail). I am going to spend some
time today debugging it further and hope to find the root cause.

~Atin

On 12/09/2014 03:43 AM, Justin Clift wrote:
> How's this investigation stuff going? :)
>
> + Justin
>
>
> ----- Original Message -----
>> Agreed, Justin. It does look like a bug.
>> I found something significant now. I wrote a small script to execute two
>> volume set commands in parallel (for two different volumes) in a loop of
>> 1000 iterations, and glusterd hung after a few iterations. Glusterd was
>> not dead, but it did not respond at all.
>>
>> Following is the backtrace:
>>
>> Thread 6 (Thread 0x7f6f6ecf5700 (LWP 30417)):
>> #0  0x00007f6f76d67fbd in nanosleep () from /lib64/libpthread.so.0
>> #1  0x00007f6f77a04014 in gf_timer_proc (ctx=0x1396010) at timer.c:170
>> #2  0x00007f6f76d60f33 in start_thread () from /lib64/libpthread.so.0
>> #3  0x00007f6f766a7ded in clone () from /lib64/libc.so.6
>>
>> Thread 5 (Thread 0x7f6f6e4f4700 (LWP 30418)):
>> #0  0x00007f6f76d684f1 in sigwait () from /lib64/libpthread.so.0
>> #1  0x00000000004079e7 in glusterfs_sigwaiter (arg=<optimized out>) at glusterfsd.c:1728
>> #2  0x00007f6f76d60f33 in start_thread () from /lib64/libpthread.so.0
>> #3  0x00007f6f766a7ded in clone () from /lib64/libc.so.6
>>
>> Thread 4 (Thread 0x7f6f6dcf3700 (LWP 30419)):
>> #0  0x00007f6f76d6759d in __lll_lock_wait () from /lib64/libpthread.so.0
>> #1  0x00007f6f76d63194 in _L_lock_874 () from /lib64/libpthread.so.0
>> #2  0x00007f6f76d63093 in pthread_mutex_lock () from /lib64/libpthread.so.0
>> #3  0x00007f6f766e2612 in _dl_addr () from /lib64/libc.so.6
>> #4  0x00007f6f766b67d5 in backtrace_symbols_fd () from /lib64/libc.so.6
>> #5  0x00007f6f779fdc88 in gf_backtrace_fillframes (
>>     buf=buf@entry=0x13eb280 "(--> /usr/local/lib/libglusterfs.so.0(synctask_yield+0x2c)[0x7f6f77a21cac] (--> /usr/local/lib/libglusterfs.so.0(+0x57dc9)[0x7f6f77a21dc9] (--> /usr/local/lib/libglusterfs.so.0(synclock_lock+0x16)[0x7"...) at common-utils.c:3335
>> #6  0x00007f6f77a03631 in gf_backtrace_save (
>>     buf=buf@entry=0x13eb280 "(--> /usr/local/lib/libglusterfs.so.0(synctask_yield+0x2c)[0x7f6f77a21cac] (--> /usr/local/lib/libglusterfs.so.0(+0x57dc9)[0x7f6f77a21dc9] (--> /usr/local/lib/libglusterfs.so.0(synclock_lock+0x16)[0x7"...)
>>     at common-utils.c:3391
>> #7  0x00007f6f77a21cac in synctask_yield (task=task@entry=0x13eadf0) at syncop.c:296
>> #8  0x00007f6f77a21dc9 in __synclock_lock (lock=lock@entry=0x13e0138) at syncop.c:760
>> #9  0x00007f6f77a24586 in synclock_lock (lock=lock@entry=0x13e0138) at syncop.c:784
>> #10 0x00007f6f6ca3242d in glusterd_big_locked_cbk (req=0x13e34ec, iov=0x0, count=0,
>>     myframe=0x7f6f759ff24c, fn=0x7f6f6ca766d0 <_gd_syncop_stage_op_cbk>)
>>     at glusterd-rpc-ops.c:207
>> #11 0x00007f6f777be9ff in rpc_clnt_submit (rpc=rpc@entry=0x13e3260,
>>     prog=prog@entry=0x7f6f6cce9ca0 <gd_mgmt_prog>, procnum=procnum@entry=3,
>>     cbkfn=cbkfn@entry=0x7f6f6ca75330 <gd_syncop_stage_op_cbk>,
>>     proghdr=proghdr@entry=0x17eb970, proghdrcount=proghdrcount@entry=1,
>>     progpayload=progpayload@entry=0x0, progpayloadcount=progpayloadcount@entry=0,
>>     iobref=iobref@entry=0x7f6f4bd4a9c0, frame=frame@entry=0x7f6f759ff24c,
>>     rsphdr=rsphdr@entry=0x0, rsphdr_count=rsphdr_count@entry=0,
>>     rsp_payload=rsp_payload@entry=0x0, rsp_payload_count=rsp_payload_count@entry=0,
>>     rsp_iobref=rsp_iobref@entry=0x0) at rpc-clnt.c:1601
>> #12 0x00007f6f6ca76470 in gd_syncop_submit_request (rpc=0x13e3260,
>>     req=req@entry=0x7f6f60239830, local=local@entry=0x17ebaa0,
>>     cookie=cookie@entry=0x13e2770, prog=0x7f6f6cce9ca0 <gd_mgmt_prog>,
>>     procnum=procnum@entry=3, cbkfn=cbkfn@entry=0x7f6f6ca75330 <gd_syncop_stage_op_cbk>,
>>     xdrproc=0x7f6f775a4910 <xdr_gd1_mgmt_stage_op_req>) at glusterd-syncop.c:198
>> #13 0x00007f6f6ca773d6 in gd_syncop_mgmt_stage_op (peerinfo=peerinfo@entry=0x13e2770,
>>     args=args@entry=0x17ebaa0,
>>     my_uuid=my_uuid@entry=0x13deff8 "y\205mKLMF\340\241\366\340\037#\227$x/var/lib/glusterd",
>>     recv_uuid=recv_uuid@entry=0x17eba90 "", op=op@entry=11,
>>     dict_out=dict_out@entry=0x7f6f754045e4, op_ctx=op_ctx@entry=0x7f6f754045e4)
>>     at glusterd-syncop.c:749
>> #14 0x00007f6f6ca77d4b in gd_stage_op_phase (peers=peers@entry=0x13defe0,
>>     op=<optimized out>, op_ctx=op_ctx@entry=0x7f6f754045e4, req_dict=0x7f6f754045e4,
>>     op_errstr=op_errstr@entry=0x17ec2a8, npeers=npeers@entry=1) at glusterd-syncop.c:1169
>> #15 0x00007f6f6ca790b8 in gd_sync_task_begin (op_ctx=op_ctx@entry=0x7f6f754045e4,
>>     req=req@entry=0x13da238) at glusterd-syncop.c:1619
>> #16 0x00007f6f6ca7928c in glusterd_op_begin_synctask (req=req@entry=0x13da238,
>>     op=op@entry=GD_OP_SET_VOLUME, dict=0x7f6f754045e4) at glusterd-syncop.c:1680
>> #17 0x00007f6f6c9e52e6 in __glusterd_handle_set_volume (req=req@entry=0x13da238)
>>     at glusterd-handler.c:1762
>> #18 0x00007f6f6c9f1d60 in glusterd_big_locked_handler (req=0x13da238,
>>     actor_fn=0x7f6f6c9e4dd0 <__glusterd_handle_set_volume>) at glusterd-handler.c:79
>> #19 0x00007f6f77a21d32 in synctask_wrap (old_task=<optimized out>) at syncop.c:335
>> #20 0x00007f6f765faa00 in ?? () from /lib64/libc.so.6
>> #21 0x0000000000000000 in ?? ()
>>
>> Thread 3 (Thread 0x7f6f6d4f2700 (LWP 30420)):
>> #0  0x00007f6f76d650c9 in pthread_cond_timedwait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
>> #1  0x00007f6f77a237ef in syncenv_task (proc=proc@entry=0x13bfe90) at syncop.c:535
>> #2  0x00007f6f77a24370 in syncenv_processor (thdata=0x13bfe90) at syncop.c:613
>> #3  0x00007f6f76d60f33 in start_thread () from /lib64/libpthread.so.0
>> #4  0x00007f6f766a7ded in clone () from /lib64/libc.so.6
>>
>> Thread 2 (Thread 0x7f6f6564a700 (LWP 30546)):
>> #0  0x00007f6f76d64d20 in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
>> #1  0x00007f6f6ca7a183 in hooks_worker (args=<optimized out>) at glusterd-hooks.c:502
>> #2  0x00007f6f76d60f33 in start_thread () from /lib64/libpthread.so.0
>> #3  0x00007f6f766a7ded in clone () from /lib64/libc.so.6
>>
>> Thread 1 (Thread 0x7f6f77e7c740 (LWP 30416)):
>> #0  0x00007f6f766e26bf in _dl_addr () from /lib64/libc.so.6
>> #1  0x00007f6f766b67d5 in backtrace_symbols_fd () from /lib64/libc.so.6
>> #2  0x00007f6f779fdc88 in gf_backtrace_fillframes (
>>     buf=buf@entry=0x1396378 "(--> /usr/local/lib/libglusterfs.so.0(_gf_log_callingfn+0x15c)[0x7f6f779ea1bc] (--> /usr/local/lib/libgfrpc.so.0(saved_frames_unwind+0x1c2)[0x7f6f777bd942] (--> /usr/local/lib/libgfrpc.so.0(saved_fram"...) at common-utils.c:3335
>> #3  0x00007f6f77a03631 in gf_backtrace_save (buf=buf@entry=0x0) at common-utils.c:3391
>> #4  0x00007f6f779ea1bc in _gf_log_callingfn (domain=0x13e62b0 "management",
>>     file=file@entry=0x7f6f777c4746 "rpc-clnt.c",
>>     function=function@entry=0x7f6f777c4ff0 <__FUNCTION__.13438> "saved_frames_unwind",
>>     line=line@entry=362, level=level@entry=GF_LOG_ERROR,
>>     fmt=fmt@entry=0x7f6f777c4930 "forced unwinding frame type(%s) op(%s(%d)) called at %s (xid=0x%x)") at logging.c:822
>> #5  0x00007f6f777bd942 in saved_frames_unwind (saved_frames=saved_frames@entry=0x13e71e0)
>>     at rpc-clnt.c:353
>> #6  0x00007f6f777bda3e in saved_frames_destroy (frames=0x13e71e0) at rpc-clnt.c:383
>> #7  0x00007f6f777bf22c in rpc_clnt_connection_cleanup (conn=conn@entry=0x13e3290)
>>     at rpc-clnt.c:536
>> #8  0x00007f6f777bf9c8 in rpc_clnt_notify (trans=<optimized out>, mydata=0x13e3290,
>>     event=<optimized out>, data=<optimized out>) at rpc-clnt.c:843
>> #9  0x00007f6f777bbd33 in rpc_transport_notify (this=this@entry=0x13e62f0,
>>     event=event@entry=RPC_TRANSPORT_DISCONNECT, data=data@entry=0x13e62f0)
>>     at rpc-transport.c:516
>> #10 0x00007f6f66850962 in socket_event_poll_err (this=0x13e62f0) at socket.c:1161
>> #11 socket_event_handler (fd=<optimized out>, idx=3, data=data@entry=0x13e62f0, poll_in=1,
>>     poll_out=4, poll_err=<optimized out>) at socket.c:2353
>> #12 0x00007f6f77a3dd82 in event_dispatch_epoll_handler (i=<optimized out>, events=0x13e0700,
>>     event_pool=0x13b43c0) at event-epoll.c:384
>> #13 event_dispatch_epoll (event_pool=0x13b43c0) at event-epoll.c:445
>> #14 0x0000000000404e89 in main (argc=1, argv=0x7fff7a929408) at glusterfsd.c:2046
>>
>> From the backtrace, the problematic area appears to be in Threads 1 and 4
>> (both are inside _dl_addr(), reached via backtrace_symbols_fd() while
>> saving a backtrace); this is probably a deadlock. It needs further
>> analysis, but I will be in training for the entire week, so I will jump
>> into it early next week.
>>
>> ~Atin
>>
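For reference, the reproduction loop I mention at the top is roughly of the
following shape. This is only a minimal sketch, not the exact script I ran:
the volume names (vol1, vol2), the option being set, and the RSS check via
ps are placeholders to illustrate setting options on two volumes in parallel
(one in the background) while watching glusterd's memory grow.

#!/bin/bash
# Rough repro sketch: set a volume option on two different volumes in
# parallel (one of the two commands in the background), in a loop, and
# print glusterd's resident memory after each iteration.
# NOTE: vol1/vol2 and the option/value below are placeholders, not the
# exact ones from my script.
for i in $(seq 1 1000); do
    gluster volume set vol1 performance.write-behind off &
    gluster volume set vol2 performance.write-behind on
    wait
    echo "iteration $i: glusterd RSS (kB): $(ps -o rss= -C glusterd)"
done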