deadlock in fio client/server mode

I see what looks to me like a deadlock when running with multiple jobs
in client/server mode.
However, I'm not too used to debugging multithreaded code, so I
figured maybe some additional eyes would help.

This is on a Gen8 HP DL380 (dual socket x86_64) running CentOS EL6.

I haven't seen this when just running from the command line.

I start the daemon with:

fio --daemonize=/tmp/fio.pid.4987 --server=localhost,8098

and a client with:

fio --client=localhost 8098 --output=test_blk.20140830Z1419 test_bad2.fio

The job file, test_bad2.fio:

#
[global]

ioengine=libaio
thread=1
bs=4k
filename=/dev/sdb
size=1M
do_verify=1
iodepth=8
stonewall

[writes]
rw=write
# numjobs=3 passes
# numjobs=4 fails
numjobs=4

and sometimes (50% of the time or higher with numjobs=4) it hangs.

If I look at it in GDB, I see:
(gdb) info thread
  6 Thread 0x7f446644a700 (LWP 5017)  0x0000003ffa8acb8d in nanosleep
() from /lib64/libc.so.6
  5 Thread 0x7f4465a47700 (LWP 5023)  0x0000003ffac0e054 in
__lll_lock_wait () from /lib64/libpthread.so.0
  4 Thread 0x7f4465045700 (LWP 5024)  0x0000003ffac0e054 in
__lll_lock_wait () from /lib64/libpthread.so.0
  3 Thread 0x7f4464643700 (LWP 5025)  0x0000003ffac0e054 in
__lll_lock_wait () from /lib64/libpthread.so.0
  2 Thread 0x7f4463c41700 (LWP 5027)  0x0000003ffac0e054 in
__lll_lock_wait () from /lib64/libpthread.so.0
* 1 Thread 0x7f446f4fe720 (LWP 5016)  0x0000003ffa8acb8d in nanosleep
() from /lib64/libc.so.6
(gdb) p mp[0].lock
$1 = (struct fio_mutex *) 0x7f446f509000
(gdb) p *mp[0].lock
$2 = {
  lock = {
    __data = {
      __lock = 2,
      __count = 0,
      __owner = 5023,
      __nusers = 1,
      __kind = 128,
      __spins = 0,
      __list = {     __prev = 0x0,      __next = 0x0    }
    },
    __size = "\002\000\000\000\000\000\000\000\237\023\000\000\001\000\000\000\200",
'\000' <repeats 22 times>,
    __align = 2
  },
  cond = {
    __data = {
      __lock = 2,    __futex = 0,
      __total_seq = 18446744073709551615,
      __wakeup_seq = 0,     __woken_seq = 0,
      __mutex = 0xffffffffffffffff,
      __nwaiters = 0,    __broadcast_seq = 0
    },
    __size = "\002\000\000\000\000\000\000\000\377\377\377\377\377\377\377\377",
'\000' <repeats 16 times>"\377,
\377\377\377\377\377\377\377\000\000\000\000\000\000\000",
    __align = 2
  },
  value = 0,
  waiters = 1,
  magic = 1297437765
}

But if we look at LWP 5023, which is supposed to be the owner, it looks
like it's itself waiting on the lock.

(gdb) thread 5
[Switching to thread 5 (Thread 0x7f4465a47700 (LWP 5023))]#0
0x0000003ffac0e054 in __lll_lock_wait ()
   from /lib64/libpthread.so.0
(gdb) bt
#0  0x0000003ffac0e054 in __lll_lock_wait () from /lib64/libpthread.so.0
#1  0x0000003ffac0b4fc in pthread_cond_wait@@GLIBC_2.3.2 () from
/lib64/libpthread.so.0
#2  0x000000000043fe81 in fio_mutex_down (mutex=0x7f446f509000) at
util/fio/mutex.c:155
#3  0x000000000044a7c1 in pool_lock (ptr=0x7f446ecf51f0) at
util/fio/smalloc.c:60
#4  sfree_pool (ptr=0x7f446ecf51f0) at util/fio/smalloc.c:328
#5  sfree (ptr=0x7f446ecf51f0) at util/fio/smalloc.c:356
#6  0x00000000004284f2 in close_and_free_files (td=0x7f4466450000) at
util/fio/filesetup.c:1122
#7  0x000000000040b495 in thread_main (data=0x7f4466450000) at
util/fio/backend.c:1548
#8  0x0000003ffac07851 in start_thread () from /lib64/libpthread.so.0
#9  0x0000003ffa8e890d in clone () from /lib64/libc.so.6
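
In case it helps with reading the traces: my mental model of struct
fio_mutex (just a sketch pieced together from the fields printed above
and the mutex.c line numbers in the backtraces, not fio's actual code)
is a counting semaphore built on a pthread mutex plus condition
variable, roughly:

#include <pthread.h>

/* Sketch only -- my assumption of what util/fio/mutex.c does, based on
 * the struct dump above (lock, cond, value, waiters). */
struct sem_sketch {
        pthread_mutex_t lock;
        pthread_cond_t  cond;
        int             value;
        int             waiters;
};

static void sketch_down(struct sem_sketch *s)
{
        pthread_mutex_lock(&s->lock);                   /* cf. mutex.c:151 */
        while (s->value == 0) {
                s->waiters++;
                /* pthread_cond_wait() drops s->lock while blocked and
                 * re-acquires it on wakeup. */
                pthread_cond_wait(&s->cond, &s->lock);  /* cf. mutex.c:155 */
                s->waiters--;
        }
        s->value--;
        pthread_mutex_unlock(&s->lock);
}

static void sketch_up(struct sem_sketch *s)
{
        pthread_mutex_lock(&s->lock);                   /* cf. mutex.c:169 */
        if (s->waiters)
                pthread_cond_signal(&s->cond);
        s->value++;
        pthread_mutex_unlock(&s->lock);
}

int main(void)
{
        struct sem_sketch s = {
                .lock  = PTHREAD_MUTEX_INITIALIZER,
                .cond  = PTHREAD_COND_INITIALIZER,
                .value = 1,
        };

        sketch_down(&s);        /* take the single token */
        sketch_up(&s);          /* give it back */
        return 0;
}

If that is roughly right, a thread blocked inside pthread_cond_wait()
should have released the pthread mutex while it waits, so seeing
__owner = 5023 in the dump above while LWP 5023 is itself stuck inside
pthread_cond_wait() is what looks wrong to me.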

And here are the backtraces for all threads:

Thread 6 (Thread 0x7f446644a700 (LWP 5017)):
#0  0x0000003ffa8acb8d in nanosleep () from /lib64/libc.so.6
#1  0x0000003ffa8e1d64 in usleep () from /lib64/libc.so.6
#2  0x000000000040a558 in disk_thread_main (data=Unhandled dwarf
expression opcode 0xf3
) at util/fio/backend.c:1992
#3  0x0000003ffac07851 in start_thread () from /lib64/libpthread.so.0
#4  0x0000003ffa8e890d in clone () from /lib64/libc.so.6

Thread 5 (Thread 0x7f4465a47700 (LWP 5023)):
#0  0x0000003ffac0e054 in __lll_lock_wait () from /lib64/libpthread.so.0
#1  0x0000003ffac0b4fc in pthread_cond_wait@@GLIBC_2.3.2 () from
/lib64/libpthread.so.0
#2  0x000000000043fe81 in fio_mutex_down (mutex=0x7f446f509000) at
util/fio/mutex.c:155
#3  0x000000000044a7c1 in pool_lock (ptr=0x7f446ecf51f0) at
util/fio/smalloc.c:60
#4  sfree_pool (ptr=0x7f446ecf51f0) at util/fio/smalloc.c:328
#5  sfree (ptr=0x7f446ecf51f0) at util/fio/smalloc.c:356
#6  0x00000000004284f2 in close_and_free_files (td=0x7f4466450000) at
util/fio/filesetup.c:1122
#7  0x000000000040b495 in thread_main (data=0x7f4466450000) at
util/fio/backend.c:1548
#8  0x0000003ffac07851 in start_thread () from /lib64/libpthread.so.0
#9  0x0000003ffa8e890d in clone () from /lib64/libc.so.6

Thread 4 (Thread 0x7f4465045700 (LWP 5024)):
#0  0x0000003ffac0e054 in __lll_lock_wait () from /lib64/libpthread.so.0
#1  0x0000003ffac09388 in _L_lock_854 () from /lib64/libpthread.so.0
#2  0x0000003ffac09257 in pthread_mutex_lock () from /lib64/libpthread.so.0
#3  0x000000000043fe58 in fio_mutex_down (mutex=0x7f446f509000) at
util/fio/mutex.c:151
#4  0x000000000044a7c1 in pool_lock (ptr=0x7f446ecf5390) at
util/fio/smalloc.c:60
#5  sfree_pool (ptr=0x7f446ecf5390) at util/fio/smalloc.c:328
#6  sfree (ptr=0x7f446ecf5390) at util/fio/smalloc.c:356
#7  0x00000000004284f2 in close_and_free_files (td=0x7f44664553b8) at
util/fio/filesetup.c:1122
#8  0x000000000040b495 in thread_main (data=0x7f44664553b8) at
util/fio/backend.c:1548
#9  0x0000003ffac07851 in start_thread () from /lib64/libpthread.so.0
#10 0x0000003ffa8e890d in clone () from /lib64/libc.so.6

Thread 3 (Thread 0x7f4464643700 (LWP 5025)):
#0  0x0000003ffac0e054 in __lll_lock_wait () from /lib64/libpthread.so.0
#1  0x0000003ffac09388 in _L_lock_854 () from /lib64/libpthread.so.0
#2  0x0000003ffac09257 in pthread_mutex_lock () from /lib64/libpthread.so.0
#3  0x000000000043fe58 in fio_mutex_down (mutex=0x7f446f509000) at
util/fio/mutex.c:151
#4  0x000000000044a7c1 in pool_lock (ptr=0x7f446ecf5690) at
util/fio/smalloc.c:60
#5  sfree_pool (ptr=0x7f446ecf5690) at util/fio/smalloc.c:328
#6  sfree (ptr=0x7f446ecf5690) at util/fio/smalloc.c:356
#7  0x00000000004284c7 in close_and_free_files (td=0x7f446645a770) at
util/fio/filesetup.c:1118
#8  0x000000000040b495 in thread_main (data=0x7f446645a770) at
util/fio/backend.c:1548
#9  0x0000003ffac07851 in start_thread () from /lib64/libpthread.so.0
#10 0x0000003ffa8e890d in clone () from /lib64/libc.so.6

Thread 2 (Thread 0x7f4463c41700 (LWP 5027)):
#0  0x0000003ffac0e054 in __lll_lock_wait () from /lib64/libpthread.so.0
#1  0x0000003ffac09388 in _L_lock_854 () from /lib64/libpthread.so.0
#2  0x0000003ffac09257 in pthread_mutex_lock () from /lib64/libpthread.so.0
#3  0x000000000043feda in fio_mutex_up (mutex=0x7f446f509000) at
util/fio/mutex.c:169
#4  0x00000000004284c7 in close_and_free_files (td=0x7f446645fb28) at
util/fio/filesetup.c:1118
#5  0x000000000040b495 in thread_main (data=0x7f446645fb28) at
util/fio/backend.c:1548
#6  0x0000003ffac07851 in start_thread () from /lib64/libpthread.so.0
#7  0x0000003ffa8e890d in clone () from /lib64/libc.so.6

Thread 1 (Thread 0x7f446f4fe720 (LWP 5016)):
#0  0x0000003ffa8acb8d in nanosleep () from /lib64/libc.so.6
#1  0x0000003ffa8e1d64 in usleep () from /lib64/libc.so.6
#2  0x000000000040e343 in do_usleep () at util/fio/backend.c:1727
#3  run_threads () at util/fio/backend.c:1965
#4  0x000000000040e6ed in fio_backend () at util/fio/backend.c:2068
#5  0x000000000044a1f9 in handle_run_cmd () at util/fio/server.c:567
#6  handle_command () at util/fio/server.c:763
#7  handle_connection () at util/fio/server.c:829
#8  accept_loop () at util/fio/server.c:915
#9  fio_server () at util/fio/server.c:1614
#10 0x000000000044a329 in fio_start_server (pidfile=0x268f000
"/tmp/fio.pid.4987") at util/fio/server.c:1724
#11 0x0000000000430574 in parse_cmd_line (argc=Unhandled dwarf
expression opcode 0xf3
) at util/fio/init.c:2154
#12 0x0000000000430b23 in parse_options (argc=3, argv=0x7fff62b22be8)
at util/fio/init.c:2208
#13 0x000000000042974c in main (argc=3, argv=0x7fff62b22be8,
envp=Unhandled dwarf expression opcode 0xf3
) at util/fio/fio.c:40



