ZhengYan,
[root@ceph-radosgw-lb-backup cephfs]# ps aux | grep D
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
root 578 0.0 0.0 203360 3248 ? Ssl Aug24 0:00 /usr/sbin/gssproxy -D
root 865 0.0 0.0 82552 6104 ? Ss Aug24 0:00 /usr/sbin/sshd -D
root 2997 0.0 0.0 0 0 ? D Aug24 0:11 [kworker/2:1]
root 3996 0.0 0.0 115384 452 ? D Aug24 0:00 -bash
root 4479 0.0 0.0 112024 652 ? D Aug24 0:00 cat /mnt/cephfs/a/test-test1
root 18143 0.0 0.0 112656 2244 pts/2 S+ 08:19 0:00 grep --color=auto D
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
root 578 0.0 0.0 203360 3248 ? Ssl Aug24 0:00 /usr/sbin/gssproxy -D
root 865 0.0 0.0 82552 6104 ? Ss Aug24 0:00 /usr/sbin/sshd -D
root 2997 0.0 0.0 0 0 ? D Aug24 0:11 [kworker/2:1]
root 3996 0.0 0.0 115384 452 ? D Aug24 0:00 -bash
root 4479 0.0 0.0 112024 652 ? D Aug24 0:00 cat /mnt/cephfs/a/test-test1
root 18143 0.0 0.0 112656 2244 pts/2 S+ 08:19 0:00 grep --color=auto D
[root@ceph-radosgw-lb-backup cephfs]# cat /sys/kernel/debug/ceph/b6ea8682-c90d-495b-80f2-bc5bef1da9d1.client270186/osdc
REQUESTS 25 homeless 0
6225 osd1 21.cd699f5e [1,2,3]/1 [1,2,3]/1 10000000ab2.00000000 0x400014 1 read
6229 osd1 21.ba1e03ba [1,3,2]/1 [1,3,2]/1 10000008f05.00000002 0x400024 1 write
6234 osd1 21.29c5e8d [1,0,2]/1 [1,0,2]/1 10000008f05.00000007 0x400024 1 write
6238 osd1 21.9fe9de73 [1,2,0]/1 [1,2,0]/1 10000008f05.0000000b 0x400024 1 write
6240 osd1 21.a05b2ba8 [1,0,3]/1 [1,0,3]/1 10000008f05.0000000d 0x400024 1 write
6241 osd1 21.b7c85b45 [1,2,3]/1 [1,2,3]/1 10000008f05.0000000e 0x400024 1 write
6242 osd1 21.1bca917f [1,2,3]/1 [1,2,3]/1 10000008f05.0000000f 0x400024 1 write
6243 osd1 21.cdc3143b [1,2,3]/1 [1,2,3]/1 10000008f05.00000010 0x400024 1 write
6244 osd1 21.8e65566e [1,3,2]/1 [1,3,2]/1 10000008f05.00000011 0x400024 1 write
6246 osd1 21.5395ea66 [1,2,3]/1 [1,2,3]/1 10000008f05.00000013 0x400024 1 write
6251 osd1 21.2acb9e9c [1,0,2]/1 [1,0,2]/1 10000008f05.00000018 0x400024 1 write
6252 osd1 21.b22077e3 [1,0,3]/1 [1,0,3]/1 10000008f05.00000019 0x400024 1 write
6254 osd1 21.a9a1c2b1 [1,2,3]/1 [1,2,3]/1 10000008f05.0000001b 0x400024 1 write
6256 osd1 21.64cfc57f [1,2,3]/1 [1,2,3]/1 10000008f05.0000001d 0x400024 1 write
6259 osd1 21.629f77ff [1,2,3]/1 [1,2,3]/1 10000008f05.00000020 0x400024 1 write
6263 osd1 21.dde8c63 [1,0,3]/1 [1,0,3]/1 10000008f05.00000024 0x400024 1 write
6265 osd1 21.909ba5f8 [1,0,3]/1 [1,0,3]/1 10000008f05.00000026 0x400024 1 write
6266 osd1 21.5496fef5 [1,3,2]/1 [1,3,2]/1 10000008f05.00000027 0x400024 1 write
6269 osd1 21.1f26a27b [1,2,3]/1 [1,2,3]/1 10000008f05.0000002a 0x400024 1 write
6270 osd1 21.c9021b4e [1,0,2]/1 [1,0,2]/1 10000008f05.0000002b 0x400024 1 write
6276 osd1 21.e6df28ed [1,2,0]/1 [1,2,0]/1 10000008f05.00000031 0x400024 1 write
6286 osd1 21.1a38cab8 [1,0,3]/1 [1,0,3]/1 10000008f05.0000003b 0x400024 1 write
6287 osd1 21.425da4ba [1,3,2]/1 [1,3,2]/1 10000008f05.0000003c 0x400024 1 write
6291 osd1 21.bb5fa23f [1,2,3]/1 [1,2,3]/1 10000008f05.00000040 0x400024 1 write
6298 osd1 21.b9255c8e [1,0,2]/1 [1,0,2]/1 10000008f05.00000047 0x400024 1 write
LINGER REQUESTS
REQUESTS 25 homeless 0
6225 osd1 21.cd699f5e [1,2,3]/1 [1,2,3]/1 10000000ab2.00000000 0x400014 1 read
6229 osd1 21.ba1e03ba [1,3,2]/1 [1,3,2]/1 10000008f05.00000002 0x400024 1 write
6234 osd1 21.29c5e8d [1,0,2]/1 [1,0,2]/1 10000008f05.00000007 0x400024 1 write
6238 osd1 21.9fe9de73 [1,2,0]/1 [1,2,0]/1 10000008f05.0000000b 0x400024 1 write
6240 osd1 21.a05b2ba8 [1,0,3]/1 [1,0,3]/1 10000008f05.0000000d 0x400024 1 write
6241 osd1 21.b7c85b45 [1,2,3]/1 [1,2,3]/1 10000008f05.0000000e 0x400024 1 write
6242 osd1 21.1bca917f [1,2,3]/1 [1,2,3]/1 10000008f05.0000000f 0x400024 1 write
6243 osd1 21.cdc3143b [1,2,3]/1 [1,2,3]/1 10000008f05.00000010 0x400024 1 write
6244 osd1 21.8e65566e [1,3,2]/1 [1,3,2]/1 10000008f05.00000011 0x400024 1 write
6246 osd1 21.5395ea66 [1,2,3]/1 [1,2,3]/1 10000008f05.00000013 0x400024 1 write
6251 osd1 21.2acb9e9c [1,0,2]/1 [1,0,2]/1 10000008f05.00000018 0x400024 1 write
6252 osd1 21.b22077e3 [1,0,3]/1 [1,0,3]/1 10000008f05.00000019 0x400024 1 write
6254 osd1 21.a9a1c2b1 [1,2,3]/1 [1,2,3]/1 10000008f05.0000001b 0x400024 1 write
6256 osd1 21.64cfc57f [1,2,3]/1 [1,2,3]/1 10000008f05.0000001d 0x400024 1 write
6259 osd1 21.629f77ff [1,2,3]/1 [1,2,3]/1 10000008f05.00000020 0x400024 1 write
6263 osd1 21.dde8c63 [1,0,3]/1 [1,0,3]/1 10000008f05.00000024 0x400024 1 write
6265 osd1 21.909ba5f8 [1,0,3]/1 [1,0,3]/1 10000008f05.00000026 0x400024 1 write
6266 osd1 21.5496fef5 [1,3,2]/1 [1,3,2]/1 10000008f05.00000027 0x400024 1 write
6269 osd1 21.1f26a27b [1,2,3]/1 [1,2,3]/1 10000008f05.0000002a 0x400024 1 write
6270 osd1 21.c9021b4e [1,0,2]/1 [1,0,2]/1 10000008f05.0000002b 0x400024 1 write
6276 osd1 21.e6df28ed [1,2,0]/1 [1,2,0]/1 10000008f05.00000031 0x400024 1 write
6286 osd1 21.1a38cab8 [1,0,3]/1 [1,0,3]/1 10000008f05.0000003b 0x400024 1 write
6287 osd1 21.425da4ba [1,3,2]/1 [1,3,2]/1 10000008f05.0000003c 0x400024 1 write
6291 osd1 21.bb5fa23f [1,2,3]/1 [1,2,3]/1 10000008f05.00000040 0x400024 1 write
6298 osd1 21.b9255c8e [1,0,2]/1 [1,0,2]/1 10000008f05.00000047 0x400024 1 write
LINGER REQUESTS
[root@ceph-radosgw-lb-backup cephfs]# cat /proc/2997/stack
[<ffffffff810b0826>] io_schedule+0x16/0x40
[<ffffffff811aa859>] __lock_page+0x119/0x170
[<ffffffff811bd301>] truncate_inode_pages_range+0x421/0x790
[<ffffffff811bd737>] truncate_pagecache+0x47/0x60
[<ffffffffa0553322>] __ceph_do_pending_vmtruncate+0xc2/0x1c0 [ceph]
[<ffffffffa055343b>] ceph_vmtruncate_work+0x1b/0x40 [ceph]
[<ffffffff8109c3b9>] process_one_work+0x149/0x360
[<ffffffff8109cb3d>] worker_thread+0x4d/0x3c0
[<ffffffff810a26e9>] kthread+0x109/0x140
[<ffffffff817b15c5>] ret_from_fork+0x25/0x30
[<ffffffffffffffff>] 0xffffffffffffffff
[root@ceph-radosgw-lb-backup cephfs]# cat /proc/3996/stack
[<ffffffffa05532a4>] __ceph_do_pending_vmtruncate+0x44/0x1c0 [ceph]
[<ffffffffa0553bfa>] __ceph_setattr+0x79a/0x8b0 [ceph]
[<ffffffffa0553d4c>] ceph_setattr+0x3c/0x60 [ceph]
[<ffffffff812623b6>] notify_change+0x266/0x440
[<ffffffff8123cd85>] do_truncate+0x75/0xc0
[<ffffffff8124f7aa>] path_openat+0xaba/0x13b0
[<ffffffff81251c81>] do_filp_open+0x91/0x100
[<ffffffff8123e304>] do_sys_open+0x124/0x210
[<ffffffff8123e40e>] SyS_open+0x1e/0x20
[<ffffffff81003a07>] do_syscall_64+0x67/0x150
[<ffffffff817b1427>] entry_SYSCALL64_slow_path+0x25/0x25
[<ffffffffffffffff>] 0xffffffffffffffff
[root@ceph-radosgw-lb-backup cephfs]# cat /proc/4479/stack
[<ffffffffa05532a4>] __ceph_do_pending_vmtruncate+0x44/0x1c0 [ceph]
[<ffffffffa05649b5>] try_get_cap_refs+0xb5/0x5a0 [ceph]
[<ffffffffa0567bd9>] ceph_get_caps+0x119/0x390 [ceph]
[<ffffffffa05587a5>] ceph_read_iter+0xc5/0x820 [ceph]
[<ffffffff8123ee2f>] __vfs_read+0xdf/0x130
[<ffffffff8123fbbc>] vfs_read+0x8c/0x130
[<ffffffff812411a5>] SyS_read+0x55/0xc0
[<ffffffff81003a07>] do_syscall_64+0x67/0x150
[<ffffffff817b1427>] entry_SYSCALL64_slow_path+0x25/0x25
[<ffffffffffffffff>] 0xffffffffffffffff
[<ffffffffa05532a4>] __ceph_do_pending_vmtruncate+0x44/0x1c0 [ceph]
[<ffffffffa05649b5>] try_get_cap_refs+0xb5/0x5a0 [ceph]
[<ffffffffa0567bd9>] ceph_get_caps+0x119/0x390 [ceph]
[<ffffffffa05587a5>] ceph_read_iter+0xc5/0x820 [ceph]
[<ffffffff8123ee2f>] __vfs_read+0xdf/0x130
[<ffffffff8123fbbc>] vfs_read+0x8c/0x130
[<ffffffff812411a5>] SyS_read+0x55/0xc0
[<ffffffff81003a07>] do_syscall_64+0x67/0x150
[<ffffffff817b1427>] entry_SYSCALL64_slow_path+0x25/0x25
[<ffffffffffffffff>] 0xffffffffffffffff
donglifecomm@xxxxxxxxx
From: donglifecomm@xxxxxxxxx
Date: 2017-08-25 16:14
To: zyan
CC: ceph-users
Subject: Re: Re: cephfs, kernel(4.12.8) client version hung(D status), ceph version 0.94.9

ZhengYan,

I will test this problem again.

Thanks a lot.
donglifecomm@xxxxxxxxxFrom: Yan, ZhengDate: 2017-08-25 16:12To: donglifecommCC: ceph-usersSubject: Re: cephfs, kernel(4.12.8) client version hung(D status), ceph version 0.94.9> On 24 Aug 2017, at 17:40, donglifecomm@xxxxxxxxx wrote:>> ZhengYan,>> I meet a problem, Follow the steps outlined below:>> 1. create 30G file test823> 2. host1 client(kernel 4.12.8)> cat /mnt/cephfs/a/test823 > /mnt/cephfs/a/test823-backup> ls -al /mnt/cephfs/a/*>> 3. host2 client(kernel 4.12.8)> while true; do cp /home/scripts/512k.file /mnt/cephfs/a/512k.file$i ; done // loop copy file> cat /mnt/cephfs/a/test823-backup > /mnt/cephfs/a/newtestfile> ls -al /mnt/cephfs/a/*>> 4. host2 client hung, stack is :> [ 9462.754853] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.> [ 9462.756838] bash D 0 32738 14988 0x00000084> [ 9462.758568] Call Trace:> [ 9462.759945] __schedule+0x28a/0x880> [ 9462.761414] schedule+0x36/0x80> [ 9462.762835] rwsem_down_write_failed+0x20d/0x380> [ 9462.764433] call_rwsem_down_write_failed+0x17/0x30> [ 9462.766075] ? __ceph_getxattr+0x340/0x340 [ceph]> [ 9462.767693] down_write+0x2d/0x40> [ 9462.769175] do_truncate+0x67/0xc0> [ 9462.770642] path_openat+0xaba/0x13b0> [ 9462.772136] do_filp_open+0x91/0x100> [ 9462.773616] ? __check_object_size+0x159/0x190> [ 9462.775156] ? 
__alloc_fd+0x46/0x170> [ 9462.776574] do_sys_open+0x124/0x210> [ 9462.777972] SyS_open+0x1e/0x20> [ 9462.779320] do_syscall_64+0x67/0x150> [ 9462.780736] entry_SYSCALL64_slow_path+0x25/0x25>> [root@cephtest ~]# cat /proc/29541/stack> [<ffffffffa0567b53>] ceph_mdsc_do_request+0x183/0x240 [ceph]> [<ffffffffa054785c>] __ceph_setattr+0x3fc/0x8b0 [ceph]> [<ffffffffa0547d4c>] ceph_setattr+0x3c/0x60 [ceph]> [<ffffffff812623b6>] notify_change+0x266/0x440> [<ffffffff8123cd85>] do_truncate+0x75/0xc0> [<ffffffff8124f7aa>] path_openat+0xaba/0x13b0> [<ffffffff81251c81>] do_filp_open+0x91/0x100> [<ffffffff8123e304>] do_sys_open+0x124/0x210> [<ffffffff8123e40e>] SyS_open+0x1e/0x20> [<ffffffff81003a07>] do_syscall_64+0x67/0x150> [<ffffffff817b1427>] entry_SYSCALL64_slow_path+0x25/0x25> [<ffffffffffffffff>] 0xffffffffffffffff>> [root@cephtest ~]# cat /proc/32738/stack> [<ffffffff8139a617>] call_rwsem_down_write_failed+0x17/0x30> [<ffffffff8123cd77>] do_truncate+0x67/0xc0> [<ffffffff8124f7aa>] path_openat+0xaba/0x13b0> [<ffffffff81251c81>] do_filp_open+0x91/0x100> [<ffffffff8123e304>] do_sys_open+0x124/0x210> [<ffffffff8123e40e>] SyS_open+0x1e/0x20> [<ffffffff81003a07>] do_syscall_64+0x67/0x150> [<ffffffff817b1427>] entry_SYSCALL64_slow_path+0x25/0x25> [<ffffffffffffffff>] 0xffffffffffffffff>> ceph log is:> f pending pAsLsXs issued pAsLsXsFcb, sent 1921.069365 seconds ago> 2017-08-24 17:16:00.219523 7f746db8f700 0 log_channel(cluster) log [WRN] : client.268113 isn't responding to mclientcaps(revoke), ino 10000000424 pending pAsLsXs issued pAsLsXsFcb, sent 1921.063079 seconds ago> 2017-08-24 17:16:00.219534 7f746db8f700 0 log_channel(cluster) log [WRN] : client.268113 isn't responding to mclientcaps(revoke), ino 10000000521 pending pAsLsXs issued pAsLsXsFcb, sent 1921.026983 seconds ago> 2017-08-24 17:16:00.219545 7f746db8f700 0 log_channel(cluster) log [WRN] : client.268113 isn't responding to mclientcaps(revoke), ino 10000000523 pending pAsLsXs issued pAsLsXsFcb, sent 1920.985596 
seconds ago> 2017-08-24 17:16:00.219574 7f746db8f700 0 log_channel(cluster) log [WRN] : client.268113 isn't responding to mclientcaps(revoke), ino 10000000528 pending pAsLsXs issued pAsLsXsFcb, sent 1920.866863 seconds ago> 2017-08-24 17:16:00.219592 7f746db8f700 0 log_channel(cluster) log [WRN] : client.268113 isn't responding to mclientcaps(revoke), ino 1000000052a pending pAsLsXs issued pAsLsXsFcb, sent 1920.788282 seconds ago> 2017-08-24 17:16:00.219606 7f746db8f700 0 log_channel(cluster) log [WRN] : client.268113 isn't responding to mclientcaps(revoke), ino 1000000052c pending pAsLsXs issued pAsLsXsFcb, sent 1920.712564 seconds ago> 2017-08-24 17:16:00.219618 7f746db8f700 0 log_channel(cluster) log [WRN] : client.268113 isn't responding to mclientcaps(revoke), ino 1000000052f pending pAsLsXs issued pAsLsXsFcb, sent 1920.563784 seconds ago> 2017-08-24 17:16:00.219630 7f746db8f700 0 log_channel(cluster) log [WRN] : client.268113 isn't responding to mclientcaps(revoke), ino 1000000040b pending pAsLsXsFsc issued pAsLsXsFscb, sent 1920.506752 seconds ago> 2017-08-24 17:16:00.219741 7f746db8f700 0 log_channel(cluster) log [WRN] : 4 slow requests, 1 included below; oldest blocked for > 1941.487238 secs> 2017-08-24 17:16:00.219753 7f746db8f700 0 log_channel(cluster) log [WRN] : slow request 1920.507384 seconds old, received at 2017-08-24 16:43:59.712319: client_request(client.268101:1122217 getattr pAsLsXsFs #1000000040b 2017-08-24 16:44:00.463827) currently failed to rdlock, waitingplease check if there are hung request in /sys/kernel/debug/ceph/*/osdc. It’s likely that kernel was unable to flush dirty pages.RegardsYan, Zheng>> Thanks a lot.>>>>>> donglifecomm@xxxxxxxxx
_______________________________________________ ceph-users mailing list ceph-users@xxxxxxxxxxxxxx http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com