Re: rbd hang

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



I don't see the read request hitting the wire, so I am thinking your client cannot talk to the primary PG for the 'rb.0.16cf.238e1f29.000000000000' object.  Try adding "debug objecter = 20" to your configuration to get more details.

-- 

Jason Dillaman 

----- Original Message -----
> From: "Joe Ryner" <jryner@xxxxxxxx>
> To: ceph-users@xxxxxxxx
> Sent: Thursday, October 29, 2015 12:22:01 PM
> Subject:  rbd hang
> 
> i,
> 
> I am having a strange problem with our development cluster.  When I run rbd
> export it just hangs.  I have been running ceph for a long time and haven't
> encountered this kind of issue.  Any ideas as to what is going on?
> 
> rbd -p locks export seco101ira -
> 
> 
> I am running
> 
> Centos 6.6 x86 64
> 
> ceph version 0.80.10 (ea6c958c38df1216bf95c927f143d8b13c4a9e70)
> 
> I have enabled debugging and get the following when I run the command
> 
> [root@durbium ~]# rbd -p locks export seco101ira -
> 2015-10-29 11:17:08.183597 7fc3334fa7c0  1 librados: starting msgr at :/0
> 2015-10-29 11:17:08.183613 7fc3334fa7c0  1 librados: starting objecter
> 2015-10-29 11:17:08.183739 7fc3334fa7c0  1 -- :/0 messenger.start
> 2015-10-29 11:17:08.183779 7fc3334fa7c0  1 librados: setting wanted keys
> 2015-10-29 11:17:08.183782 7fc3334fa7c0  1 librados: calling monclient init
> 2015-10-29 11:17:08.184365 7fc3334fa7c0  1 -- :/1024687 -->
> 10.134.128.42:6789/0 -- auth(proto 0 30 bytes epoch 0) v1 -- ?+0 0x15ba900
> con 0x15ba540
> 2015-10-29 11:17:08.185006 7fc3334f2700  1 -- 10.134.128.41:0/1024687 learned
> my addr 10.134.128.41:0/1024687
> 2015-10-29 11:17:08.185995 7fc32da9a700  1 -- 10.134.128.41:0/1024687 <==
> mon.1 10.134.128.42:6789/0 1 ==== mon_map v1 ==== 491+0+0 (318324477 0 0)
> 0x7fc318000be0 con 0x15ba540
> 2015-10-29 11:17:08.186213 7fc32da9a700  1 -- 10.134.128.41:0/1024687 <==
> mon.1 10.134.128.42:6789/0 2 ==== auth_reply(proto 2 0 (0) Success) v1 ====
> 33+0+0 (4093383511 0 0) 0x7fc318001090 con 0x15ba540
> 2015-10-29 11:17:08.186544 7fc32da9a700  1 -- 10.134.128.41:0/1024687 -->
> 10.134.128.42:6789/0 -- auth(proto 2 32 bytes epoch 0) v1 -- ?+0
> 0x7fc31c001700 con 0x15ba540
> 2015-10-29 11:17:08.187160 7fc32da9a700  1 -- 10.134.128.41:0/1024687 <==
> mon.1 10.134.128.42:6789/0 3 ==== auth_reply(proto 2 0 (0) Success) v1 ====
> 206+0+0 (2382192463 0 0) 0x7fc318001090 con 0x15ba540
> 2015-10-29 11:17:08.187354 7fc32da9a700  1 -- 10.134.128.41:0/1024687 -->
> 10.134.128.42:6789/0 -- auth(proto 2 165 bytes epoch 0) v1 -- ?+0
> 0x7fc31c002220 con 0x15ba540
> 2015-10-29 11:17:08.188001 7fc32da9a700  1 -- 10.134.128.41:0/1024687 <==
> mon.1 10.134.128.42:6789/0 4 ==== auth_reply(proto 2 0 (0) Success) v1 ====
> 393+0+0 (34117402 0 0) 0x7fc3180008c0 con 0x15ba540
> 2015-10-29 11:17:08.188148 7fc32da9a700  1 -- 10.134.128.41:0/1024687 -->
> 10.134.128.42:6789/0 -- mon_subscribe({monmap=0+}) v2 -- ?+0 0x15b6b80 con
> 0x15ba540
> 2015-10-29 11:17:08.188334 7fc3334fa7c0  1 -- 10.134.128.41:0/1024687 -->
> 10.134.128.42:6789/0 -- mon_subscribe({monmap=6+,osdmap=0}) v2 -- ?+0
> 0x15b7700 con 0x15ba540
> 2015-10-29 11:17:08.188355 7fc3334fa7c0  1 -- 10.134.128.41:0/1024687 -->
> 10.134.128.42:6789/0 -- mon_subscribe({monmap=6+,osdmap=0}) v2 -- ?+0
> 0x15b7ca0 con 0x15ba540
> 2015-10-29 11:17:08.188445 7fc3334fa7c0  1 librados: init done
> 2015-10-29 11:17:08.188463 7fc3334fa7c0 10 librados: wait_for_osdmap waiting
> 2015-10-29 11:17:08.188625 7fc32da9a700  1 -- 10.134.128.41:0/1024687 <==
> mon.1 10.134.128.42:6789/0 5 ==== mon_map v1 ==== 491+0+0 (318324477 0 0)
> 0x7fc318001300 con 0x15ba540
> 2015-10-29 11:17:08.188795 7fc32da9a700  1 -- 10.134.128.41:0/1024687 <==
> mon.1 10.134.128.42:6789/0 6 ==== mon_subscribe_ack(300s) v1 ==== 20+0+0
> (646930372 0 0) 0x7fc3180015a0 con 0x15ba540
> 2015-10-29 11:17:08.189129 7fc32da9a700  1 -- 10.134.128.41:0/1024687 <==
> mon.1 10.134.128.42:6789/0 7 ==== osd_map(4350..4350 src has 3829..4350) v3
> ==== 7562+0+0 (1787729222 0 0) 0x7fc3180013b0 con 0x15ba540
> 2015-10-29 11:17:08.189452 7fc3334fa7c0 10 librados: wait_for_osdmap done
> waiting
> 2015-10-29 11:17:08.189454 7fc32da9a700  1 -- 10.134.128.41:0/1024687 <==
> mon.1 10.134.128.42:6789/0 8 ==== mon_subscribe_ack(300s) v1 ==== 20+0+0
> (646930372 0 0) 0x7fc3180008c0 con 0x15ba540
> 2015-10-29 11:17:08.189470 7fc32da9a700  1 -- 10.134.128.41:0/1024687 <==
> mon.1 10.134.128.42:6789/0 9 ==== osd_map(4350..4350 src has 3829..4350) v3
> ==== 7562+0+0 (1787729222 0 0) 0x7fc318005290 con 0x15ba540
> 2015-10-29 11:17:08.189485 7fc32da9a700  1 -- 10.134.128.41:0/1024687 <==
> mon.1 10.134.128.42:6789/0 10 ==== mon_subscribe_ack(300s) v1 ==== 20+0+0
> (646930372 0 0) 0x7fc3180056d0 con 0x15ba540
> 2015-10-29 11:17:08.189522 7fc3334fa7c0 20 librbd::ImageCtx: enabling
> caching...
> 2015-10-29 11:17:08.189540 7fc3334fa7c0 20 librbd::ImageCtx: Initial cache
> settings: size=64 num_objects=10 max_dirty=32 target_dirty=16
> max_dirty_age=5
> 2015-10-29 11:17:08.189686 7fc3334fa7c0 20 librbd: open_image: ictx =
> 0x15b8390 name = 'seco101ira' id = '' snap_name = ''
> 2015-10-29 11:17:08.189730 7fc3334fa7c0 10 librados: stat oid=seco101ira.rbd
> nspace=
> 2015-10-29 11:17:08.189882 7fc3334fa7c0  1 -- 10.134.128.41:0/1024687 -->
> 10.134.128.43:6803/2741 -- osd_op(client.7543.0:1 seco101ira.rbd [stat]
> 4.a982c550 ack+read e4350) v4 -- ?+0 0x15baf60 con 0x15b9e70
> 2015-10-29 11:17:08.192470 7fc32da9a700  1 -- 10.134.128.41:0/1024687 <==
> osd.2 10.134.128.43:6803/2741 1 ==== osd_op_reply(1 seco101ira.rbd [stat]
> v0'0 uv1 ondisk = 0) v6 ==== 181+0+16 (1355327314 0 448133945)
> 0x7fc314000c10 con 0x15b9e70
> 2015-10-29 11:17:08.192548 7fc3334fa7c0 10 librados: Objecter returned from
> stat r=0
> 2015-10-29 11:17:08.192563 7fc3334fa7c0 20 librbd: detect format of
> seco101ira : old
> 2015-10-29 11:17:08.192600 7fc3334fa7c0 20 librbd: ictx_refresh 0x15b8390
> 2015-10-29 11:17:08.192614 7fc3334fa7c0 10 librados: read oid=seco101ira.rbd
> nspace=
> 2015-10-29 11:17:08.192631 7fc3334fa7c0  1 -- 10.134.128.41:0/1024687 -->
> 10.134.128.43:6803/2741 -- osd_op(client.7543.0:2 seco101ira.rbd [read
> 0~4096] 4.a982c550 ack+read e4350) v4 -- ?+0 0x15bb700 con 0x15b9e70
> 2015-10-29 11:17:08.193957 7fc32da9a700  1 -- 10.134.128.41:0/1024687 <==
> osd.2 10.134.128.43:6803/2741 2 ==== osd_op_reply(2 seco101ira.rbd [read
> 0~112] v0'0 uv1 ondisk = 0) v6 ==== 181+0+112 (674783017 0 641510945)
> 0x7fc314000c10 con 0x15b9e70
> 2015-10-29 11:17:08.194018 7fc3334fa7c0 10 librados: Objecter returned from
> read r=0
> 2015-10-29 11:17:08.194022 7fc3334fa7c0 10 librados: Returned length 112 less
> than original length 4096
> 2015-10-29 11:17:08.194058 7fc3334fa7c0 10 librados: call oid=seco101ira.rbd
> nspace=
> 2015-10-29 11:17:08.194074 7fc3334fa7c0  1 -- 10.134.128.41:0/1024687 -->
> 10.134.128.43:6803/2741 -- osd_op(client.7543.0:3 seco101ira.rbd [call
> rbd.snap_list] 4.a982c550 ack+read e4350) v4 -- ?+0 0x15bbd40 con 0x15b9e70
> 2015-10-29 11:17:08.194845 7fc32da9a700  1 -- 10.134.128.41:0/1024687 <==
> osd.2 10.134.128.43:6803/2741 3 ==== osd_op_reply(3 seco101ira.rbd [call]
> v0'0 uv1 ondisk = 0) v6 ==== 181+0+12 (1759032444 0 0) 0x7fc314000c10 con
> 0x15b9e70
> 2015-10-29 11:17:08.194902 7fc3334fa7c0 10 librados: Objecter returned from
> call r=0
> 2015-10-29 11:17:08.194944 7fc3334fa7c0 10 librados: call oid=seco101ira.rbd
> nspace=
> 2015-10-29 11:17:08.194963 7fc3334fa7c0  1 -- 10.134.128.41:0/1024687 -->
> 10.134.128.43:6803/2741 -- osd_op(client.7543.0:4 seco101ira.rbd [call
> lock.get_info] 4.a982c550 ack+read e4350) v4 -- ?+0 0x15bd450 con 0x15b9e70
> 2015-10-29 11:17:08.196313 7fc32da9a700  1 -- 10.134.128.41:0/1024687 <==
> osd.2 10.134.128.43:6803/2741 4 ==== osd_op_reply(4 seco101ira.rbd [call]
> v0'0 uv1 ondisk = 0) v6 ==== 181+0+15 (3120959177 0 2149983739)
> 0x7fc314000c10 con 0x15b9e70
> 2015-10-29 11:17:08.196356 7fc3334fa7c0 10 librados: Objecter returned from
> call r=0
> 2015-10-29 11:17:08.196382 7fc3334fa7c0 10 librbd::ImageCtx:  cache bytes 64
> order 22 -> about 10 objects
> 2015-10-29 11:17:08.196386 7fc3334fa7c0 10 librbd::ImageCtx: init_layout
> stripe_unit 4194304 stripe_count 1 object_size 4194304 prefix
> rb.0.16cf.238e1f29 format rb.0.16cf.238e1f29.%012llx
> 2015-10-29 11:17:08.196403 7fc3334fa7c0 10 librados: set snap write context:
> seq = 0 and snaps = []
> 2015-10-29 11:17:08.196437 7fc3334fa7c0 10 librados: set snap read head ->
> head
> 2015-10-29 11:17:08.196446 7fc3334fa7c0 20 librbd: info 0x15b8390
> 2015-10-29 11:17:08.196452 7fc3334fa7c0 20 librbd: ictx_check 0x15b8390
> 2015-10-29 11:17:08.196461 7fc3334fa7c0 20 librbd: read_iterate 0x15b8390 off
> = 0 len = 1048576
> 2015-10-29 11:17:08.196464 7fc3334fa7c0 20 librbd: ictx_check 0x15b8390
> 2015-10-29 11:17:08.196475 7fc3334fa7c0 20 librbd: aio_read 0x15b8390
> completion 0x15bbc80 [0,1048576]
> 2015-10-29 11:17:08.196479 7fc3334fa7c0 20 librbd: ictx_check 0x15b8390
> 2015-10-29 11:17:08.196504 7fc3334fa7c0 20 librbd:  oid
> rb.0.16cf.238e1f29.000000000000 0~1048576 from [0,1048576]
> 2015-10-29 11:17:08.196564 7fc3334fa7c0 20 librbd::AioCompletion:
> AioCompletion::finish_adding_requests 0x15bbc80 pending 1
> 
> Below is my ceph.conf
> ----------------------
> 
> [client]
>         rbd cache = true
>         rbd cache size = 64
>         rbd cache max dirty = 32
>         rbd cache target dirty = 16
>         rbd cache max dirty age = 5.0
> 
>         debug rbd = 20
>         debug rados = 20
>         debug ms = 1
>         log file = /var/log/ceph/client.$name.$pid.log
> [global]
>          public network = 10.134.128.0/26
>          cluster network = 10.134.128.64/26
>          fsid = a0a1c9ea-9a77-41fe-aef1-b87136776ac3
> 
>          # For version 0.55 and beyond, you must explicitly enable
>          # or disable authentication with "auth" entries in [global].
>  
>           auth cluster required = cephx
>          auth service required = cephx
>          auth client required = cephx
>                 osd journal size = 10000
> 
> [osd]
>          osd recovery max active = 1
> 
>          osd journal size = 10000
>          journal aio = true
>  
>          #The following assumes ext4 filesystem.
>          #osd mkfs options {fs-type} = {mkfs options}   # default for xfs is
>          "-f"
>          #osd mount options {fs-type} = {mount options} # default mount
>          option is "rw,noatime"
>          # For example, for ext4, the mount option might look like this:
>          #osd mkfs options ext4 = user_xattr,rw,noatime
>         
>          osd mkfs options xfs = -L -f -d su=64k,sw=1 $name
>          osd mount options btrfs = rw,noatime
>          osd mount options xfs = rw,noatime,nodiratime
> 
>          filestore xattr use omap = true
>          # osd mkfs type = btrfs
>          osd mkfs type = xfs
>          osd mkfs options btrfs = -L $name
> 
> 
>         # CAIT -- Manual commands to make and mount file system
> 
>         # -- Make xfs file system
>         # mkfs -t xfs -f -L ceph-X -d su=64k,sw=1 /dev/sdX1
> 
>         # -- Rescan Parition Label
>         # partprobe /dev/sdX1
> 
>         # -- Mount ceph file system
>         # mount -o rw,noatime,nodiratime /dev/disk/by-label/ceph-X
>         /var/lib/ceph/osd/ceph-X
> 
> [mon.durbium]
>         host = durbium
>         mon addr = 10.134.128.41:6789
> 
> [mon.zirconium]
>         host = zirconium
>         mon addr = 10.134.128.43:6789
> 
> [mon.stone]
>         host = stone
>         mon addr = 10.134.128.42:6789
> 
> [osd.0]
>          host = durbium
>          devs = /dev/vg-osd-0/lv-osd
>          osd mkfs type = xfs
>          osd journal = /dev/vg-osd-0/lv-journal
> 
> [osd.1]
>          host = zirconium
>          devs = /dev/vg-osd-1/lv-osd
>          osd mkfs type = xfs
>          osd journal = /dev/vg-osd-1/lv-journal
> 
> [osd.2]
>          host = zirconium
>          devs = /dev/vg-osd-2/lv-osd
>          osd mkfs type = xfs
>          osd journal = /dev/vg-osd-2/lv-journal
> 
> 
> [osd.3]
>          host = zirconium
>          devs = /dev/vg-osd-3/lv-osd
>          osd mkfs type = xfs
>          osd journal = /dev/vg-osd-3/lv-journal
> 
> [osd.4]
>          host = zirconium
>          devs = /dev/vg-osd-4/lv-osd
>          osd mkfs type = xfs
>          osd journal = /dev/vg-osd-4/lv-journal
> [osd.5]
>          host = stone
>          devs = /dev/vg-osd-5/lv-osd
>          osd mkfs type = xfs
>          osd journal = /dev/vg-osd-5/lv-journal
> 
> [osd.6]
>          host = stone
>          devs = /dev/vg-osd-6/lv-osd
>          osd mkfs type = xfs
>          osd journal = /dev/vg-osd-6/lv-journal
> 
> [osd.7]
>          host = stone
>          devs = /dev/vg-osd-7/lv-osd
>          osd mkfs type = xfs
>          osd journal = /dev/vg-osd-7/lv-journal
> 
> [osd.8]
>          host = stone
>          devs = /dev/vg-osd-8/lv-osd
>          osd mkfs type = xfs
>          osd journal = /dev/vg-osd-8/lv-journal
> 
> 
> 
> 
> --
> Joe Ryner
> Center for the Application of Information Technologies (CAIT)
> _______________________________________________
> ceph-users mailing list
> ceph-users@xxxxxxxxxxxxxx
> http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com
> 
_______________________________________________
ceph-users mailing list
ceph-users@xxxxxxxxxxxxxx
http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com



[Index of Archives]     [Information on CEPH]     [Linux Filesystem Development]     [Ceph Development]     [Ceph Large]     [Ceph Dev]     [Linux USB Development]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [xfs]


  Powered by Linux