Hi, I am having a strange problem with our development cluster. When I run rbd export, it just hangs. I have been running Ceph for a long time and haven't encountered this kind of issue before. Any ideas as to what is going on? The command is: rbd -p locks export seco101ira - I am running CentOS 6.6 x86_64, ceph version 0.80.10 (ea6c958c38df1216bf95c927f143d8b13c4a9e70). I have enabled debugging and get the following when I run the command: [root@durbium ~]# rbd -p locks export seco101ira - 2015-10-29 11:17:08.183597 7fc3334fa7c0 1 librados: starting msgr at :/0 2015-10-29 11:17:08.183613 7fc3334fa7c0 1 librados: starting objecter 2015-10-29 11:17:08.183739 7fc3334fa7c0 1 -- :/0 messenger.start 2015-10-29 11:17:08.183779 7fc3334fa7c0 1 librados: setting wanted keys 2015-10-29 11:17:08.183782 7fc3334fa7c0 1 librados: calling monclient init 2015-10-29 11:17:08.184365 7fc3334fa7c0 1 -- :/1024687 --> 10.134.128.42:6789/0 -- auth(proto 0 30 bytes epoch 0) v1 -- ?+0 0x15ba900 con 0x15ba540 2015-10-29 11:17:08.185006 7fc3334f2700 1 -- 10.134.128.41:0/1024687 learned my addr 10.134.128.41:0/1024687 2015-10-29 11:17:08.185995 7fc32da9a700 1 -- 10.134.128.41:0/1024687 <== mon.1 10.134.128.42:6789/0 1 ==== mon_map v1 ==== 491+0+0 (318324477 0 0) 0x7fc318000be0 con 0x15ba540 2015-10-29 11:17:08.186213 7fc32da9a700 1 -- 10.134.128.41:0/1024687 <== mon.1 10.134.128.42:6789/0 2 ==== auth_reply(proto 2 0 (0) Success) v1 ==== 33+0+0 (4093383511 0 0) 0x7fc318001090 con 0x15ba540 2015-10-29 11:17:08.186544 7fc32da9a700 1 -- 10.134.128.41:0/1024687 --> 10.134.128.42:6789/0 -- auth(proto 2 32 bytes epoch 0) v1 -- ?+0 0x7fc31c001700 con 0x15ba540 2015-10-29 11:17:08.187160 7fc32da9a700 1 -- 10.134.128.41:0/1024687 <== mon.1 10.134.128.42:6789/0 3 ==== auth_reply(proto 2 0 (0) Success) v1 ==== 206+0+0 (2382192463 0 0) 0x7fc318001090 con 0x15ba540 2015-10-29 11:17:08.187354 7fc32da9a700 1 -- 10.134.128.41:0/1024687 --> 10.134.128.42:6789/0 -- auth(proto 2 165 bytes epoch 0) v1 -- ?+0 0x7fc31c002220 con 0x15ba540 
2015-10-29 11:17:08.188001 7fc32da9a700 1 -- 10.134.128.41:0/1024687 <== mon.1 10.134.128.42:6789/0 4 ==== auth_reply(proto 2 0 (0) Success) v1 ==== 393+0+0 (34117402 0 0) 0x7fc3180008c0 con 0x15ba540 2015-10-29 11:17:08.188148 7fc32da9a700 1 -- 10.134.128.41:0/1024687 --> 10.134.128.42:6789/0 -- mon_subscribe({monmap=0+}) v2 -- ?+0 0x15b6b80 con 0x15ba540 2015-10-29 11:17:08.188334 7fc3334fa7c0 1 -- 10.134.128.41:0/1024687 --> 10.134.128.42:6789/0 -- mon_subscribe({monmap=6+,osdmap=0}) v2 -- ?+0 0x15b7700 con 0x15ba540 2015-10-29 11:17:08.188355 7fc3334fa7c0 1 -- 10.134.128.41:0/1024687 --> 10.134.128.42:6789/0 -- mon_subscribe({monmap=6+,osdmap=0}) v2 -- ?+0 0x15b7ca0 con 0x15ba540 2015-10-29 11:17:08.188445 7fc3334fa7c0 1 librados: init done 2015-10-29 11:17:08.188463 7fc3334fa7c0 10 librados: wait_for_osdmap waiting 2015-10-29 11:17:08.188625 7fc32da9a700 1 -- 10.134.128.41:0/1024687 <== mon.1 10.134.128.42:6789/0 5 ==== mon_map v1 ==== 491+0+0 (318324477 0 0) 0x7fc318001300 con 0x15ba540 2015-10-29 11:17:08.188795 7fc32da9a700 1 -- 10.134.128.41:0/1024687 <== mon.1 10.134.128.42:6789/0 6 ==== mon_subscribe_ack(300s) v1 ==== 20+0+0 (646930372 0 0) 0x7fc3180015a0 con 0x15ba540 2015-10-29 11:17:08.189129 7fc32da9a700 1 -- 10.134.128.41:0/1024687 <== mon.1 10.134.128.42:6789/0 7 ==== osd_map(4350..4350 src has 3829..4350) v3 ==== 7562+0+0 (1787729222 0 0) 0x7fc3180013b0 con 0x15ba540 2015-10-29 11:17:08.189452 7fc3334fa7c0 10 librados: wait_for_osdmap done waiting 2015-10-29 11:17:08.189454 7fc32da9a700 1 -- 10.134.128.41:0/1024687 <== mon.1 10.134.128.42:6789/0 8 ==== mon_subscribe_ack(300s) v1 ==== 20+0+0 (646930372 0 0) 0x7fc3180008c0 con 0x15ba540 2015-10-29 11:17:08.189470 7fc32da9a700 1 -- 10.134.128.41:0/1024687 <== mon.1 10.134.128.42:6789/0 9 ==== osd_map(4350..4350 src has 3829..4350) v3 ==== 7562+0+0 (1787729222 0 0) 0x7fc318005290 con 0x15ba540 2015-10-29 11:17:08.189485 7fc32da9a700 1 -- 10.134.128.41:0/1024687 <== mon.1 10.134.128.42:6789/0 10 ==== 
mon_subscribe_ack(300s) v1 ==== 20+0+0 (646930372 0 0) 0x7fc3180056d0 con 0x15ba540 2015-10-29 11:17:08.189522 7fc3334fa7c0 20 librbd::ImageCtx: enabling caching... 2015-10-29 11:17:08.189540 7fc3334fa7c0 20 librbd::ImageCtx: Initial cache settings: size=64 num_objects=10 max_dirty=32 target_dirty=16 max_dirty_age=5 2015-10-29 11:17:08.189686 7fc3334fa7c0 20 librbd: open_image: ictx = 0x15b8390 name = 'seco101ira' id = '' snap_name = '' 2015-10-29 11:17:08.189730 7fc3334fa7c0 10 librados: stat oid=seco101ira.rbd nspace= 2015-10-29 11:17:08.189882 7fc3334fa7c0 1 -- 10.134.128.41:0/1024687 --> 10.134.128.43:6803/2741 -- osd_op(client.7543.0:1 seco101ira.rbd [stat] 4.a982c550 ack+read e4350) v4 -- ?+0 0x15baf60 con 0x15b9e70 2015-10-29 11:17:08.192470 7fc32da9a700 1 -- 10.134.128.41:0/1024687 <== osd.2 10.134.128.43:6803/2741 1 ==== osd_op_reply(1 seco101ira.rbd [stat] v0'0 uv1 ondisk = 0) v6 ==== 181+0+16 (1355327314 0 448133945) 0x7fc314000c10 con 0x15b9e70 2015-10-29 11:17:08.192548 7fc3334fa7c0 10 librados: Objecter returned from stat r=0 2015-10-29 11:17:08.192563 7fc3334fa7c0 20 librbd: detect format of seco101ira : old 2015-10-29 11:17:08.192600 7fc3334fa7c0 20 librbd: ictx_refresh 0x15b8390 2015-10-29 11:17:08.192614 7fc3334fa7c0 10 librados: read oid=seco101ira.rbd nspace= 2015-10-29 11:17:08.192631 7fc3334fa7c0 1 -- 10.134.128.41:0/1024687 --> 10.134.128.43:6803/2741 -- osd_op(client.7543.0:2 seco101ira.rbd [read 0~4096] 4.a982c550 ack+read e4350) v4 -- ?+0 0x15bb700 con 0x15b9e70 2015-10-29 11:17:08.193957 7fc32da9a700 1 -- 10.134.128.41:0/1024687 <== osd.2 10.134.128.43:6803/2741 2 ==== osd_op_reply(2 seco101ira.rbd [read 0~112] v0'0 uv1 ondisk = 0) v6 ==== 181+0+112 (674783017 0 641510945) 0x7fc314000c10 con 0x15b9e70 2015-10-29 11:17:08.194018 7fc3334fa7c0 10 librados: Objecter returned from read r=0 2015-10-29 11:17:08.194022 7fc3334fa7c0 10 librados: Returned length 112 less than original length 4096 2015-10-29 11:17:08.194058 7fc3334fa7c0 10 librados: 
call oid=seco101ira.rbd nspace= 2015-10-29 11:17:08.194074 7fc3334fa7c0 1 -- 10.134.128.41:0/1024687 --> 10.134.128.43:6803/2741 -- osd_op(client.7543.0:3 seco101ira.rbd [call rbd.snap_list] 4.a982c550 ack+read e4350) v4 -- ?+0 0x15bbd40 con 0x15b9e70 2015-10-29 11:17:08.194845 7fc32da9a700 1 -- 10.134.128.41:0/1024687 <== osd.2 10.134.128.43:6803/2741 3 ==== osd_op_reply(3 seco101ira.rbd [call] v0'0 uv1 ondisk = 0) v6 ==== 181+0+12 (1759032444 0 0) 0x7fc314000c10 con 0x15b9e70 2015-10-29 11:17:08.194902 7fc3334fa7c0 10 librados: Objecter returned from call r=0 2015-10-29 11:17:08.194944 7fc3334fa7c0 10 librados: call oid=seco101ira.rbd nspace= 2015-10-29 11:17:08.194963 7fc3334fa7c0 1 -- 10.134.128.41:0/1024687 --> 10.134.128.43:6803/2741 -- osd_op(client.7543.0:4 seco101ira.rbd [call lock.get_info] 4.a982c550 ack+read e4350) v4 -- ?+0 0x15bd450 con 0x15b9e70 2015-10-29 11:17:08.196313 7fc32da9a700 1 -- 10.134.128.41:0/1024687 <== osd.2 10.134.128.43:6803/2741 4 ==== osd_op_reply(4 seco101ira.rbd [call] v0'0 uv1 ondisk = 0) v6 ==== 181+0+15 (3120959177 0 2149983739) 0x7fc314000c10 con 0x15b9e70 2015-10-29 11:17:08.196356 7fc3334fa7c0 10 librados: Objecter returned from call r=0 2015-10-29 11:17:08.196382 7fc3334fa7c0 10 librbd::ImageCtx: cache bytes 64 order 22 -> about 10 objects 2015-10-29 11:17:08.196386 7fc3334fa7c0 10 librbd::ImageCtx: init_layout stripe_unit 4194304 stripe_count 1 object_size 4194304 prefix rb.0.16cf.238e1f29 format rb.0.16cf.238e1f29.%012llx 2015-10-29 11:17:08.196403 7fc3334fa7c0 10 librados: set snap write context: seq = 0 and snaps = [] 2015-10-29 11:17:08.196437 7fc3334fa7c0 10 librados: set snap read head -> head 2015-10-29 11:17:08.196446 7fc3334fa7c0 20 librbd: info 0x15b8390 2015-10-29 11:17:08.196452 7fc3334fa7c0 20 librbd: ictx_check 0x15b8390 2015-10-29 11:17:08.196461 7fc3334fa7c0 20 librbd: read_iterate 0x15b8390 off = 0 len = 1048576 2015-10-29 11:17:08.196464 7fc3334fa7c0 20 librbd: ictx_check 0x15b8390 2015-10-29 
11:17:08.196475 7fc3334fa7c0 20 librbd: aio_read 0x15b8390 completion 0x15bbc80 [0,1048576] 2015-10-29 11:17:08.196479 7fc3334fa7c0 20 librbd: ictx_check 0x15b8390 2015-10-29 11:17:08.196504 7fc3334fa7c0 20 librbd: oid rb.0.16cf.238e1f29.000000000000 0~1048576 from [0,1048576] 2015-10-29 11:17:08.196564 7fc3334fa7c0 20 librbd::AioCompletion: AioCompletion::finish_adding_requests 0x15bbc80 pending 1 Below is my ceph.conf ---------------------- [client] rbd cache = true rbd cache size = 64 rbd cache max dirty = 32 rbd cache target dirty = 16 rbd cache max dirty age = 5.0 debug rbd = 20 debug rados = 20 debug ms = 1 log file = /var/log/ceph/client.$name.$pid.log [global] public network = 10.134.128.0/26 cluster network = 10.134.128.64/26 fsid = a0a1c9ea-9a77-41fe-aef1-b87136776ac3 # For version 0.55 and beyond, you must explicitly enable # or disable authentication with "auth" entries in [global]. auth cluster required = cephx auth service required = cephx auth client required = cephx osd journal size = 10000 [osd] osd recovery max active = 1 osd journal size = 10000 journal aio = true #The following assumes ext4 filesystem. 
#osd mkfs options {fs-type} = {mkfs options} # default for xfs is "-f" #osd mount options {fs-type} = {mount options} # default mount option is "rw,noatime" # For example, for ext4, the mount option might look like this: #osd mkfs options ext4 = user_xattr,rw,noatime osd mkfs options xfs = -L -f -d su=64k,sw=1 $name osd mount options btrfs = rw,noatime osd mount options xfs = rw,noatime,nodiratime filestore xattr use omap = true # osd mkfs type = btrfs osd mkfs type = xfs osd mkfs options btrfs = -L $name # CAIT -- Manual commands to make and mount file system # -- Make xfs file system # mkfs -t xfs -f -L ceph-X -d su=64k,sw=1 /dev/sdX1 # -- Rescan Partition Label # partprobe /dev/sdX1 # -- Mount ceph file system # mount -o rw,noatime,nodiratime /dev/disk/by-label/ceph-X /var/lib/ceph/osd/ceph-X [mon.durbium] host = durbium mon addr = 10.134.128.41:6789 [mon.zirconium] host = zirconium mon addr = 10.134.128.43:6789 [mon.stone] host = stone mon addr = 10.134.128.42:6789 [osd.0] host = durbium devs = /dev/vg-osd-0/lv-osd osd mkfs type = xfs osd journal = /dev/vg-osd-0/lv-journal [osd.1] host = zirconium devs = /dev/vg-osd-1/lv-osd osd mkfs type = xfs osd journal = /dev/vg-osd-1/lv-journal [osd.2] host = zirconium devs = /dev/vg-osd-2/lv-osd osd mkfs type = xfs osd journal = /dev/vg-osd-2/lv-journal [osd.3] host = zirconium devs = /dev/vg-osd-3/lv-osd osd mkfs type = xfs osd journal = /dev/vg-osd-3/lv-journal [osd.4] host = zirconium devs = /dev/vg-osd-4/lv-osd osd mkfs type = xfs osd journal = /dev/vg-osd-4/lv-journal [osd.5] host = stone devs = /dev/vg-osd-5/lv-osd osd mkfs type = xfs osd journal = /dev/vg-osd-5/lv-journal [osd.6] host = stone devs = /dev/vg-osd-6/lv-osd osd mkfs type = xfs osd journal = /dev/vg-osd-6/lv-journal [osd.7] host = stone devs = /dev/vg-osd-7/lv-osd osd mkfs type = xfs osd journal = /dev/vg-osd-7/lv-journal [osd.8] host = stone devs = /dev/vg-osd-8/lv-osd osd mkfs type = xfs osd journal = /dev/vg-osd-8/lv-journal -- Joe Ryner Center for 
the Application of Information Technologies (CAIT) _______________________________________________ ceph-users mailing list ceph-users@xxxxxxxxxxxxxx http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com