rbd hang

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,

I am having a strange problem with our development cluster.  When I run rbd export, it just hangs.  I have been running Ceph for a long time and haven't encountered this kind of issue before.  Any ideas as to what is going on?

rbd -p locks export seco101ira -


I am running

CentOS 6.6 x86_64

ceph version 0.80.10 (ea6c958c38df1216bf95c927f143d8b13c4a9e70)

I have enabled debugging and get the following when I run the command:

[root@durbium ~]# rbd -p locks export seco101ira -
2015-10-29 11:17:08.183597 7fc3334fa7c0  1 librados: starting msgr at :/0
2015-10-29 11:17:08.183613 7fc3334fa7c0  1 librados: starting objecter
2015-10-29 11:17:08.183739 7fc3334fa7c0  1 -- :/0 messenger.start
2015-10-29 11:17:08.183779 7fc3334fa7c0  1 librados: setting wanted keys
2015-10-29 11:17:08.183782 7fc3334fa7c0  1 librados: calling monclient init
2015-10-29 11:17:08.184365 7fc3334fa7c0  1 -- :/1024687 --> 10.134.128.42:6789/0 -- auth(proto 0 30 bytes epoch 0) v1 -- ?+0 0x15ba900 con 0x15ba540
2015-10-29 11:17:08.185006 7fc3334f2700  1 -- 10.134.128.41:0/1024687 learned my addr 10.134.128.41:0/1024687
2015-10-29 11:17:08.185995 7fc32da9a700  1 -- 10.134.128.41:0/1024687 <== mon.1 10.134.128.42:6789/0 1 ==== mon_map v1 ==== 491+0+0 (318324477 0 0) 0x7fc318000be0 con 0x15ba540
2015-10-29 11:17:08.186213 7fc32da9a700  1 -- 10.134.128.41:0/1024687 <== mon.1 10.134.128.42:6789/0 2 ==== auth_reply(proto 2 0 (0) Success) v1 ==== 33+0+0 (4093383511 0 0) 0x7fc318001090 con 0x15ba540
2015-10-29 11:17:08.186544 7fc32da9a700  1 -- 10.134.128.41:0/1024687 --> 10.134.128.42:6789/0 -- auth(proto 2 32 bytes epoch 0) v1 -- ?+0 0x7fc31c001700 con 0x15ba540
2015-10-29 11:17:08.187160 7fc32da9a700  1 -- 10.134.128.41:0/1024687 <== mon.1 10.134.128.42:6789/0 3 ==== auth_reply(proto 2 0 (0) Success) v1 ==== 206+0+0 (2382192463 0 0) 0x7fc318001090 con 0x15ba540
2015-10-29 11:17:08.187354 7fc32da9a700  1 -- 10.134.128.41:0/1024687 --> 10.134.128.42:6789/0 -- auth(proto 2 165 bytes epoch 0) v1 -- ?+0 0x7fc31c002220 con 0x15ba540
2015-10-29 11:17:08.188001 7fc32da9a700  1 -- 10.134.128.41:0/1024687 <== mon.1 10.134.128.42:6789/0 4 ==== auth_reply(proto 2 0 (0) Success) v1 ==== 393+0+0 (34117402 0 0) 0x7fc3180008c0 con 0x15ba540
2015-10-29 11:17:08.188148 7fc32da9a700  1 -- 10.134.128.41:0/1024687 --> 10.134.128.42:6789/0 -- mon_subscribe({monmap=0+}) v2 -- ?+0 0x15b6b80 con 0x15ba540
2015-10-29 11:17:08.188334 7fc3334fa7c0  1 -- 10.134.128.41:0/1024687 --> 10.134.128.42:6789/0 -- mon_subscribe({monmap=6+,osdmap=0}) v2 -- ?+0 0x15b7700 con 0x15ba540
2015-10-29 11:17:08.188355 7fc3334fa7c0  1 -- 10.134.128.41:0/1024687 --> 10.134.128.42:6789/0 -- mon_subscribe({monmap=6+,osdmap=0}) v2 -- ?+0 0x15b7ca0 con 0x15ba540
2015-10-29 11:17:08.188445 7fc3334fa7c0  1 librados: init done
2015-10-29 11:17:08.188463 7fc3334fa7c0 10 librados: wait_for_osdmap waiting
2015-10-29 11:17:08.188625 7fc32da9a700  1 -- 10.134.128.41:0/1024687 <== mon.1 10.134.128.42:6789/0 5 ==== mon_map v1 ==== 491+0+0 (318324477 0 0) 0x7fc318001300 con 0x15ba540
2015-10-29 11:17:08.188795 7fc32da9a700  1 -- 10.134.128.41:0/1024687 <== mon.1 10.134.128.42:6789/0 6 ==== mon_subscribe_ack(300s) v1 ==== 20+0+0 (646930372 0 0) 0x7fc3180015a0 con 0x15ba540
2015-10-29 11:17:08.189129 7fc32da9a700  1 -- 10.134.128.41:0/1024687 <== mon.1 10.134.128.42:6789/0 7 ==== osd_map(4350..4350 src has 3829..4350) v3 ==== 7562+0+0 (1787729222 0 0) 0x7fc3180013b0 con 0x15ba540
2015-10-29 11:17:08.189452 7fc3334fa7c0 10 librados: wait_for_osdmap done waiting
2015-10-29 11:17:08.189454 7fc32da9a700  1 -- 10.134.128.41:0/1024687 <== mon.1 10.134.128.42:6789/0 8 ==== mon_subscribe_ack(300s) v1 ==== 20+0+0 (646930372 0 0) 0x7fc3180008c0 con 0x15ba540
2015-10-29 11:17:08.189470 7fc32da9a700  1 -- 10.134.128.41:0/1024687 <== mon.1 10.134.128.42:6789/0 9 ==== osd_map(4350..4350 src has 3829..4350) v3 ==== 7562+0+0 (1787729222 0 0) 0x7fc318005290 con 0x15ba540
2015-10-29 11:17:08.189485 7fc32da9a700  1 -- 10.134.128.41:0/1024687 <== mon.1 10.134.128.42:6789/0 10 ==== mon_subscribe_ack(300s) v1 ==== 20+0+0 (646930372 0 0) 0x7fc3180056d0 con 0x15ba540
2015-10-29 11:17:08.189522 7fc3334fa7c0 20 librbd::ImageCtx: enabling caching...
2015-10-29 11:17:08.189540 7fc3334fa7c0 20 librbd::ImageCtx: Initial cache settings: size=64 num_objects=10 max_dirty=32 target_dirty=16 max_dirty_age=5
2015-10-29 11:17:08.189686 7fc3334fa7c0 20 librbd: open_image: ictx = 0x15b8390 name = 'seco101ira' id = '' snap_name = ''
2015-10-29 11:17:08.189730 7fc3334fa7c0 10 librados: stat oid=seco101ira.rbd nspace=
2015-10-29 11:17:08.189882 7fc3334fa7c0  1 -- 10.134.128.41:0/1024687 --> 10.134.128.43:6803/2741 -- osd_op(client.7543.0:1 seco101ira.rbd [stat] 4.a982c550 ack+read e4350) v4 -- ?+0 0x15baf60 con 0x15b9e70
2015-10-29 11:17:08.192470 7fc32da9a700  1 -- 10.134.128.41:0/1024687 <== osd.2 10.134.128.43:6803/2741 1 ==== osd_op_reply(1 seco101ira.rbd [stat] v0'0 uv1 ondisk = 0) v6 ==== 181+0+16 (1355327314 0 448133945) 0x7fc314000c10 con 0x15b9e70
2015-10-29 11:17:08.192548 7fc3334fa7c0 10 librados: Objecter returned from stat r=0
2015-10-29 11:17:08.192563 7fc3334fa7c0 20 librbd: detect format of seco101ira : old
2015-10-29 11:17:08.192600 7fc3334fa7c0 20 librbd: ictx_refresh 0x15b8390
2015-10-29 11:17:08.192614 7fc3334fa7c0 10 librados: read oid=seco101ira.rbd nspace=
2015-10-29 11:17:08.192631 7fc3334fa7c0  1 -- 10.134.128.41:0/1024687 --> 10.134.128.43:6803/2741 -- osd_op(client.7543.0:2 seco101ira.rbd [read 0~4096] 4.a982c550 ack+read e4350) v4 -- ?+0 0x15bb700 con 0x15b9e70
2015-10-29 11:17:08.193957 7fc32da9a700  1 -- 10.134.128.41:0/1024687 <== osd.2 10.134.128.43:6803/2741 2 ==== osd_op_reply(2 seco101ira.rbd [read 0~112] v0'0 uv1 ondisk = 0) v6 ==== 181+0+112 (674783017 0 641510945) 0x7fc314000c10 con 0x15b9e70
2015-10-29 11:17:08.194018 7fc3334fa7c0 10 librados: Objecter returned from read r=0
2015-10-29 11:17:08.194022 7fc3334fa7c0 10 librados: Returned length 112 less than original length 4096
2015-10-29 11:17:08.194058 7fc3334fa7c0 10 librados: call oid=seco101ira.rbd nspace=
2015-10-29 11:17:08.194074 7fc3334fa7c0  1 -- 10.134.128.41:0/1024687 --> 10.134.128.43:6803/2741 -- osd_op(client.7543.0:3 seco101ira.rbd [call rbd.snap_list] 4.a982c550 ack+read e4350) v4 -- ?+0 0x15bbd40 con 0x15b9e70
2015-10-29 11:17:08.194845 7fc32da9a700  1 -- 10.134.128.41:0/1024687 <== osd.2 10.134.128.43:6803/2741 3 ==== osd_op_reply(3 seco101ira.rbd [call] v0'0 uv1 ondisk = 0) v6 ==== 181+0+12 (1759032444 0 0) 0x7fc314000c10 con 0x15b9e70
2015-10-29 11:17:08.194902 7fc3334fa7c0 10 librados: Objecter returned from call r=0
2015-10-29 11:17:08.194944 7fc3334fa7c0 10 librados: call oid=seco101ira.rbd nspace=
2015-10-29 11:17:08.194963 7fc3334fa7c0  1 -- 10.134.128.41:0/1024687 --> 10.134.128.43:6803/2741 -- osd_op(client.7543.0:4 seco101ira.rbd [call lock.get_info] 4.a982c550 ack+read e4350) v4 -- ?+0 0x15bd450 con 0x15b9e70
2015-10-29 11:17:08.196313 7fc32da9a700  1 -- 10.134.128.41:0/1024687 <== osd.2 10.134.128.43:6803/2741 4 ==== osd_op_reply(4 seco101ira.rbd [call] v0'0 uv1 ondisk = 0) v6 ==== 181+0+15 (3120959177 0 2149983739) 0x7fc314000c10 con 0x15b9e70
2015-10-29 11:17:08.196356 7fc3334fa7c0 10 librados: Objecter returned from call r=0
2015-10-29 11:17:08.196382 7fc3334fa7c0 10 librbd::ImageCtx:  cache bytes 64 order 22 -> about 10 objects
2015-10-29 11:17:08.196386 7fc3334fa7c0 10 librbd::ImageCtx: init_layout stripe_unit 4194304 stripe_count 1 object_size 4194304 prefix rb.0.16cf.238e1f29 format rb.0.16cf.238e1f29.%012llx
2015-10-29 11:17:08.196403 7fc3334fa7c0 10 librados: set snap write context: seq = 0 and snaps = []
2015-10-29 11:17:08.196437 7fc3334fa7c0 10 librados: set snap read head -> head
2015-10-29 11:17:08.196446 7fc3334fa7c0 20 librbd: info 0x15b8390
2015-10-29 11:17:08.196452 7fc3334fa7c0 20 librbd: ictx_check 0x15b8390
2015-10-29 11:17:08.196461 7fc3334fa7c0 20 librbd: read_iterate 0x15b8390 off = 0 len = 1048576
2015-10-29 11:17:08.196464 7fc3334fa7c0 20 librbd: ictx_check 0x15b8390
2015-10-29 11:17:08.196475 7fc3334fa7c0 20 librbd: aio_read 0x15b8390 completion 0x15bbc80 [0,1048576]
2015-10-29 11:17:08.196479 7fc3334fa7c0 20 librbd: ictx_check 0x15b8390
2015-10-29 11:17:08.196504 7fc3334fa7c0 20 librbd:  oid rb.0.16cf.238e1f29.000000000000 0~1048576 from [0,1048576]
2015-10-29 11:17:08.196564 7fc3334fa7c0 20 librbd::AioCompletion: AioCompletion::finish_adding_requests 0x15bbc80 pending 1

Below is my ceph.conf
----------------------

[client]
        rbd cache = true
        rbd cache size = 64
        rbd cache max dirty = 32
        rbd cache target dirty = 16
        rbd cache max dirty age = 5.0

        debug rbd = 20
        debug rados = 20
        debug ms = 1
        log file = /var/log/ceph/client.$name.$pid.log
[global]
         public network = 10.134.128.0/26
         cluster network = 10.134.128.64/26
         fsid = a0a1c9ea-9a77-41fe-aef1-b87136776ac3

         # For version 0.55 and beyond, you must explicitly enable 
         # or disable authentication with "auth" entries in [global].
 
          auth cluster required = cephx
         auth service required = cephx
         auth client required = cephx
                osd journal size = 10000

[osd]
         osd recovery max active = 1

         osd journal size = 10000
         journal aio = true
 
         #The following assumes ext4 filesystem.
         #osd mkfs options {fs-type} = {mkfs options}   # default for xfs is "-f" 
         #osd mount options {fs-type} = {mount options} # default mount option is "rw,noatime"
         # For example, for ext4, the mount option might look like this:
         #osd mkfs options ext4 = user_xattr,rw,noatime
        
         osd mkfs options xfs = -L -f -d su=64k,sw=1 $name
         osd mount options btrfs = rw,noatime
         osd mount options xfs = rw,noatime,nodiratime

         filestore xattr use omap = true
         # osd mkfs type = btrfs
         osd mkfs type = xfs
         osd mkfs options btrfs = -L $name


        # CAIT -- Manual commands to make and mount file system

        # -- Make xfs file system
        # mkfs -t xfs -f -L ceph-X -d su=64k,sw=1 /dev/sdX1

        # -- Rescan Partition Label
        # partprobe /dev/sdX1

        # -- Mount ceph file system
        # mount -o rw,noatime,nodiratime /dev/disk/by-label/ceph-X /var/lib/ceph/osd/ceph-X

[mon.durbium]
        host = durbium
        mon addr = 10.134.128.41:6789

[mon.zirconium]
        host = zirconium
        mon addr = 10.134.128.43:6789

[mon.stone]
        host = stone
        mon addr = 10.134.128.42:6789

[osd.0]
         host = durbium
         devs = /dev/vg-osd-0/lv-osd
         osd mkfs type = xfs
         osd journal = /dev/vg-osd-0/lv-journal

[osd.1]
         host = zirconium
         devs = /dev/vg-osd-1/lv-osd
         osd mkfs type = xfs
         osd journal = /dev/vg-osd-1/lv-journal

[osd.2]
         host = zirconium
         devs = /dev/vg-osd-2/lv-osd
         osd mkfs type = xfs
         osd journal = /dev/vg-osd-2/lv-journal


[osd.3]
         host = zirconium
         devs = /dev/vg-osd-3/lv-osd
         osd mkfs type = xfs
         osd journal = /dev/vg-osd-3/lv-journal

[osd.4]
         host = zirconium
         devs = /dev/vg-osd-4/lv-osd
         osd mkfs type = xfs
         osd journal = /dev/vg-osd-4/lv-journal
[osd.5]
         host = stone
         devs = /dev/vg-osd-5/lv-osd
         osd mkfs type = xfs
         osd journal = /dev/vg-osd-5/lv-journal

[osd.6]
         host = stone
         devs = /dev/vg-osd-6/lv-osd
         osd mkfs type = xfs
         osd journal = /dev/vg-osd-6/lv-journal

[osd.7]
         host = stone
         devs = /dev/vg-osd-7/lv-osd
         osd mkfs type = xfs
         osd journal = /dev/vg-osd-7/lv-journal

[osd.8]
         host = stone
         devs = /dev/vg-osd-8/lv-osd
         osd mkfs type = xfs
         osd journal = /dev/vg-osd-8/lv-journal




-- 
Joe Ryner
Center for the Application of Information Technologies (CAIT)
_______________________________________________
ceph-users mailing list
ceph-users@xxxxxxxxxxxxxx
http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com



[Index of Archives]     [Information on CEPH]     [Linux Filesystem Development]     [Ceph Development]     [Ceph Large]     [Ceph Dev]     [Linux USB Development]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [xfs]


  Powered by Linux