New crushmap causes pgs stuck unclean

Darryl Bond <dbond@xxxxxxxxxxxxx> · Wed, 20 Mar 2013 09:56:45 +1000

My ceph cluster consists of 3 hosts in 3 locations, each with 2 SSDs and
4 spinning disks.
I created a fresh ceph filesystem and started up ceph.

Ceph health reports HEALTH_OK.

I then created a crushmap to suit our installation, where each host will be
in a separate rack, based on the example in the documentation.
With that crushmap installed, the cluster comes up as:

   health HEALTH_WARN 183 pgs stuck unclean
   monmap e1: 3 mons at
{a=192.168.6.101:6789/0,b=192.168.6.102:6789/0,c=192.168.6.103:6789/0},
election epoch 8, quorum 0,1,2 a,b,c
   osdmap e43: 18 osds: 18 up, 18 in
    pgmap v698: 3648 pgs: 3465 active+clean, 183 active+remapped; 0
bytes data, 672 MB used, 47137 GB / 47137 GB avail
   mdsmap e1: 0/0/1 up

# ceph osd tree

# id    weight    type name    up/down    reweight
-12    3    root ssd
-3    1        rack ServerRoom-ssd
-2    1            host ceph1-ssd
10    0.5                osd.10    up    1
11    0.5                osd.11    up    1
-6    1        rack PABXRoom-ssd
-4    1            host ceph2-ssd
20    0.5                osd.20    up    1
21    0.5                osd.21    up    1
-15    1        rack BackupCub-ssd
-5    1            host ceph3-ssd
30    0.5                osd.30    up    1
31    0.5                osd.31    up    1
-8    48    root spin
-13    16        rack ServerRoom-spin
-9    16            host ceph1-spin
12    4                osd.12    up    1
13    4                osd.13    up    1
14    4                osd.14    up    1
15    4                osd.15    up    1
-14    16        rack PABXRoom-spin
-10    16            host ceph2-spin
22    4                osd.22    up    1
23    4                osd.23    up    1
24    4                osd.24    up    1
25    4                osd.25    up    1
-7    16        rack BackupCub-spin
-11    16            host ceph3-spin
32    4                osd.32    up    1
33    4                osd.33    up    1
34    4                osd.34    up    1
35    4                osd.35    up    1

If I install the default crushmap again, the cluster comes back to HEALTH_OK.

What is wrong with my crushmap?

Darryl

# begin crush map

# devices
device 0 device0
device 1 device1
device 2 device2
device 3 device3
device 4 device4
device 5 device5
device 6 device6
device 7 device7
device 8 device8
device 9 device9
device 10 osd.10
device 11 osd.11
device 12 osd.12
device 13 osd.13
device 14 osd.14
device 15 osd.15
device 16 device16
device 17 device17
device 18 device18
device 19 device19
device 20 osd.20
device 21 osd.21
device 22 osd.22
device 23 osd.23
device 24 osd.24
device 25 osd.25
device 26 device26
device 27 device27
device 28 device28
device 29 device29
device 30 osd.30
device 31 osd.31
device 32 osd.32
device 33 osd.33
device 34 osd.34
device 35 osd.35

# types
type 0 osd
type 1 host
type 2 rack
type 3 row
type 4 room
type 5 datacenter
type 6 root

# buckets
host ceph1-ssd {
    id -2        # do not change unnecessarily
    # weight 2.000
    alg straw
    hash 0    # rjenkins1
    item osd.10 weight 0.5
    item osd.11 weight 0.5
}
host ceph1-spin {
    id -9        # do not change unnecessarily
    # weight 32.000
    alg straw
    hash 0    # rjenkins1
    item osd.12 weight 4.0
    item osd.13 weight 4.0
    item osd.14 weight 4.0
    item osd.15 weight 4.0
}
host ceph2-ssd {
    id -4        # do not change unnecessarily
    # weight 2.000
    alg straw
    hash 0    # rjenkins1
    item osd.20 weight 0.5
    item osd.21 weight 0.5
}
host ceph2-spin {
    id -10        # do not change unnecessarily
    # weight 32.000
    alg straw
    hash 0    # rjenkins1
    item osd.22 weight 4.0
    item osd.23 weight 4.0
    item osd.24 weight 4.0
    item osd.25 weight 4.0
}
host ceph3-ssd {
    id -5        # do not change unnecessarily
    # weight 2.000
    alg straw
    hash 0    # rjenkins1
    item osd.30 weight 0.5
    item osd.31 weight 0.5
}
host ceph3-spin {
    id -11        # do not change unnecessarily
    # weight 32.000
    alg straw
    hash 0    # rjenkins1
    item osd.32 weight 4.0
    item osd.33 weight 4.0
    item osd.34 weight 4.0
    item osd.35 weight 4.0
}
rack ServerRoom-ssd {
    id -3        # do not change unnecessarily
    # weight 1.000
    alg straw
    hash 0    # rjenkins1
    item ceph1-ssd weight 1.0
}
rack ServerRoom-spin {
    id -13        # do not change unnecessarily
    # weight 16.000
    alg straw
    hash 0    # rjenkins1
    item ceph1-spin weight 16.0
}
rack PABXRoom-ssd {
    id -6        # do not change unnecessarily
    # weight 1.000
    alg straw
    hash 0    # rjenkins1
    item ceph2-ssd weight 1.000
}
rack PABXRoom-spin {
    id -14        # do not change unnecessarily
    # weight 16.000
    alg straw
    hash 0    # rjenkins1
    item ceph2-spin weight 16.000
}
rack BackupCub-spin {
    id -7        # do not change unnecessarily
    # weight 16.000
    alg straw
    hash 0    # rjenkins1
    item ceph3-spin weight 16.000
}
rack BackupCub-ssd {
    id -15        # do not change unnecessarily
    # weight 1.000
    alg straw
    hash 0    # rjenkins1
    item ceph3-ssd weight 1.000
}
root spin {
    id -8        # do not change unnecessarily
    # weight 48.000
    alg straw
    hash 0    # rjenkins1
    item ServerRoom-spin weight 16.000
    item PABXRoom-spin weight 16.000
    item BackupCub-spin weight 16.000
}
root ssd {
    id -12        # do not change unnecessarily
    # weight 3.000
    alg straw
    hash 0    # rjenkins1
    item ServerRoom-ssd weight 1.000
    item PABXRoom-ssd weight 1.000
    item BackupCub-ssd weight 1.000
}

# rules
rule data {
    ruleset 0
    type replicated
    min_size 1
    max_size 10
    step take spin
    step chooseleaf firstn 0 type host
    step emit
}
rule metadata {
    ruleset 1
    type replicated
    min_size 1
    max_size 10
    step take spin
    step chooseleaf firstn 0 type host
    step emit
}
rule rbd {
    ruleset 2
    type replicated
    min_size 1
    max_size 10
    step take spin
    step chooseleaf firstn 0 type host
    step emit
}
rule spin {
        ruleset 3
        type replicated
        min_size 0
        max_size 10
        step take spin
        step chooseleaf firstn 0 type host
        step emit
}

rule ssd {
        ruleset 4
        type replicated
        min_size 0
        max_size 10
        step take ssd
        step chooseleaf firstn 0 type host
        step emit
}

rule ssd-primary {
        ruleset 5
        type replicated
        min_size 0
        max_size 10
        step take ssd
        step chooseleaf firstn 1 type host
        step emit
        step take spin
        step chooseleaf firstn -1 type host
        step emit
}

# end crush map

The contents of this electronic message and any attachments are intended only for the addressee and may contain legally privileged, personal, sensitive or confidential information. If you are not the intended addressee, and have received this email, any transmission, distribution, downloading, printing or photocopying of the contents of this message or attachments is strictly prohibited. Any legal privilege or confidentiality attached to this message and attachments is not waived, lost or destroyed by reason of delivery to any person other than intended addressee. If you have received this message and are not the intended addressee you should notify the sender by return email and destroy all copies of the message and any attachments. Unless expressly attributed, the views expressed in this email do not necessarily represent the views of the company.
_______________________________________________
ceph-users mailing list
ceph-users@xxxxxxxxxxxxxx
http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com