I have a cluster of 3 hosts running Debian Wheezy with the backports kernel 3.16.0-0.bpo.4-amd64.
For testing I did a
~# ceph osd out 20
from a clean state.
Ceph starts rebalancing; watching ceph -w, one sees the number of pgs stuck unclean rise and then fall to about 11.
Shortly after that, the cluster stays stuck in this state indefinitely:
health HEALTH_WARN 68 pgs stuck unclean; recovery 450/169647 objects degraded (0.265%); 3691/169647 objects misplaced (2.176%)
According to the documentation at http://ceph.com/docs/master/rados/operations/add-or-rm-osds/ the cluster should reach a clean state again after an OSD is taken out.
What am I doing wrong?
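For completeness, this is how the affected PGs can be inspected (these commands exist in Ceph releases of this era; the PG id in the last command is only a placeholder taken from the previous output):
~# ceph health detail            # lists every stuck PG and its state
~# ceph pg dump_stuck unclean    # dumps the PGs counted as stuck unclean
~# ceph pg 2.1f query            # detailed state of one stuck PG (id is hypothetical)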
Below are some config and command outputs:
~# ceph osd tree
# id weight type name up/down reweight
-1 76.02 root default
-2 25.34 host ve51
0 3.62 osd.0 up 1
3 3.62 osd.3 up 1
6 3.62 osd.6 up 1
9 3.62 osd.9 up 1
12 3.62 osd.12 up 1
15 3.62 osd.15 up 1
18 3.62 osd.18 up 1
-3 25.34 host ve52
1 3.62 osd.1 up 1
4 3.62 osd.4 up 1
7 3.62 osd.7 up 1
10 3.62 osd.10 up 1
13 3.62 osd.13 up 1
16 3.62 osd.16 up 1
19 3.62 osd.19 up 1
-4 25.34 host ve53
2 3.62 osd.2 up 1
5 3.62 osd.5 up 1
8 3.62 osd.8 up 1
11 3.62 osd.11 up 1
14 3.62 osd.14 up 1
17 3.62 osd.17 up 1
20 3.62 osd.20 up 1
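Since each host has seven equal-weight OSDs and the pools use size 3 with host-level placement, every host should hold one replica of each PG, so marking osd.20 out means its PGs have to be remapped onto the six remaining OSDs of ve53. To compare where a stuck PG currently sits with where CRUSH wants it (the PG id is again a placeholder):
~# ceph pg map 2.1f    # prints the osdmap epoch plus the up and acting OSD sets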
==========================
~# cat ceph.conf
[global]
fsid = 80ebba06-34f5-49fc-8178-d6cc1d1c1196
public_network = 192.168.10.0/24
cluster_network = 192.168.10.0/24
mon_initial_members = ve51, ve52, ve53
mon_host = 192.168.10.51,192.168.10.52,192.168.10.53
auth_cluster_required = cephx
auth_service_required = cephx
auth_client_required = cephx
filestore_xattr_use_omap = true
mon_osd_down_out_subtree_limit = host
osd_pool_default_size=3
osd_pool_default_min_size=2
[osd]
osd_journal_size = 20000
osd_mount_options_xfs = noatime,nodiratime,logbsize=256k,logbufs=8,inode64
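Note that the osd_pool_default_* values only apply when a pool is created, so it is worth confirming what the existing pools actually use (the pool name rbd is just an example):
~# ceph osd pool get rbd size
~# ceph osd pool get rbd min_size
~# ceph osd dump | grep 'replicated size'    # shows size/min_size of every pool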
==========================
~# ceph -s
cluster 80ebba06-34f5-49fc-8178-d6cc1d1c1196
health HEALTH_OK
monmap e1: 3 mons at {ve51=192.168.10.51:6789/0,ve52=192.168.10.52:6789/0,ve53=192.168.10.53:6789/0}, election epoch 28, quorum 0,1,2 ve51,ve52,ve53
osdmap e1353: 21 osds: 21 up, 21 in
pgmap v16484: 2048 pgs, 2 pools, 219 GB data, 56549 objects
658 GB used, 77139 GB / 77797 GB avail
2048 active+clean
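If the experiment needs to be undone, marking the OSD back in should bring the cluster back to this clean state:
~# ceph osd in 20    # reverses the earlier 'ceph osd out 20'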
==========================
~# cat crushmap
# begin crush map
tunable choose_local_tries 0
tunable choose_local_fallback_tries 0
tunable choose_total_tries 50
tunable chooseleaf_descend_once 1
# devices
device 0 osd.0
device 1 osd.1
device 2 osd.2
device 3 osd.3
device 4 osd.4
device 5 osd.5
device 6 osd.6
device 7 osd.7
device 8 osd.8
device 9 osd.9
device 10 osd.10
device 11 osd.11
device 12 osd.12
device 13 osd.13
device 14 osd.14
device 15 osd.15
device 16 osd.16
device 17 osd.17
device 18 osd.18
device 19 osd.19
device 20 osd.20
# types
type 0 osd
type 1 host
type 2 chassis
type 3 rack
type 4 row
type 5 pdu
type 6 pod
type 7 room
type 8 datacenter
type 9 region
type 10 root
# buckets
host ve51 {
id -2 # do not change unnecessarily
# weight 25.340
alg straw
hash 0 # rjenkins1
item osd.0 weight 3.620
item osd.3 weight 3.620
item osd.6 weight 3.620
item osd.9 weight 3.620
item osd.12 weight 3.620
item osd.15 weight 3.620
item osd.18 weight 3.620
}
host ve52 {
id -3 # do not change unnecessarily
# weight 25.340
alg straw
hash 0 # rjenkins1
item osd.1 weight 3.620
item osd.4 weight 3.620
item osd.7 weight 3.620
item osd.10 weight 3.620
item osd.13 weight 3.620
item osd.16 weight 3.620
item osd.19 weight 3.620
}
host ve53 {
id -4 # do not change unnecessarily
# weight 25.340
alg straw
hash 0 # rjenkins1
item osd.2 weight 3.620
item osd.5 weight 3.620
item osd.8 weight 3.620
item osd.11 weight 3.620
item osd.14 weight 3.620
item osd.17 weight 3.620
item osd.20 weight 3.620
}
root default {
id -1 # do not change unnecessarily
# weight 76.020
alg straw
hash 0 # rjenkins1
item ve51 weight 25.340
item ve52 weight 25.340
item ve53 weight 25.340
}
# rules
rule replicated_ruleset {
ruleset 0
type replicated
min_size 1
max_size 10
step take default
step chooseleaf firstn 0 type host
step emit
}
# end crush map
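The tunables at the top of this map match the older bobtail-style profile (there is no chooseleaf_vary_r line). With only three hosts and size 3 pools, legacy tunables are a common reason why CRUSH cannot find a complete mapping once an OSD is out, leaving a handful of PGs stuck unclean. A sketch for inspecting and, if all clients are new enough, updating them (note this triggers additional data movement):
~# ceph osd crush show-tunables          # tunables currently in effect
~# ceph osd getcrushmap -o crush.bin     # export the binary crush map
~# crushtool -d crush.bin -o crush.txt   # decompile it for inspection
~# ceph osd crush tunables optimal       # switch to the optimal profile for this release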