2 datacenters. -Sam On Mon, Dec 14, 2015 at 10:17 AM, Reno Rainz <rainzreno@xxxxxxxxx> wrote: > Hi, > > I got a functionnal and operationnal ceph cluster (in version 0.94.5), with > 3 nodes (acting for MON and OSD), everything was fine. > > I added a 4th osd node (same configuration than 3 others) and now cluster > status is health warn (active+remapped). > > > cluster e821c68f-995c-41a9-9c46-dbbd0a28b8c7 > health HEALTH_WARN > 256 pgs stuck unclean > recovery 279/1245 objects degraded (22.410%) > recovery 415/1245 objects misplaced (33.333%) > pool rbd pg_num 128 > pgp_num 64 > pool data pg_num 128 > pgp_num 100 > monmap e1: 3 mons at > {ceph-osd-1=10.200.1.11:6789/0,ceph-osd-2=10.200.1.12:6789/0,ceph-osd-3=10.200.1.13:6789/0} > election epoch 4, quorum 0,1,2 ceph-osd-1,ceph-osd-2,ceph-osd-3 > osdmap e57: 8 osds: 8 up, 8 in; 256 remapped pgs > pgmap v948: 256 pgs, 2 pools, 1566 MB data, 415 objects > 14929 MB used, 38237 MB / 55717 MB avail > 279/1245 objects degraded (22.410%) > 415/1245 objects misplaced (33.333%) > 256 active+remapped > > > OSD Tree > root@ceph-osd-1:~# ceph osd tree > ID WEIGHT TYPE NAME UP/DOWN REWEIGHT PRIMARY-AFFINITY > -8 4.00000 root default > -7 4.00000 region eu-west-1 > -5 1.00000 datacenter eu-west-1a > -2 1.00000 host ceph-osd-1 > 0 1.00000 osd.0 up 1.00000 1.00000 > 1 1.00000 osd.1 up 1.00000 1.00000 > -4 1.00000 host ceph-osd-3 > 4 1.00000 osd.4 up 1.00000 1.00000 > 5 1.00000 osd.5 up 1.00000 1.00000 > -6 1.00000 datacenter eu-west-1b > -3 1.00000 host ceph-osd-2 > 2 1.00000 osd.2 up 1.00000 1.00000 > 3 1.00000 osd.3 up 1.00000 1.00000 > -9 1.00000 host ceph-osd-4 > 6 1.00000 osd.6 up 1.00000 1.00000 > 7 1.00000 osd.7 up 1.00000 1.00000 > root@ceph-osd-1:~# > > I'm using this crush map : > > { > "devices": [ > { > "id": 0, > "name": "osd.0" > }, > { > "id": 1, > "name": "osd.1" > }, > { > "id": 2, > "name": "osd.2" > }, > { > "id": 3, > "name": "osd.3" > }, > { > "id": 4, > "name": "osd.4" > }, > { > "id": 5, > "name": "osd.5" > }, > { > "id": 6, > "name": "osd.6" > }, > { > "id": 7, > "name": "osd.7" > } > ], > "types": [ > { > "type_id": 0, > "name": "osd" > }, > { > "type_id": 1, > "name": "host" > }, > { > "type_id": 2, > "name": "chassis" > }, > { > "type_id": 3, > "name": "rack" > }, > { > "type_id": 4, > "name": "row" > }, > { > "type_id": 5, > "name": "pdu" > }, > { > "type_id": 6, > "name": "pod" > }, > { > "type_id": 7, > "name": "room" > }, > { > "type_id": 8, > "name": "datacenter" > }, > { > "type_id": 9, > "name": "region" > }, > { > "type_id": 10, > "name": "root" > } > ], > "buckets": [ > { > "id": -2, > "name": "ceph-osd-1", > "type_id": 1, > "type_name": "host", > "weight": 131072, > "alg": "straw", > "hash": "rjenkins1", > "items": [ > { > "id": 0, > "weight": 65536, > "pos": 0 > }, > { > "id": 1, > "weight": 65536, > "pos": 1 > } > ] > }, > { > "id": -3, > "name": "ceph-osd-2", > "type_id": 1, > "type_name": "host", > "weight": 131072, > "alg": "straw", > "hash": "rjenkins1", > "items": [ > { > "id": 2, > "weight": 65536, > "pos": 0 > }, > { > "id": 3, > "weight": 65536, > "pos": 1 > } > ] > }, > { > "id": -4, > "name": "ceph-osd-3", > "type_id": 1, > "type_name": "host", > "weight": 131072, > "alg": "straw", > "hash": "rjenkins1", > "items": [ > { > "id": 4, > "weight": 65536, > "pos": 0 > }, > { > "id": 5, > "weight": 65536, > "pos": 1 > } > ] > }, > { > "id": -5, > "name": "eu-west-1a", > "type_id": 8, > "type_name": "datacenter", > "weight": 131072, > "alg": "straw", > "hash": "rjenkins1", > "items": [ > { > "id": -2, > "weight": 65536, > "pos": 0 > }, > { > "id": -4, > "weight": 65536, > "pos": 1 > } > ] > }, > { > "id": -6, > "name": "eu-west-1b", > "type_id": 8, > "type_name": "datacenter", > "weight": 131072, > "alg": "straw", > "hash": "rjenkins1", > "items": [ > { > "id": -3, > "weight": 65536, > "pos": 0 > }, > { > "id": -9, > "weight": 65536, > "pos": 1 > } > ] > }, > { > "id": -7, > "name": "eu-west-1", > "type_id": 9, > "type_name": "region", > "weight": 131072, > "alg": "straw", > "hash": "rjenkins1", > "items": [ > { > "id": -5, > "weight": 65536, > "pos": 0 > }, > { > "id": -6, > "weight": 65536, > "pos": 1 > } > ] > }, > { > "id": -8, > "name": "default", > "type_id": 10, > "type_name": "root", > "weight": 262144, > "alg": "straw", > "hash": "rjenkins1", > "items": [ > { > "id": -7, > "weight": 262144, > "pos": 0 > } > ] > }, > { > "id": -9, > "name": "ceph-osd-4", > "type_id": 1, > "type_name": "host", > "weight": 131072, > "alg": "straw", > "hash": "rjenkins1", > "items": [ > { > "id": 6, > "weight": 65536, > "pos": 0 > }, > { > "id": 7, > "weight": 65536, > "pos": 1 > } > ] > } > ], > "rules": [ > { > "rule_id": 0, > "rule_name": "replicated_ruleset", > "ruleset": 0, > "type": 1, > "min_size": 1, > "max_size": 10, > "steps": [ > { > "op": "take", > "item": -8, > "item_name": "default" > }, > { > "op": "choose_firstn", > "num": 0, > "type": "datacenter" > }, > { > "op": "chooseleaf_firstn", > "num": 1, > "type": "host" > }, > { > "op": "emit" > } > ] > } > ], > "tunables": { > "choose_local_tries": 0, > "choose_local_fallback_tries": 0, > "choose_total_tries": 50, > "chooseleaf_descend_once": 1, > "chooseleaf_vary_r": 1, > "straw_calc_version": 1, > "allowed_bucket_algs": 54, > "profile": "hammer", > "optimal_tunables": 0, > "legacy_tunables": 0, > "require_feature_tunables": 1, > "require_feature_tunables2": 1, > "require_feature_tunables3": 1, > "has_v2_rules": 0, > "has_v3_rules": 0, > "has_v4_buckets": 0 > } > } > > > I read a thread > (http://lists.ceph.com/pipermail/ceph-users-ceph.com/2013-November/006017.html) > from this mailling list, I tried everything (tunnable to optimal, add more > pg, use the same weight ), but I still got this issue. > > Do you have any ideas to fix this situation ? > > > > _______________________________________________ > ceph-users mailing list > ceph-users@xxxxxxxxxxxxxx > http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com > _______________________________________________ ceph-users mailing list ceph-users@xxxxxxxxxxxxxx http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com