Re: pgs stuck unclean since forever, current state active+remapped

不坏阿峰 <onlydebian@xxxxxxxxx> · Fri, 16 Aug 2013 09:57:27 +0700

Many thanks. I resolved it by running:

#ceph osd getcrushmap -o /tmp/crush
#crushtool -i /tmp/crush --enable-unsafe-tunables \
    --set-choose-local-tries 0 --set-choose-local-fallback-tries 0 \
    --set-choose-total-tries 50 -o /tmp/crush.new
root@ceph-admin:/etc/ceph# ceph osd setcrushmap -i /tmp/crush.new

So far, health is OK.

2013/8/16 Gregory Farnum <greg@xxxxxxxxxxx>:
> They're unclean because CRUSH isn't generating an acting set of
> sufficient size so the OSDs/monitors are keeping them remapped in
> order to maintain replication guarantees. Look in the docs for the
> crush tunables options for a discussion on this.
> -Greg
> Software Engineer #42 @ http://inktank.com | http://ceph.com
>
>
> On Mon, Aug 12, 2013 at 7:16 PM, 不坏阿峰 <onlydebian@xxxxxxxxx> wrote:
>> I have had PGs stuck for a long time and do not know how to fix them. Can
>> someone help check?
>>
>> Environment: Debian 7 + ceph 0.617
>>
>> ----------------
>> root@ceph-admin:~# ceph -s
>>    health HEALTH_WARN 6 pgs stuck unclean
>>    monmap e2: 2 mons at {a=192.168.250.15:6789/0,b=192.168.250.8:6789/0},
>> election epoch 8, quorum 0,1 a,b
>>    osdmap e159: 4 osds: 4 up, 4 in
>>     pgmap v23487: 584 pgs: 578 active+clean, 6 active+remapped; 4513 MB
>> data, 12658 MB used, 387 GB / 399 GB avail; 426B/s wr, 0op/s
>>    mdsmap e114: 1/1/1 up {0=a=up:active}, 1 up:standby
>>
>> --------------
>> root@ceph-admin:~# ceph health detail
>> HEALTH_WARN 6 pgs stuck unclean
>> pg 0.50 is stuck unclean since forever, current state active+remapped, last
>> acting [3,1]
>> pg 1.4f is stuck unclean since forever, current state active+remapped, last
>> acting [3,1]
>> pg 2.4e is stuck unclean since forever, current state active+remapped, last
>> acting [3,1]
>> pg 1.8a is stuck unclean since forever, current state active+remapped, last
>> acting [2,1]
>> pg 0.8b is stuck unclean since forever, current state active+remapped, last
>> acting [2,1]
>> pg 2.89 is stuck unclean since forever, current state active+remapped, last
>> acting [2,1]
>> --------------
>> root@ceph-admin:~# ceph osd tree
>>
>> # id    weight  type name       up/down reweight
>> -1      4       root default
>> -3      2        rack unknownrack
>> -2      2               host ceph-admin
>> 0       1                       osd.0   up      1
>> 1       1                       osd.1   up      1
>> -4      1        host ceph-node02
>> 2       1               osd.2   down    1
>> -5      1        host ceph-node01
>> 3       1               osd.3   up      1
>> -----------------------
>> root@ceph-admin:~# ceph osd dump
>>
>> epoch 159
>> fsid db32486a-7ad3-4afe-8b67-49ee2a6dcecf
>> created 2013-08-08 13:45:52.579015
>> modified 2013-08-12 05:18:37.895385
>> flags
>>
>> pool 0 'data' rep size 2 min_size 1 crush_ruleset 0 object_hash rjenkins
>> pg_num 192 pgp_num 192 last_change 1 owner 0 crash_replay_interval 45
>> pool 1 'metadata' rep size 2 min_size 1 crush_ruleset 1 object_hash rjenkins
>> pg_num 192 pgp_num 192 last_change 1 owner 0
>> pool 2 'rbd' rep size 2 min_size 1 crush_ruleset 2 object_hash rjenkins
>> pg_num 192 pgp_num 192 last_change 1 owner 0
>> pool 3 'volumes' rep size 2 min_size 1 crush_ruleset 0 object_hash rjenkins
>> pg_num 8 pgp_num 8 last_change 39 owner 18446744073709551615
>>
>> max_osd 5
>> osd.0 up   in  weight 1 up_from 138 up_thru 157 down_at 137
>> last_clean_interval [45,135) 192.168.250.15:6803/5735
>> 192.168.250.15:6804/5735 192.168.250.15:6805/5735 exists,up
>> 99f2aec0-2367-4b68-86f2-58d6d41589c6
>> osd.1 up   in  weight 1 up_from 140 up_thru 157 down_at 137
>> last_clean_interval [47,136) 192.168.250.15:6806/6882
>> 192.168.250.15:6807/6882 192.168.250.15:6808/6882 exists,up
>> d458ca35-ec55-47a9-a7ce-47b9ddf4d889
>> osd.2 up   in  weight 1 up_from 157 up_thru 158 down_at 135
>> last_clean_interval [48,134) 192.168.250.8:6800/3564 192.168.250.8:6801/3564
>> 192.168.250.8:6802/3564 exists,up c4ee9f05-bd5f-4536-8cb8-0af82c00d3d6
>> osd.3 up   in  weight 1 up_from 143 up_thru 157 down_at 141
>> last_clean_interval [53,141) 192.168.250.16:6802/14618
>> 192.168.250.16:6804/14618 192.168.250.16:6805/14618 exists,up
>> e9d67b85-97d1-4635-95c8-f7c50cd7f6b1
>>
>> pg_temp 0.50 [3,1]
>> pg_temp 0.8b [2,1]
>> pg_temp 1.4f [3,1]
>> pg_temp 1.8a [2,1]
>> pg_temp 2.4e [3,1]
>> pg_temp 2.89 [2,1]
>> --------------------------
>> root@ceph-admin:/etc/ceph# crushtool -d /tmp/crushmap
>> # begin crush map
>>
>> # devices
>> device 0 osd.0
>> device 1 osd.1
>> device 2 osd.2
>> device 3 osd.3
>>
>> # types
>> type 0 osd
>> type 1 host
>> type 2 rack
>> type 3 row
>> type 4 room
>> type 5 datacenter
>> type 6 root
>>
>> # buckets
>> host ceph-admin {
>>         id -2           # do not change unnecessarily
>>         # weight 2.000
>>         alg straw
>>         hash 0  # rjenkins1
>>         item osd.0 weight 1.000
>>         item osd.1 weight 1.000
>> }
>> rack unknownrack {
>>         id -3           # do not change unnecessarily
>>         # weight 2.000
>>         alg straw
>>         hash 0  # rjenkins1
>>         item ceph-admin weight 2.000
>> }
>> host ceph-node02 {
>>         id -4           # do not change unnecessarily
>>         # weight 1.000
>>         alg straw
>>         hash 0  # rjenkins1
>>         item osd.2 weight 1.000
>> }
>> host ceph-node01 {
>>         id -5           # do not change unnecessarily
>>         # weight 1.000
>>         alg straw
>>         hash 0  # rjenkins1
>>         item osd.3 weight 1.000
>> }
>> root default {
>>         id -1           # do not change unnecessarily
>>         # weight 4.000
>>         alg straw
>>         hash 0  # rjenkins1
>>         item unknownrack weight 2.000
>>         item ceph-node02 weight 1.000
>>         item ceph-node01 weight 1.000
>> }
>>
>> # rules
>> rule data {
>>         ruleset 0
>>         type replicated
>>         min_size 1
>>         max_size 10
>>         step take default
>>         step choose firstn 0 type osd
>>         step emit
>> }
>> rule volumes {
>>         ruleset 3
>>         type replicated
>>         min_size 1
>>         max_size 10
>>         step take default
>>         step choose firstn 0 type osd
>>         step emit
>> }
>> rule metadata {
>>         ruleset 1
>>         type replicated
>>         min_size 1
>>         max_size 10
>>         step take default
>>         step choose firstn 0 type osd
>>         step emit
>> }
>> rule rbd {
>>         ruleset 2
>>         type replicated
>>         min_size 1
>>         max_size 10
>>         step take default
>>         step choose firstn 0 type osd
>>         step emit
>> }
>>
>> # end crush map
>> -----------
>>
>> _______________________________________________
>> ceph-users mailing list
>> ceph-users@xxxxxxxxxxxxxx
>> http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com
>>
_______________________________________________
ceph-users mailing list
ceph-users@xxxxxxxxxxxxxx
http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com