They're unclean because CRUSH isn't generating an acting set of sufficient size, so the OSDs/monitors are keeping them remapped in order to maintain the replication guarantees. See the docs on the CRUSH tunables options for a discussion of this.
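The usual workflow for that on an existing cluster is to pull down the CRUSH map, switch on the newer tunables, and inject it back. A minimal sketch, assuming you want the bobtail profile and that your 0.61.x crushtool has these flags (the paths are just examples; verify the flags against your man page first):

    # Grab the current CRUSH map and keep a decompiled copy around,
    # in case you prefer to edit the tunables by hand (see the snippet
    # below the quoted map).
    ceph osd getcrushmap -o /tmp/crushmap
    crushtool -d /tmp/crushmap -o /tmp/crushmap.txt

    # Flip the bobtail-era tunables directly on the binary map:
    crushtool -i /tmp/crushmap \
        --set-choose-local-tries 0 \
        --set-choose-local-fallback-tries 0 \
        --set-choose-total-tries 50 \
        --set-chooseleaf-descend-once 1 \
        -o /tmp/crushmap.new

    # Inject it; expect some data movement while the PGs repeer.
    ceph osd setcrushmap -i /tmp/crushmap.new

Two caveats: the newer tunables require client-side support, so old kernel clients in particular may refuse to connect afterwards; and newer releases can do all of this in one step with "ceph osd crush tunables optimal" -- I don't remember offhand whether that command exists in 0.61.x, so check before counting on it.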
-Greg
Software Engineer #42 @ http://inktank.com | http://ceph.com

On Mon, Aug 12, 2013 at 7:16 PM, 不坏阿峰 <onlydebian@xxxxxxxxx> wrote:
> I have PGs that have been stuck unclean for a long time and don't know how to
> fix it. Can someone help me take a look?
>
> Environment: Debian 7 + ceph 0.61.7
>
> ----------------
> root@ceph-admin:~# ceph -s
>    health HEALTH_WARN 6 pgs stuck unclean
>    monmap e2: 2 mons at {a=192.168.250.15:6789/0,b=192.168.250.8:6789/0}, election epoch 8, quorum 0,1 a,b
>    osdmap e159: 4 osds: 4 up, 4 in
>    pgmap v23487: 584 pgs: 578 active+clean, 6 active+remapped; 4513 MB data, 12658 MB used, 387 GB / 399 GB avail; 426B/s wr, 0op/s
>    mdsmap e114: 1/1/1 up {0=a=up:active}, 1 up:standby
>
> --------------
> root@ceph-admin:~# ceph health detail
> HEALTH_WARN 6 pgs stuck unclean
> pg 0.50 is stuck unclean since forever, current state active+remapped, last acting [3,1]
> pg 1.4f is stuck unclean since forever, current state active+remapped, last acting [3,1]
> pg 2.4e is stuck unclean since forever, current state active+remapped, last acting [3,1]
> pg 1.8a is stuck unclean since forever, current state active+remapped, last acting [2,1]
> pg 0.8b is stuck unclean since forever, current state active+remapped, last acting [2,1]
> pg 2.89 is stuck unclean since forever, current state active+remapped, last acting [2,1]
>
> --------------
> root@ceph-admin:~# ceph osd tree
>
> # id    weight  type name               up/down reweight
> -1      4       root default
> -3      2           rack unknownrack
> -2      2               host ceph-admin
> 0       1                   osd.0       up      1
> 1       1                   osd.1       up      1
> -4      1           host ceph-node02
> 2       1               osd.2           down    1
> -5      1           host ceph-node01
> 3       1               osd.3           up      1
>
> -----------------------
> root@ceph-admin:~# ceph osd dump
>
> epoch 159
> fsid db32486a-7ad3-4afe-8b67-49ee2a6dcecf
> created 2013-08-08 13:45:52.579015
> modified 2013-08-12 05:18:37.895385
> flags
>
> pool 0 'data' rep size 2 min_size 1 crush_ruleset 0 object_hash rjenkins pg_num 192 pgp_num 192 last_change 1 owner 0 crash_replay_interval 45
> pool 1 'metadata' rep size 2 min_size 1 crush_ruleset 1 object_hash rjenkins pg_num 192 pgp_num 192 last_change 1 owner 0
> pool 2 'rbd' rep size 2 min_size 1 crush_ruleset 2 object_hash rjenkins pg_num 192 pgp_num 192 last_change 1 owner 0
> pool 3 'volumes' rep size 2 min_size 1 crush_ruleset 0 object_hash rjenkins pg_num 8 pgp_num 8 last_change 39 owner 18446744073709551615
>
> max_osd 5
> osd.0 up in weight 1 up_from 138 up_thru 157 down_at 137 last_clean_interval [45,135) 192.168.250.15:6803/5735 192.168.250.15:6804/5735 192.168.250.15:6805/5735 exists,up 99f2aec0-2367-4b68-86f2-58d6d41589c6
> osd.1 up in weight 1 up_from 140 up_thru 157 down_at 137 last_clean_interval [47,136) 192.168.250.15:6806/6882 192.168.250.15:6807/6882 192.168.250.15:6808/6882 exists,up d458ca35-ec55-47a9-a7ce-47b9ddf4d889
> osd.2 up in weight 1 up_from 157 up_thru 158 down_at 135 last_clean_interval [48,134) 192.168.250.8:6800/3564 192.168.250.8:6801/3564 192.168.250.8:6802/3564 exists,up c4ee9f05-bd5f-4536-8cb8-0af82c00d3d6
> osd.3 up in weight 1 up_from 143 up_thru 157 down_at 141 last_clean_interval [53,141) 192.168.250.16:6802/14618 192.168.250.16:6804/14618 192.168.250.16:6805/14618 exists,up e9d67b85-97d1-4635-95c8-f7c50cd7f6b1
>
> pg_temp 0.50 [3,1]
> pg_temp 0.8b [2,1]
> pg_temp 1.4f [3,1]
> pg_temp 1.8a [2,1]
> pg_temp 2.4e [3,1]
> pg_temp 2.89 [2,1]
>
> --------------------------
> root@ceph-admin:/etc/ceph# crushtool -d /tmp/crushmap
> # begin crush map
>
> # devices
> device 0 osd.0
> device 1 osd.1
> device 2 osd.2
> device 3 osd.3
>
> # types
> type 0 osd
> type 1 host
> type 2 rack
> type 3 row
> type 4 room
> type 5 datacenter
> type 6 root
>
> # buckets
> host ceph-admin {
>         id -2           # do not change unnecessarily
>         # weight 2.000
>         alg straw
>         hash 0  # rjenkins1
>         item osd.0 weight 1.000
>         item osd.1 weight 1.000
> }
> rack unknownrack {
>         id -3           # do not change unnecessarily
>         # weight 2.000
>         alg straw
>         hash 0  # rjenkins1
>         item ceph-admin weight 2.000
> }
> host ceph-node02 {
>         id -4           # do not change unnecessarily
>         # weight 1.000
>         alg straw
>         hash 0  # rjenkins1
>         item osd.2 weight 1.000
> }
> host ceph-node01 {
>         id -5           # do not change unnecessarily
>         # weight 1.000
>         alg straw
>         hash 0  # rjenkins1
>         item osd.3 weight 1.000
> }
> root default {
>         id -1           # do not change unnecessarily
>         # weight 4.000
>         alg straw
>         hash 0  # rjenkins1
>         item unknownrack weight 2.000
>         item ceph-node02 weight 1.000
>         item ceph-node01 weight 1.000
> }
>
> # rules
> rule data {
>         ruleset 0
>         type replicated
>         min_size 1
>         max_size 10
>         step take default
>         step choose firstn 0 type osd
>         step emit
> }
> rule volumes {
>         ruleset 3
>         type replicated
>         min_size 1
>         max_size 10
>         step take default
>         step choose firstn 0 type osd
>         step emit
> }
> rule metadata {
>         ruleset 1
>         type replicated
>         min_size 1
>         max_size 10
>         step take default
>         step choose firstn 0 type osd
>         step emit
> }
> rule rbd {
>         ruleset 2
>         type replicated
>         min_size 1
>         max_size 10
>         step take default
>         step choose firstn 0 type osd
>         step emit
> }
>
> # end crush map
> -----------
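To make the hand-editing route concrete: in a decompiled map like the one quoted above, the tunables go at the very top, between "# begin crush map" and the "# devices" section. The bobtail profile written out by hand looks like this (same caveats as the sketch above):

    # tunables (bobtail profile)
    tunable choose_local_tries 0
    tunable choose_local_fallback_tries 0
    tunable choose_total_tries 50
    tunable chooseleaf_descend_once 1

Recompile with "crushtool -c /tmp/crushmap.txt -o /tmp/crushmap.new", inject it as above, and the pg_temp entries in your osd dump should drain away as the six PGs go active+clean. You can watch that with "ceph pg dump_stuck unclean" or plain "ceph -s".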