On my test cluster, some PGs are stuck unclean forever (pool 24, size=2). The directory /var/lib/ceph/osd/ceph-X/current/24.126_head/ is empty on all OSDs. Any idea what is wrong, and how can I recover from this state? # ceph -v ceph version 0.72.2 (a913ded2ff138aefb8cb84d347d72164099cfd60) # ceph osd dump epoch 9419 fsid 9529fb9b-6c5e-4ca6-bc6f-4da46c611d75 created 2014-01-10 14:00:50.536852 modified 2014-02-10 11:37:32.301989 flags pool 24 'ceph2' rep size 2 min_size 1 crush_ruleset 0 object_hash rjenkins pg_num 600 pgp_num 600 last_change 7266 owner 0 removed_snaps [1~7,1c~2,26~1,28~1,2a~2] pool 25 'ceph3' rep size 3 min_size 2 crush_ruleset 0 object_hash rjenkins pg_num 512 pgp_num 512 last_change 1588 owner 0 removed_snaps [1~3] max_osd 42 osd.0 up in weight 1 up_from 9416 up_thru 9416 down_at 9411 last_clean_interval [9366,9406) 10.10.10.201:6815/4607 10.10.10.201:6816/4607 10.10.10.201:6817/4607 10.10.10.201:6818/4607 exists,up d1cc076f-b5f3-41ca-897e-3a0e71280f83 osd.1 up in weight 1 up_from 9415 up_thru 9415 down_at 9409 last_clean_interval [9367,9406) 10.10.10.201:6810/4341 10.10.10.201:6811/4341 10.10.10.201:6812/4341 10.10.10.201:6813/4341 exists,up 23609eba-e9fa-4822-b4a9-95447df74c52 osd.2 up in weight 1 up_from 9413 up_thru 9413 down_at 9411 last_clean_interval [9365,9406) 10.10.10.201:6800/3870 10.10.10.201:6801/3870 10.10.10.201:6802/3870 10.10.10.201:6803/3870 exists,up 8d499f20-8ea4-4dab-8991-c293eb374ff6 osd.3 up in weight 1 up_from 9414 up_thru 9414 down_at 9407 last_clean_interval [9366,9406) 10.10.10.201:6805/4058 10.10.10.201:6806/4058 10.10.10.201:6807/4058 10.10.10.201:6808/4058 exists,up ab46e1e2-22e7-4555-87ff-828b3d447a04 osd.4 up in weight 1 up_from 9405 up_thru 9416 down_at 9399 last_clean_interval [9380,9395) 10.10.10.202:6815/4786 10.10.10.202:6816/4786 10.10.10.202:6817/4786 10.10.10.202:6818/4786 exists,up 9cea4273-e594-4392-9826-8463c244fe0d osd.5 up in weight 1 up_from 9403 up_thru 9416 down_at 9401 last_clean_interval [9251,9395) 
10.10.10.202:6800/3983 10.10.10.202:6801/3983 10.10.10.202:6802/3983 10.10.10.202:6803/3983 exists,up 2cfe91cc-03f9-49b2-a41a-17e61d99aa39 osd.6 up in weight 1 up_from 9405 up_thru 9416 down_at 9395 last_clean_interval [9255,9394) 10.10.10.202:6810/4501 10.10.10.202:6811/4501 10.10.10.202:6812/4501 10.10.10.202:6813/4501 exists,up c0a6094d-fbd3-4fff-88eb-2345177378ad osd.7 up in weight 1 up_from 9403 up_thru 9416 down_at 9397 last_clean_interval [9376,9395) 10.10.10.202:6805/4283 10.10.10.202:6806/4283 10.10.10.202:6807/4283 10.10.10.202:6808/4283 exists,up d1892051-3e8b-41a8-a14f-26284db7bc20 osd.8 up in weight 1 up_from 9391 up_thru 9416 down_at 9388 last_clean_interval [9372,9382) 10.10.10.203:6815/4719 10.10.10.203:6816/4719 10.10.10.203:6817/4719 10.10.10.203:6818/4719 exists,up fe79adf5-0533-4418-84ea-322634bff457 osd.9 up in weight 1 up_from 9392 up_thru 9416 down_at 9382 last_clean_interval [9228,9381) 10.10.10.203:6805/4165 10.10.10.203:6806/4165 10.10.10.203:6807/4165 10.10.10.203:6808/4165 exists,up 62e9bdaa-6534-4f78-a94c-8fc40c949973 osd.10 up in weight 1 up_from 9390 up_thru 9416 down_at 9386 last_clean_interval [9227,9382) 10.10.10.203:6810/4407 10.10.10.203:6811/4407 10.10.10.203:6812/4407 10.10.10.203:6813/4407 exists,up 2d140958-7869-4b73-944d-dcaf7e3fc553 osd.11 up in weight 1 up_from 9393 up_thru 9416 down_at 9384 last_clean_interval [9227,9382) 10.10.10.203:6800/3979 10.10.10.203:6801/3979 10.10.10.203:6802/3979 10.10.10.203:6803/3979 exists,up 5a3151e9-647c-45e5-a2ff-15a2f39ea794 pg_temp 31.3 [2,8,1] …. 
# ceph health HEALTH_WARN 3 pgs down; 41 pgs incomplete; 41 pgs stuck inactive; 41 pgs stuck unclean; 3 requests are blocked > 32 sec # ceph health detail … pg 24.126 is stuck unclean since forever, current state down+incomplete, last acting [7,8] … # ceph pg 24.126 query { "state": "down+incomplete", "epoch": 9419, "up": [ 7, 8], "acting": [ 7, 8], "info": { "pgid": "24.126", "last_update": "0'0", "last_complete": "0'0", "log_tail": "0'0", "last_user_version": 0, "last_backfill": "MAX", "purged_snaps": "[]", "history": { "epoch_created": 1541, "last_epoch_started": 7406, "last_epoch_clean": 7268, "last_epoch_split": 0, "same_up_since": 9403, "same_interval_since": 9403, "same_primary_since": 9403, "last_scrub": "6772'8445", "last_scrub_stamp": "2014-02-08 18:50:24.166732", "last_deep_scrub": "6772'8445", "last_deep_scrub_stamp": "2014-02-08 18:50:24.166732", "last_clean_scrub_stamp": "2014-02-08 18:50:24.166732"}, "stats": { "version": "0'0", "reported_seq": "568", "reported_epoch": "9419", "state": "down+incomplete", "last_fresh": "2014-02-10 11:37:32.315396", "last_change": "2014-02-10 10:44:04.946429", "last_active": "0.000000", "last_clean": "0.000000", "last_became_active": "0.000000", "last_unstale": "2014-02-10 11:37:32.315396", "mapping_epoch": 9397, "log_start": "0'0", "ondisk_log_start": "0'0", "created": 1541, "last_epoch_clean": 7268, "parent": "0.0", "parent_split_bits": 0, "last_scrub": "6772'8445", "last_scrub_stamp": "2014-02-08 18:50:24.166732", "last_deep_scrub": "6772'8445", "last_deep_scrub_stamp": "2014-02-08 18:50:24.166732", "last_clean_scrub_stamp": "2014-02-08 18:50:24.166732", "log_size": 0, "ondisk_log_size": 0, "stats_invalid": "0", "stat_sum": { "num_bytes": 0, "num_objects": 0, "num_object_clones": 0, "num_object_copies": 0, "num_objects_missing_on_primary": 0, "num_objects_degraded": 0, "num_objects_unfound": 0, "num_read": 0, "num_read_kb": 0, "num_write": 0, "num_write_kb": 0, "num_scrub_errors": 0, "num_shallow_scrub_errors": 0, 
"num_deep_scrub_errors": 0, "num_objects_recovered": 0, "num_bytes_recovered": 0, "num_keys_recovered": 0}, "stat_cat_sum": {}, "up": [ 7, 8], "acting": [ 7, 8]}, "empty": 1, "dne": 0, "incomplete": 0, "last_epoch_started": 0}, "recovery_state": [ { "name": "Started\/Primary\/Peering", "enter_time": "2014-02-10 10:44:04.921671", "past_intervals": [ { "first": 7267, "last": 7268, "maybe_went_rw": 1, "up": [ 7], "acting": [ 7]}, { "first": 7269, "last": 7273, "maybe_went_rw": 1, "up": [ 7, 1], "acting": [ 7, 1]}, { "first": 7274, "last": 7335, "maybe_went_rw": 1, "up": [ 7, 11], "acting": [ 7, 11]}, { "first": 7336, "last": 7336, "maybe_went_rw": 0, "up": [ 7, 1], "acting": [ 7, 1]}, { "first": 7337, "last": 7391, "maybe_went_rw": 1, "up": [ 7, 8], "acting": [ 7, 8]}, { "first": 7392, "last": 7395, "maybe_went_rw": 1, "up": [ 1, 8], "acting": [ 1, 8]}, { "first": 7396, "last": 7423, "maybe_went_rw": 1, "up": [ 1, 8], "acting": [ 7, 1]}, { "first": 7424, "last": 7425, "maybe_went_rw": 1, "up": [ 1, 8], "acting": [ 1]}, { "first": 7426, "last": 7963, "maybe_went_rw": 1, "up": [ 1, 8], "acting": [ 1, 8]}, { "first": 7964, "last": 7964, "maybe_went_rw": 0, "up": [ 1], "acting": [ 1]}, { "first": 7965, "last": 7973, "maybe_went_rw": 0, "up": [ 1, 8], "acting": [ 1, 8]}, { "first": 7974, "last": 7974, "maybe_went_rw": 0, "up": [ 8], "acting": [ 8]}, { "first": 7975, "last": 8732, "maybe_went_rw": 1, "up": [ 1, 8], "acting": [ 1, 8]}, { "first": 8733, "last": 8835, "maybe_went_rw": 1, "up": [ 4, 8], "acting": [ 4, 8]}, { "first": 8836, "last": 8837, "maybe_went_rw": 1, "up": [ 6, 8], "acting": [ 6, 8]}, { "first": 8838, "last": 9223, "maybe_went_rw": 1, "up": [ 7, 8], "acting": [ 7, 8]}, { "first": 9224, "last": 9225, "maybe_went_rw": 1, "up": [ 7], "acting": [ 7]}, { "first": 9226, "last": 9233, "maybe_went_rw": 1, "up": [ 7, 8], "acting": [ 7, 8]}, { "first": 9234, "last": 9237, "maybe_went_rw": 1, "up": [ 8], "acting": [ 8]}, { "first": 9238, "last": 9244, 
"maybe_went_rw": 1, "up": [ 7, 8], "acting": [ 7, 8]}, { "first": 9245, "last": 9252, "maybe_went_rw": 1, "up": [ 8], "acting": [ 8]}, { "first": 9253, "last": 9369, "maybe_went_rw": 1, "up": [ 7, 8], "acting": [ 7, 8]}, { "first": 9370, "last": 9371, "maybe_went_rw": 1, "up": [ 7], "acting": [ 7]}, { "first": 9372, "last": 9373, "maybe_went_rw": 1, "up": [ 7, 8], "acting": [ 7, 8]}, { "first": 9374, "last": 9375, "maybe_went_rw": 1, "up": [ 8], "acting": [ 8]}, { "first": 9376, "last": 9387, "maybe_went_rw": 1, "up": [ 7, 8], "acting": [ 7, 8]}, { "first": 9388, "last": 9390, "maybe_went_rw": 1, "up": [ 7], "acting": [ 7]}, { "first": 9391, "last": 9396, "maybe_went_rw": 1, "up": [ 7, 8], "acting": [ 7, 8]}, { "first": 9397, "last": 9402, "maybe_went_rw": 1, "up": [ 8], "acting": [ 8]}], "probing_osds": [ 1, 4, 6, 7, 8], "down_osds_we_would_probe": [], "peering_blocked_by": []}, { "name": "Started", "enter_time": "2014-02-10 10:44:04.921471"}]} # ceph pg 24.126 list_missing { "offset": { "oid": "", "key": "", "snapid": 0, "hash": 0, "max": 0, "pool": -1, "namespace": ""}, "num_missing": 0, "num_unfound": 0, "objects": [], "more": 0} |
_______________________________________________ ceph-users mailing list ceph-users@xxxxxxxxxxxxxx http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com