Assuming the OSD that lost its filesystem is totally gone, mark it lost. That will tell the OSDs to give up on whatever data it might have had, and you should be good to go (modulo whatever data you might have lost from only having it on the dead OSD during the reboot). -Greg Software Engineer #42 @ http://inktank.com | http://ceph.com On Tue, Jan 7, 2014 at 1:32 PM, Bryan Morris <bryan@xxxxxxxxxxxxxx> wrote: > > I have 4 PGs stuck inactive..... > > I have tried everything I can find online, and could use some help. > > I had 3 OSDs... was in process of rebooting one and another totally crashed > and corrupted its filesystem (BAD). > > So now I have 4 incomplete PGs. > > Things I've tried: > > Rebooting all OSDs. > Running 'ceph pg {pg-id} mark_unfound_lost revert' > Running 'ceph osd lost {OSD ID}' > > > Here is a PG query from one of the 4 PGs > > > > [[{ "state": "down+incomplete", > "epoch": 7994, > "up": [ > 1, > 2], > "acting": [ > 1, > 2], > "info": { "pgid": "4.1ca", > "last_update": "0'0", > "last_complete": "0'0", > "log_tail": "0'0", > "last_backfill": "MAX", > "purged_snaps": "[]", > "history": { "epoch_created": 75, > "last_epoch_started": 6719, > "last_epoch_clean": 6710, > "last_epoch_split": 0, > "same_up_since": 7973, > "same_interval_since": 7973, > "same_primary_since": 7969, > "last_scrub": "6239'25314", > "last_scrub_stamp": "2014-01-05 18:48:01.122717", > "last_deep_scrub": "6235'25313", > "last_deep_scrub_stamp": "2014-01-03 18:47:48.390112", > "last_clean_scrub_stamp": "2014-01-05 18:48:01.122717"}, > "stats": { "version": "0'0", > "reported_seq": "1259", > "reported_epoch": "7994", > "state": "down+incomplete", > "last_fresh": "2014-01-07 14:04:36.269587", > "last_change": "2014-01-07 14:04:36.269587", > "last_active": "0.000000", > "last_clean": "0.000000", > "last_became_active": "0.000000", > "last_unstale": "2014-01-07 14:04:36.269587", > "mapping_epoch": 7971, > "log_start": "0'0", > "ondisk_log_start": "0'0", > "created": 75, > 
"last_epoch_clean": 6710, > "parent": "0.0", > "parent_split_bits": 0, > "last_scrub": "6239'25314", > "last_scrub_stamp": "2014-01-05 18:48:01.122717", > "last_deep_scrub": "6235'25313", > "last_deep_scrub_stamp": "2014-01-03 18:47:48.390112", > "last_clean_scrub_stamp": "2014-01-05 18:48:01.122717", > "log_size": 0, > "ondisk_log_size": 0, > "stats_invalid": "0", > "stat_sum": { "num_bytes": 0, > "num_objects": 0, > "num_object_clones": 0, > "num_object_copies": 0, > "num_objects_missing_on_primary": 0, > "num_objects_degraded": 0, > "num_objects_unfound": 0, > "num_read": 0, > "num_read_kb": 0, > "num_write": 0, > "num_write_kb": 0, > "num_scrub_errors": 0, > "num_shallow_scrub_errors": 0, > "num_deep_scrub_errors": 0, > "num_objects_recovered": 0, > "num_bytes_recovered": 0, > "num_keys_recovered": 0}, > "stat_cat_sum": {}, > "up": [ > 1, > 2], > "acting": [ > 1, > 2]}, > "empty": 1, > "dne": 0, > "incomplete": 0, > "last_epoch_started": 0}, > "recovery_state": [ > { "name": "Started\/Primary\/Peering", > "enter_time": "2014-01-07 14:04:36.269509", > "past_intervals": [ > { "first": 5221, > "last": 5226, > "maybe_went_rw": 1, > "up": [ > 0, > 1], > "acting": [ > 0, > 1]}, > { "first": 5227, > "last": 5439, > "maybe_went_rw": 1, > "up": [ > 1, > 0], > "acting": [ > 1, > 0]}, > { "first": 5440, > "last": 5441, > "maybe_went_rw": 0, > "up": [ > 1], > "acting": [ > 1]}, > { "first": 5442, > "last": 6388, > "maybe_went_rw": 1, > "up": [ > 1, > 0], > "acting": [ > 1, > 0]}, > { "first": 6389, > "last": 6395, > "maybe_went_rw": 1, > "up": [ > 0], > "acting": [ > 0]}, > { "first": 6396, > "last": 6708, > "maybe_went_rw": 1, > "up": [ > 1, > 0], > "acting": [ > 1, > 0]}, > { "first": 6709, > "last": 6717, > "maybe_went_rw": 1, > "up": [ > 1], > "acting": [ > 1]}, > { "first": 6718, > "last": 6721, > "maybe_went_rw": 1, > "up": [ > 1, > 2], > "acting": [ > 1, > 2]}, > { "first": 6722, > "last": 6724, > "maybe_went_rw": 1, > "up": [ > 2], > "acting": [ > 2]}, > { "first": 
6725, > "last": 6737, > "maybe_went_rw": 1, > "up": [ > 0, > 2], > "acting": [ > 0, > 2]}, > { "first": 6738, > "last": 7946, > "maybe_went_rw": 1, > "up": [ > 1, > 0], > "acting": [ > 1, > 0]}, > { "first": 7947, > "last": 7948, > "maybe_went_rw": 1, > "up": [ > 1], > "acting": [ > 1]}, > { "first": 7949, > "last": 7950, > "maybe_went_rw": 1, > "up": [ > 1, > 0], > "acting": [ > 1, > 0]}, > { "first": 7951, > "last": 7952, > "maybe_went_rw": 1, > "up": [ > 0], > "acting": [ > 0]}, > { "first": 7953, > "last": 7954, > "maybe_went_rw": 1, > "up": [ > 1, > 0], > "acting": [ > 1, > 0]}, > { "first": 7955, > "last": 7956, > "maybe_went_rw": 1, > "up": [ > 1], > "acting": [ > 1]}, > { "first": 7957, > "last": 7958, > "maybe_went_rw": 1, > "up": [ > 1, > 2], > "acting": [ > 1, > 2]}, > { "first": 7959, > "last": 7966, > "maybe_went_rw": 1, > "up": [ > 1, > 0], > "acting": [ > 1, > 0]}, > { "first": 7967, > "last": 7968, > "maybe_went_rw": 1, > "up": [ > 0], > "acting": [ > 0]}, > { "first": 7969, > "last": 7970, > "maybe_went_rw": 1, > "up": [ > 1, > 0], > "acting": [ > 1, > 0]}, > { "first": 7971, > "last": 7972, > "maybe_went_rw": 1, > "up": [ > 1], > "acting": [ > 1]}], > "probing_osds": [ > 1, > 2], > "down_osds_we_would_probe": [ > 0], > "peering_blocked_by": []}, > { "name": "Started", > "enter_time": "2014-01-07 14:04:36.269462"}]} > > > > CEPH OSD TREE > > ceph osd tree > # id weight type name up/down reweight > -1 4.5 pool default > -4 1.8 host ceph2 > 1 1.8 osd.1 up 1 > -6 2.7 host ceph3 > 2 2.7 osd.2 up 1 > > > > _______________________________________________ > ceph-users mailing list > ceph-users@xxxxxxxxxxxxxx > http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com > _______________________________________________ ceph-users mailing list ceph-users@xxxxxxxxxxxxxx http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com