Hi Ceph, In a mixed dumpling / emperor cluster, osd 2 has been removed but is still listed in "might_have_unfound": [ { "osd": 2, "status": "osd is down"}, { "osd": 6, "status": "already probed"}], and because of that mark_unfound_lost fails with # ceph pg 4.46 mark_unfound_lost revert Error EINVAL: pg has 1 objects but we haven't probed all sources, not marking lost What would be the recommended way to fix this? FWIW the missing object is caused by an XFS read error # cp '/var/lib/ceph/osd/ceph-2/current/4.46_head/DIR_6/DIR_C/DIR_D/rbd\udata.9ad9d26b8b4567.00000000000007b1__head_0BC0BDC6__4' . cp: reading `/var/lib/ceph/osd/ceph-2/current/4.46_head/DIR_6/DIR_C/DIR_D/rbd\\udata.9ad9d26b8b4567.00000000000007b1__head_0BC0BDC6__4': Input/output error that is not caught by xfs_repair, and I expect the older version of the object on the remaining OSD to be OK. Cheers osd 6 is running # ceph --version ceph version 0.72.2 (a913ded2ff138aefb8cb84d347d72164099cfd60) and osd 1 is running # ceph --version ceph version 0.67.4 (ad85b8bfafea6232d64cb7ba76a8b6e8252fa0c7) # ceph pg 4.46 mark_unfound_lost revert Error EINVAL: pg has 1 objects but we haven't probed all sources, not marking lost # ceph pg 4.46 list_missing { "offset": { "oid": "", "key": "", "snapid": 0, "hash": 0, "max": 0, "pool": -1, "namespace": ""}, "num_missing": 1, "num_unfound": 1, "objects": [ { "oid": { "oid": "rbd_data.9ad9d26b8b4567.00000000000007b1", "key": "", "snapid": -2, "hash": 197180870, "max": 0, "pool": 4, "namespace": ""}, "need": "328685'1233912", "have": "328683'1233904", "locations": []}], "more": 0} # ceph pg 4.46 query { "state": "active+recovering+degraded+remapped", "epoch": 346424, "up": [ 6, 1], "acting": [ 1, 6], "info": { "pgid": "4.46", "last_update": "346424'1288927", "last_complete": "0'0", "log_tail": "328683'1233911", "last_backfill": "MAX", "purged_snaps": "[1~3]", "history": { "epoch_created": 195, "last_epoch_started": 346424, "last_epoch_clean": 328685, "last_epoch_split": 0, 
"same_up_since": 346423, "same_interval_since": 346423, "same_primary_since": 346423, "last_scrub": "328664'1230185", "last_scrub_stamp": "2014-08-23 09:34:01.524854", "last_deep_scrub": "328604'1208887", "last_deep_scrub_stamp": "2014-08-20 09:33:19.073523", "last_clean_scrub_stamp": "2014-08-23 09:34:01.524854"}, "stats": { "version": "346424'1288927", "reported_seq": "2553234", "reported_epoch": "346424", "state": "active+recovering+degraded+remapped", "last_fresh": "2014-08-31 23:47:49.866548", "last_change": "2014-08-31 23:44:22.571492", "last_active": "2014-08-31 23:47:49.866548", "last_clean": "2014-08-23 22:27:23.391412", "last_became_active": "0.000000", "last_unstale": "2014-08-31 23:47:49.866548", "mapping_epoch": 346421, "log_start": "328683'1233911", "ondisk_log_start": "328683'1233911", "created": 195, "last_epoch_clean": 328685, "parent": "0.0", "parent_split_bits": 0, "last_scrub": "328664'1230185", "last_scrub_stamp": "2014-08-23 09:34:01.524854", "last_deep_scrub": "328604'1208887", "last_deep_scrub_stamp": "2014-08-20 09:33:19.073523", "last_clean_scrub_stamp": "2014-08-23 09:34:01.524854", "log_size": 55016, "ondisk_log_size": 55016, "stats_invalid": "0", "stat_sum": { "num_bytes": 12584300544, "num_objects": 3035, "num_object_clones": 1, "num_object_copies": 0, "num_objects_missing_on_primary": 0, "num_objects_degraded": 0, "num_objects_unfound": 0, "num_read": 51123, "num_read_kb": 1525186, "num_write": 1288927, "num_write_kb": 19076876, "num_scrub_errors": 0, "num_shallow_scrub_errors": 0, "num_deep_scrub_errors": 0, "num_objects_recovered": 20047, "num_bytes_recovered": 78532055040, "num_keys_recovered": 0}, "stat_cat_sum": {}, "up": [ 6, 1], "acting": [ 1, 6]}, "empty": 0, "dne": 0, "incomplete": 0, "last_epoch_started": 346424}, "recovery_state": [ { "name": "Started\/Primary\/Active", "enter_time": "2014-08-31 23:44:22.435483", "might_have_unfound": [ { "osd": 2, "status": "osd is down"}, { "osd": 6, "status": "already probed"}], 
"recovery_progress": { "backfill_target": 6, "waiting_on_backfill": 0, "backfill_pos": "0\/\/0\/\/-1", "backfill_info": { "begin": "0\/\/0\/\/-1", "end": "0\/\/0\/\/-1", "objects": []}, "peer_backfill_info": { "begin": "0\/\/0\/\/-1", "end": "0\/\/0\/\/-1", "objects": []}, "backfills_in_flight": [], "pull_from_peer": [], "pushing": []}, "scrub": { "scrubber.epoch_start": "0", "scrubber.active": 0, "scrubber.block_writes": 0, "scrubber.finalizing": 0, "scrubber.waiting_on": 0, "scrubber.waiting_on_whom": []}}, { "name": "Started", "enter_time": "2014-08-31 23:44:21.177460"}]} -- Loïc Dachary, Artisan Logiciel Libre -------------- next part -------------- A non-text attachment was scrubbed... Name: signature.asc Type: application/pgp-signature Size: 263 bytes Desc: OpenPGP digital signature URL: <http://lists.ceph.com/pipermail/ceph-users-ceph.com/attachments/20140901/70e33899/attachment.pgp>