FYI, it is a known issue: http://tracker.ceph.com/issues/6109 On 01/09/2014 00:02, Loic Dachary wrote: > Hi Ceph, > > In a mixed dumpling / emperor cluster, osd 2 has been removed but is still listed in > > "might_have_unfound": [ > { "osd": 2, > "status": "osd is down"}, > { "osd": 6, > "status": "already probed"}], > > and because of that, mark_unfound_lost fails with > > # ceph pg 4.46 mark_unfound_lost revert > Error EINVAL: pg has 1 objects but we haven't probed all sources, not marking lost > > What would be the recommended way to fix this? > > FWIW, the missing object is due to an XFS read error > > # cp '/var/lib/ceph/osd/ceph-2/current/4.46_head/DIR_6/DIR_C/DIR_D/rbd\udata.9ad9d26b8b4567.00000000000007b1__head_0BC0BDC6__4' . > cp: reading `/var/lib/ceph/osd/ceph-2/current/4.46_head/DIR_6/DIR_C/DIR_D/rbd\\udata.9ad9d26b8b4567.00000000000007b1__head_0BC0BDC6__4': Input/output error > > that is not caught by xfs_repair and I expect the older version of the object on the remaining OSD to be OK. 
> > Cheers > > osd 6 is running > # ceph --version > ceph version 0.72.2 (a913ded2ff138aefb8cb84d347d72164099cfd60) > > and > osd 1 is running > # ceph --version > ceph version 0.67.4 (ad85b8bfafea6232d64cb7ba76a8b6e8252fa0c7) > > # ceph pg 4.46 mark_unfound_lost revert > Error EINVAL: pg has 1 objects but we haven't probed all sources, not marking lost > # ceph pg 4.46 list_missing > { "offset": { "oid": "", > "key": "", > "snapid": 0, > "hash": 0, > "max": 0, > "pool": -1, > "namespace": ""}, > "num_missing": 1, > "num_unfound": 1, > "objects": [ > { "oid": { "oid": "rbd_data.9ad9d26b8b4567.00000000000007b1", > "key": "", > "snapid": -2, > "hash": 197180870, > "max": 0, > "pool": 4, > "namespace": ""}, > "need": "328685'1233912", > "have": "328683'1233904", > "locations": []}], > "more": 0} > # ceph pg 4.46 query > { "state": "active+recovering+degraded+remapped", > "epoch": 346424, > "up": [ > 6, > 1], > "acting": [ > 1, > 6], > "info": { "pgid": "4.46", > "last_update": "346424'1288927", > "last_complete": "0'0", > "log_tail": "328683'1233911", > "last_backfill": "MAX", > "purged_snaps": "[1~3]", > "history": { "epoch_created": 195, > "last_epoch_started": 346424, > "last_epoch_clean": 328685, > "last_epoch_split": 0, > "same_up_since": 346423, > "same_interval_since": 346423, > "same_primary_since": 346423, > "last_scrub": "328664'1230185", > "last_scrub_stamp": "2014-08-23 09:34:01.524854", > "last_deep_scrub": "328604'1208887", > "last_deep_scrub_stamp": "2014-08-20 09:33:19.073523", > "last_clean_scrub_stamp": "2014-08-23 09:34:01.524854"}, > "stats": { "version": "346424'1288927", > "reported_seq": "2553234", > "reported_epoch": "346424", > "state": "active+recovering+degraded+remapped", > "last_fresh": "2014-08-31 23:47:49.866548", > "last_change": "2014-08-31 23:44:22.571492", > "last_active": "2014-08-31 23:47:49.866548", > "last_clean": "2014-08-23 22:27:23.391412", > "last_became_active": "0.000000", > "last_unstale": "2014-08-31 23:47:49.866548", > 
"mapping_epoch": 346421, > "log_start": "328683'1233911", > "ondisk_log_start": "328683'1233911", > "created": 195, > "last_epoch_clean": 328685, > "parent": "0.0", > "parent_split_bits": 0, > "last_scrub": "328664'1230185", > "last_scrub_stamp": "2014-08-23 09:34:01.524854", > "last_deep_scrub": "328604'1208887", > "last_deep_scrub_stamp": "2014-08-20 09:33:19.073523", > "last_clean_scrub_stamp": "2014-08-23 09:34:01.524854", > "log_size": 55016, > "ondisk_log_size": 55016, > "stats_invalid": "0", > "stat_sum": { "num_bytes": 12584300544, > "num_objects": 3035, > "num_object_clones": 1, > "num_object_copies": 0, > "num_objects_missing_on_primary": 0, > "num_objects_degraded": 0, > "num_objects_unfound": 0, > "num_read": 51123, > "num_read_kb": 1525186, > "num_write": 1288927, > "num_write_kb": 19076876, > "num_scrub_errors": 0, > "num_shallow_scrub_errors": 0, > "num_deep_scrub_errors": 0, > "num_objects_recovered": 20047, > "num_bytes_recovered": 78532055040, > "num_keys_recovered": 0}, > "stat_cat_sum": {}, > "up": [ > 6, > 1], > "acting": [ > 1, > 6]}, > "empty": 0, > "dne": 0, > "incomplete": 0, > "last_epoch_started": 346424}, > "recovery_state": [ > { "name": "Started\/Primary\/Active", > "enter_time": "2014-08-31 23:44:22.435483", > "might_have_unfound": [ > { "osd": 2, > "status": "osd is down"}, > { "osd": 6, > "status": "already probed"}], > "recovery_progress": { "backfill_target": 6, > "waiting_on_backfill": 0, > "backfill_pos": "0\/\/0\/\/-1", > "backfill_info": { "begin": "0\/\/0\/\/-1", > "end": "0\/\/0\/\/-1", > "objects": []}, > "peer_backfill_info": { "begin": "0\/\/0\/\/-1", > "end": "0\/\/0\/\/-1", > "objects": []}, > "backfills_in_flight": [], > "pull_from_peer": [], > "pushing": []}, > "scrub": { "scrubber.epoch_start": "0", > "scrubber.active": 0, > "scrubber.block_writes": 0, > "scrubber.finalizing": 0, > "scrubber.waiting_on": 0, > "scrubber.waiting_on_whom": []}}, > { "name": "Started", > "enter_time": "2014-08-31 23:44:21.177460"}]} > -- 
Loïc Dachary, Artisan Logiciel Libre -------------- next part -------------- A non-text attachment was scrubbed... Name: signature.asc Type: application/pgp-signature Size: 263 bytes Desc: OpenPGP digital signature URL: <http://lists.ceph.com/pipermail/ceph-users-ceph.com/attachments/20140902/a91a872f/attachment.pgp>