Hello, We have some problems with 1 pg from this morning, this is what we found so far… # ceph --version ceph version 10.2.0 (3a9fba20ec743699b69bd0181dd6c54dc01c64b9) # ceph -s cluster 2bf80721-fceb-4b63-89ee-1a5faa278493 health HEALTH_ERR 1 pgs inconsistent 2 scrub errors monmap e1: 1 mons at {cephadm01=192.168.12.150:6789/0} election epoch 7, quorum 0 cephadm01 osdmap e580: 9 osds: 9 up, 9 in flags sortbitwise pgmap v11430755: 664 pgs, 13 pools, 1010 GB data, 13894 kobjects 2142 GB used, 2355 GB / 4497 GB avail 660 active+clean 3 active+clean+scrubbing 1 active+clean+inconsistent # ceph health detail HEALTH_ERR 1 pgs inconsistent; 2 scrub errors pg 10.55 is active+clean+inconsistent, acting [3,4] 2 scrub errors # ceph pg 10.55 query { "state": "active+clean+inconsistent", "snap_trimq": "[]", "epoch": 580, "up": [ 3, 4 ], "acting": [ 3, 4 ], "actingbackfill": [ "3", "4" ], "info": { "pgid": "10.55", "last_update": "580'40334", "last_complete": "580'40334", "log_tail": "448'37299", "last_user_version": 40334, "last_backfill": "MAX", "last_backfill_bitwise": 1, "purged_snaps": "[]", "history": { "epoch_created": 329, "last_epoch_started": 577, "last_epoch_clean": 577, "last_epoch_split": 0, "last_epoch_marked_full": 0, "same_up_since": 576, "same_interval_since": 576, "same_primary_since": 572, "last_scrub": "568'40333", "last_scrub_stamp": "2017-01-26 10:06:56.062870", "last_deep_scrub": "562'40329", "last_deep_scrub_stamp": "2017-01-26 06:19:55.708518", "last_clean_scrub_stamp": "2016-07-05 14:58:45.534218" }, "stats": { "version": "580'40334", "reported_seq": "49407", "reported_epoch": "580", "state": "active+clean+inconsistent", "last_fresh": "2017-01-26 11:21:55.393989", "last_change": "2017-01-26 10:06:56.062930", "last_active": "2017-01-26 11:21:55.393989", "last_peered": "2017-01-26 11:21:55.393989", "last_clean": "2017-01-26 11:21:55.393989", "last_became_active": "2017-01-26 09:28:09.196447", "last_became_peered": "2017-01-26 09:28:09.196447", 
"last_unstale": "2017-01-26 11:21:55.393989", "last_undegraded": "2017-01-26 11:21:55.393989", "last_fullsized": "2017-01-26 11:21:55.393989", "mapping_epoch": 575, "log_start": "448'37299", "ondisk_log_start": "448'37299", "created": 329, "last_epoch_clean": 577, "parent": "0.0", "parent_split_bits": 8, "last_scrub": "568'40333", "last_scrub_stamp": "2017-01-26 10:06:56.062870", "last_deep_scrub": "562'40329", "last_deep_scrub_stamp": "2017-01-26 06:19:55.708518", "last_clean_scrub_stamp": "2016-07-05 14:58:45.534218", "log_size": 3035, "ondisk_log_size": 3035, "stats_invalid": false, "dirty_stats_invalid": false, "omap_stats_invalid": false, "hitset_stats_invalid": false, "hitset_bytes_stats_invalid": false, "pin_stats_invalid": false, "stat_sum": { "num_bytes": 2153869599, "num_objects": 28148, "num_object_clones": 0, "num_object_copies": 56296, "num_objects_missing_on_primary": 0, "num_objects_missing": 0, "num_objects_degraded": 0, "num_objects_misplaced": 0, "num_objects_unfound": 0, "num_objects_dirty": 28148, "num_whiteouts": 0, "num_read": 21, "num_read_kb": 696, "num_write": 50, "num_write_kb": 217, "num_scrub_errors": 2, "num_shallow_scrub_errors": 1, "num_deep_scrub_errors": 1, "num_objects_recovered": 0, "num_bytes_recovered": 0, "num_keys_recovered": 0, "num_objects_omap": 0, "num_objects_hit_set_archive": 0, "num_bytes_hit_set_archive": 0, "num_flush": 0, "num_flush_kb": 0, "num_evict": 0, "num_evict_kb": 0, "num_promote": 0, "num_flush_mode_high": 0, "num_flush_mode_low": 0, "num_evict_mode_some": 0, "num_evict_mode_full": 0, "num_objects_pinned": 0 }, "up": [ 3, 4 ], "acting": [ 3, 4 ], "blocked_by": [], "up_primary": 3, "acting_primary": 3 }, "empty": 0, "dne": 0, "incomplete": 0, "last_epoch_started": 577, "hit_set_history": { "current_last_update": "0'0", "history": [] } }, "peer_info": [ { "peer": "4", "pgid": "10.55", "last_update": "580'40334", "last_complete": "580'40334", "log_tail": "448'37299", "last_user_version": 40333, "last_backfill": 
"MAX", "last_backfill_bitwise": 1, "purged_snaps": "[]", "history": { "epoch_created": 329, "last_epoch_started": 577, "last_epoch_clean": 577, "last_epoch_split": 0, "last_epoch_marked_full": 0, "same_up_since": 576, "same_interval_since": 576, "same_primary_since": 572, "last_scrub": "568'40333", "last_scrub_stamp": "2017-01-26 10:06:56.062870", "last_deep_scrub": "562'40329", "last_deep_scrub_stamp": "2017-01-26 06:19:55.708518", "last_clean_scrub_stamp": "2016-07-05 14:58:45.534218" }, "stats": { "version": "568'40333", "reported_seq": "49386", "reported_epoch": "571", "state": "inconsistent+peering", "last_fresh": "2017-01-26 09:25:26.210512", "last_change": "2017-01-26 09:25:26.210512", "last_active": "2017-01-26 08:40:35.617481", "last_peered": "2017-01-26 08:40:35.617481", "last_clean": "2017-01-26 08:40:35.617481", "last_became_active": "2017-01-26 08:29:09.145329", "last_became_peered": "2017-01-26 08:29:09.145329", "last_unstale": "2017-01-26 09:25:26.210512", "last_undegraded": "2017-01-26 09:25:26.210512", "last_fullsized": "2017-01-26 09:25:26.210512", "mapping_epoch": 575, "log_start": "448'37299", "ondisk_log_start": "448'37299", "created": 329, "last_epoch_clean": 568, "parent": "0.0", "parent_split_bits": 8, "last_scrub": "562'40329", "last_scrub_stamp": "2017-01-26 06:19:55.708518", "last_deep_scrub": "562'40329", "last_deep_scrub_stamp": "2017-01-26 06:19:55.708518", "last_clean_scrub_stamp": "2016-07-05 14:58:45.534218", "log_size": 3034, "ondisk_log_size": 3034, "stats_invalid": false, "dirty_stats_invalid": false, "omap_stats_invalid": false, "hitset_stats_invalid": false, "hitset_bytes_stats_invalid": false, "pin_stats_invalid": false, "stat_sum": { "num_bytes": 2153849873, "num_objects": 28147, "num_object_clones": 0, "num_object_copies": 56294, "num_objects_missing_on_primary": 0, "num_objects_missing": 0, "num_objects_degraded": 0, "num_objects_misplaced": 0, "num_objects_unfound": 0, "num_objects_dirty": 28147, "num_whiteouts": 0, 
"num_read": 6, "num_read_kb": 110, "num_write": 40, "num_write_kb": 197, "num_scrub_errors": 1, "num_shallow_scrub_errors": 0, "num_deep_scrub_errors": 1, "num_objects_recovered": 0, "num_bytes_recovered": 0, "num_keys_recovered": 0, "num_objects_omap": 0, "num_objects_hit_set_archive": 0, "num_bytes_hit_set_archive": 0, "num_flush": 0, "num_flush_kb": 0, "num_evict": 0, "num_evict_kb": 0, "num_promote": 0, "num_flush_mode_high": 0, "num_flush_mode_low": 0, "num_evict_mode_some": 0, "num_evict_mode_full": 0, "num_objects_pinned": 0 }, "up": [ 3, 4 ], "acting": [ 3, 4 ], "blocked_by": [], "up_primary": 3, "acting_primary": 3 }, "empty": 0, "dne": 0, "incomplete": 0, "last_epoch_started": 577, "hit_set_history": { "current_last_update": "0'0", "history": [] } } ], "recovery_state": [ { "name": "Started\/Primary\/Active", "enter_time": "2017-01-26 09:28:09.159017", "might_have_unfound": [], "recovery_progress": { "backfill_targets": [], "waiting_on_backfill": [], "last_backfill_started": "MIN", "backfill_info": { "begin": "MIN", "end": "MIN", "objects": [] }, "peer_backfill_info": [], "backfills_in_flight": [], "recovering": [], "pg_backend": { "pull_from_peer": [], "pushing": [] } }, "scrub": { "scrubber.epoch_start": "576", "scrubber.active": 0, "scrubber.state": "INACTIVE", "scrubber.start": "MIN", "scrubber.end": "MIN", "scrubber.subset_last_update": "0'0", "scrubber.deep": false, "scrubber.seed": 0, "scrubber.waiting_on": 0, "scrubber.waiting_on_whom": [] } }, { "name": "Started", "enter_time": "2017-01-26 09:28:08.166221" } ], "agent_state": {} } # grep -Hn 'ERR' /var/log/ceph/ceph-osd.3.log /var/log/ceph/ceph-osd.3.log:47:2017-01-26 06:08:48.147129 7f3fda627700 -1 log_channel(cluster) log [ERR] : 10.55 shard 3: soid 10:aa0c6d9c:::ef4069bf-70fb-4414-a9d9-6bf5b32608fb.34127.33_nalazi%2f201607%2fLab_7bd28004-cc9d-4039-9567-7f5c597f6d88.pdf:head
data_digest 0xc44df2ba != known data_digest 0xff59029 from auth shard 4 /var/log/ceph/ceph-osd.3.log:48:2017-01-26 06:19:55.708507 7f3fda627700 -1 log_channel(cluster) log [ERR] : 10.55 deep-scrub 0 missing, 1 inconsistent objects /var/log/ceph/ceph-osd.3.log:49:2017-01-26 06:19:55.708513 7f3fda627700 -1 log_channel(cluster) log [ERR] : 10.55 deep-scrub 1 errors /var/log/ceph/ceph-osd.3.log:7464:2017-01-26 10:00:48.267401 7fa6970c2700 -1 log_channel(cluster) log [ERR] : 10.55 shard 3 missing 10:aa0c6d9c:::ef4069bf-70fb-4414-a9d9-6bf5b32608fb.34127.33_nalazi%2f201607%2fLab_7bd28004-cc9d-4039-9567-7f5c597f6d88.pdf:head /var/log/ceph/ceph-osd.3.log:7467:2017-01-26 10:06:56.062852 7fa6970c2700 -1 log_channel(cluster) log [ERR] : 10.55 scrub 1 missing, 0 inconsistent objects /var/log/ceph/ceph-osd.3.log:7468:2017-01-26 10:06:56.062858 7fa6970c2700 -1 log_channel(cluster) log [ERR] : 10.55 scrub 1 errors ( 1 remaining deep scrub error(s) ) We have located the inconsistent pg on osd.3 and have tried the following: stopped osd 3, flushed the journal, moved the object to /tmp, started osd 3 again, and then ran ceph pg repair 10.55, but no luck… Is there some ceph tool we could use to copy the healthy object from osd 4 to osd 3? Best regards!
_______________________________________________ ceph-users mailing list ceph-users@xxxxxxxxxxxxxx http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com