TL;DR: Old crusty PGs that could be deleted without consequence.
----------------
Robert LeBlanc
PGP Fingerprint 79A2 9CA4 6CC4 45DD A904 C70E E654 3BB2 FA62 B9F1
Robert LeBlanc
PGP Fingerprint 79A2 9CA4 6CC4 45DD A904 C70E E654 3BB2 FA62 B9F1
On Tue, Mar 15, 2022 at 10:26 AM Robert LeBlanc <robert@xxxxxxxxxxxxx> wrote:
We had a host that had hung this morning, and after restarting it, 17 OSDs across the cluster crashed with a past_interval assert. The weird thing is that the OSD complaining about the PG isn't in the acting, up or acting_recovery_backfill sets. This is 14.2.22 with BlueStore. It appears that it's trying to be in the peering group, but maybe it has an old version of the PG that it wants to catch up. Since the up set is up, can we just trash the PG on osd.16 and have it resync it? This is an example of osd.16 logs.
```
-18> 2022-03-15 16:14:42.410 7f5124396700 1 osd.16 pg_epoch: 346705 pg[12.19( v 6242'1003 (0'0,6242'1003] local-lis/les=346701/346702 n=19 ec=4291/4291 lis/c 346701/345120 les/c/f 346702/345121/266364 346700/346705/346700) [252,16,267] r=1 lpr=346705 pi=[346196,346705)/2 crt=6242'1003 lcod 0'0 unknown mbc={}] start_peering_interval up [252,16,267] -> [252,16,267], acting [252,16] -> [252,16,267], acting_primary 252 -> 252, up_primary 252 -> 252, role 1 -> 1, features acting 4611087854035861503 upacting 4611087854035861503
-17> 2022-03-15 16:14:42.410 7f5124396700 -1 log_channel(cluster) log [ERR] : 12.19 past_intervals [346196,346705) start interval does not contain the required bound [345266,346705) start
-16> 2022-03-15 16:14:42.410 7f5124396700 -1 osd.16 pg_epoch: 346705 pg[12.19( v 6242'1003 (0'0,6242'1003] local-lis/les=346701/346702 n=19 ec=4291/4291 lis/c 346701/345120 les/c/f 346702/345121/266364 346700/346705/346700) [252,16,267] r=1 lpr=346705 pi=[346196,346705)/2 crt=6242'1003 lcod 0'0 unknown NOTIFY mbc={}] 12.19 past_intervals [346196,346705) start interval does not contain the required bound [345266,346705) start
-15> 2022-03-15 16:14:42.414 7f5124396700 -1 /build/ceph-14.2.22/src/osd/PG.cc: In function 'void PG::check_past_interval_bounds() const' thread 7f5124396700 time 2022-03-15 16:14:42.411247
/build/ceph-14.2.22/src/osd/PG.cc: 956: ceph_abort_msg("past_interval start interval mismatch")
```

```
{
"state": "active+clean",
"snap_trimq": "[]",
"snap_trimq_len": 0,
"epoch": 348456,
"up": [
252,
258,
36
],
"acting": [
252,
258,
36
],
"acting_recovery_backfill": [
"36",
"252",
"258"
],
"info": {
"pgid": "12.19",
"last_update": "6242'1003",
"last_complete": "6242'1003",
"log_tail": "0'0",
"last_user_version": 651,
"last_backfill": "MAX",
"last_backfill_bitwise": 0,
"purged_snaps": [],
"history": {
"epoch_created": 4291,
"epoch_pool_created": 4291,
"last_epoch_started": 346836,
"last_interval_started": 346835,
"last_epoch_clean": 346836,
"last_interval_clean": 346835,
"last_epoch_split": 0,
"last_epoch_marked_full": 266364,
"same_up_since": 346832,
"same_interval_since": 346835,
"same_primary_since": 346700,
"last_scrub": "6242'1003",
"last_scrub_stamp": "2022-03-14 22:53:26.138458",
"last_deep_scrub": "6242'1003",
"last_deep_scrub_stamp": "2022-03-14 22:53:26.138458",
"last_clean_scrub_stamp": "2022-03-14 22:53:26.138458"
},
"stats": {
"version": "6242'1003",
"reported_seq": "15883",
"reported_epoch": "348456",
"state": "active+clean",
"last_fresh": "2022-03-15 15:52:00.061854",
"last_change": "2022-03-15 14:33:05.179646",
"last_active": "2022-03-15 15:52:00.061854",
"last_peered": "2022-03-15 15:52:00.061854",
"last_clean": "2022-03-15 15:52:00.061854",
"last_became_active": "2022-03-15 14:33:05.179518",
"last_became_peered": "2022-03-15 14:33:05.179518",
"last_unstale": "2022-03-15 15:52:00.061854",
"last_undegraded": "2022-03-15 15:52:00.061854",
"last_fullsized": "2022-03-15 15:52:00.061854",
"mapping_epoch": 346835,
"log_start": "0'0",
"ondisk_log_start": "0'0",
"created": 4291,
"last_epoch_clean": 346836,
"parent": "0.0",
"parent_split_bits": 0,
"last_scrub": "6242'1003",
"last_scrub_stamp": "2022-03-14 22:53:26.138458",
"last_deep_scrub": "6242'1003",
"last_deep_scrub_stamp": "2022-03-14 22:53:26.138458",
"last_clean_scrub_stamp": "2022-03-14 22:53:26.138458",
"log_size": 1003,
"ondisk_log_size": 1003,
"stats_invalid": false,
"dirty_stats_invalid": false,
"omap_stats_invalid": false,
"hitset_stats_invalid": false,
"hitset_bytes_stats_invalid": false,
"pin_stats_invalid": false,
"manifest_stats_invalid": true,
"snaptrimq_len": 0,
"stat_sum": {
"num_bytes": 79691776,
"num_objects": 19,
"num_object_clones": 0,
"num_object_copies": 57,
"num_objects_missing_on_primary": 0,
"num_objects_missing": 0,
"num_objects_degraded": 0,
"num_objects_misplaced": 0,
"num_objects_unfound": 0,
"num_objects_dirty": 19,
"num_whiteouts": 0,
"num_read": 512,
"num_read_kb": 524288,
"num_write": 1092,
"num_write_kb": 602112,
"num_scrub_errors": 0,
"num_shallow_scrub_errors": 0,
"num_deep_scrub_errors": 0,
"num_objects_recovered": 133,
"num_bytes_recovered": 557842432,
"num_keys_recovered": 0,
"num_objects_omap": 0,
"num_objects_hit_set_archive": 0,
"num_bytes_hit_set_archive": 0,
"num_flush": 0,
"num_flush_kb": 0,
"num_evict": 0,
"num_evict_kb": 0,
"num_promote": 0,
"num_flush_mode_high": 0,
"num_flush_mode_low": 0,
"num_evict_mode_some": 0,
"num_evict_mode_full": 0,
"num_objects_pinned": 0,
"num_legacy_snapsets": 0,
"num_large_omap_objects": 0,
"num_objects_manifest": 0,
"num_omap_bytes": 0,
"num_omap_keys": 0,
"num_objects_repaired": 0
},
"up": [
252,
258,
36
],
"acting": [
252,
258,
36
],
"avail_no_missing": [],
"object_location_counts": [],
"blocked_by": [],
"up_primary": 252,
"acting_primary": 252,
"purged_snaps": []
},
"empty": 0,
"dne": 0,
"incomplete": 0,
"last_epoch_started": 346836,
"hit_set_history": {
"current_last_update": "0'0",
"history": []
}
},
"peer_info": [
{
"peer": "36",
"pgid": "12.19",
"last_update": "6242'1003",
"last_complete": "6242'1003",
"log_tail": "0'0",
"last_user_version": 651,
"last_backfill": "MAX",
"last_backfill_bitwise": 0,
"purged_snaps": [],
"history": {
"epoch_created": 4291,
"epoch_pool_created": 4291,
"last_epoch_started": 346836,
"last_interval_started": 346835,
"last_epoch_clean": 346836,
"last_interval_clean": 346835,
"last_epoch_split": 0,
"last_epoch_marked_full": 266364,
"same_up_since": 346832,
"same_interval_since": 346835,
"same_primary_since": 346700,
"last_scrub": "6242'1003",
"last_scrub_stamp": "2022-03-14 22:53:26.138458",
"last_deep_scrub": "6242'1003",
"last_deep_scrub_stamp": "2022-03-14 22:53:26.138458",
"last_clean_scrub_stamp": "2022-03-14 22:53:26.138458"
},
"stats": {
"version": "6242'1003",
"reported_seq": "585",
"reported_epoch": "345119",
"state": "remapped+peering",
"last_fresh": "2022-03-15 12:24:47.243534",
"last_change": "2022-03-15 12:24:46.206723",
"last_active": "2022-03-15 12:24:46.206497",
"last_peered": "2022-03-15 12:23:55.204019",
"last_clean": "2019-12-19 21:22:43.296344",
"last_became_active": "2022-03-15 12:14:42.790929",
"last_became_peered": "2022-03-15 12:14:42.790929",
"last_unstale": "2022-03-15 12:24:47.243534",
"last_undegraded": "2022-03-15 12:24:47.243534",
"last_fullsized": "2022-03-15 12:24:47.243534",
"mapping_epoch": 346835,
"log_start": "0'0",
"ondisk_log_start": "0'0",
"created": 4291,
"last_epoch_clean": 344591,
"parent": "0.0",
"parent_split_bits": 0,
"last_scrub": "6242'1003",
"last_scrub_stamp": "2022-03-14 22:53:26.138458",
"last_deep_scrub": "6242'1003",
"last_deep_scrub_stamp": "2022-03-14 22:53:26.138458",
"last_clean_scrub_stamp": "2022-03-14 22:53:26.138458",
"log_size": 1003,
"ondisk_log_size": 1003,
"stats_invalid": false,
"dirty_stats_invalid": false,
"omap_stats_invalid": false,
"hitset_stats_invalid": false,
"hitset_bytes_stats_invalid": false,
"pin_stats_invalid": false,
"manifest_stats_invalid": true,
"snaptrimq_len": 0,
"stat_sum": {
"num_bytes": 79691776,
"num_objects": 19,
"num_object_clones": 0,
"num_object_copies": 57,
"num_objects_missing_on_primary": 0,
"num_objects_missing": 0,
"num_objects_degraded": 0,
"num_objects_misplaced": 0,
"num_objects_unfound": 0,
"num_objects_dirty": 19,
"num_whiteouts": 0,
"num_read": 512,
"num_read_kb": 524288,
"num_write": 1092,
"num_write_kb": 602112,
"num_scrub_errors": 0,
"num_shallow_scrub_errors": 0,
"num_deep_scrub_errors": 0,
"num_objects_recovered": 95,
"num_bytes_recovered": 398458880,
"num_keys_recovered": 0,
"num_objects_omap": 0,
"num_objects_hit_set_archive": 0,
"num_bytes_hit_set_archive": 0,
"num_flush": 0,
"num_flush_kb": 0,
"num_evict": 0,
"num_evict_kb": 0,
"num_promote": 0,
"num_flush_mode_high": 0,
"num_flush_mode_low": 0,
"num_evict_mode_some": 0,
"num_evict_mode_full": 0,
"num_objects_pinned": 0,
"num_legacy_snapsets": 0,
"num_large_omap_objects": 0,
"num_objects_manifest": 0,
"num_omap_bytes": 0,
"num_omap_keys": 0,
"num_objects_repaired": 0
},
"up": [
252,
258,
36
],
"acting": [
252,
258,
36
],
"avail_no_missing": [],
"object_location_counts": [],
"blocked_by": [],
"up_primary": 252,
"acting_primary": 252,
"purged_snaps": []
},
"empty": 0,
"dne": 0,
"incomplete": 0,
"last_epoch_started": 346836,
"hit_set_history": {
"current_last_update": "0'0",
"history": []
}
},
{
"peer": "258",
"pgid": "12.19",
"last_update": "6242'1003",
"last_complete": "6242'1003",
"log_tail": "0'0",
"last_user_version": 651,
"last_backfill": "MAX",
"last_backfill_bitwise": 0,
"purged_snaps": [],
"history": {
"epoch_created": 4291,
"epoch_pool_created": 4291,
"last_epoch_started": 346836,
"last_interval_started": 346835,
"last_epoch_clean": 346836,
"last_interval_clean": 346835,
"last_epoch_split": 0,
"last_epoch_marked_full": 266364,
"same_up_since": 346832,
"same_interval_since": 346835,
"same_primary_since": 346700,
"last_scrub": "6242'1003",
"last_scrub_stamp": "2022-03-14 22:53:26.138458",
"last_deep_scrub": "6242'1003",
"last_deep_scrub_stamp": "2022-03-14 22:53:26.138458",
"last_clean_scrub_stamp": "2022-03-14 22:53:26.138458"
},
"stats": {
"version": "6242'1003",
"reported_seq": "14206",
"reported_epoch": "346834",
"state": "remapped+peering",
"last_fresh": "2022-03-15 14:33:02.962535",
"last_change": "2022-03-15 14:33:01.955614",
"last_active": "2022-03-15 14:33:00.948390",
"last_peered": "2022-03-15 14:30:51.921835",
"last_clean": "2022-03-15 14:30:51.921835",
"last_became_active": "2022-03-15 14:23:02.731994",
"last_became_peered": "2022-03-15 14:23:02.731994",
"last_unstale": "2022-03-15 14:33:02.962535",
"last_undegraded": "2022-03-15 14:33:02.962535",
"last_fullsized": "2022-03-15 14:33:02.962535",
"mapping_epoch": 346835,
"log_start": "0'0",
"ondisk_log_start": "0'0",
"created": 4291,
"last_epoch_clean": 346709,
"parent": "0.0",
"parent_split_bits": 0,
"last_scrub": "6242'1003",
"last_scrub_stamp": "2022-03-14 22:53:26.138458",
"last_deep_scrub": "6242'1003",
"last_deep_scrub_stamp": "2022-03-14 22:53:26.138458",
"last_clean_scrub_stamp": "2022-03-14 22:53:26.138458",
"log_size": 1003,
"ondisk_log_size": 1003,
"stats_invalid": false,
"dirty_stats_invalid": false,
"omap_stats_invalid": false,
"hitset_stats_invalid": false,
"hitset_bytes_stats_invalid": false,
"pin_stats_invalid": false,
"manifest_stats_invalid": true,
"snaptrimq_len": 0,
"stat_sum": {
"num_bytes": 79691776,
"num_objects": 19,
"num_object_clones": 0,
"num_object_copies": 57,
"num_objects_missing_on_primary": 0,
"num_objects_missing": 0,
"num_objects_degraded": 0,
"num_objects_misplaced": 0,
"num_objects_unfound": 0,
"num_objects_dirty": 19,
"num_whiteouts": 0,
"num_read": 512,
"num_read_kb": 524288,
"num_write": 1092,
"num_write_kb": 602112,
"num_scrub_errors": 0,
"num_shallow_scrub_errors": 0,
"num_deep_scrub_errors": 0,
"num_objects_recovered": 114,
"num_bytes_recovered": 478150656,
"num_keys_recovered": 0,
"num_objects_omap": 0,
"num_objects_hit_set_archive": 0,
"num_bytes_hit_set_archive": 0,
"num_flush": 0,
"num_flush_kb": 0,
"num_evict": 0,
"num_evict_kb": 0,
"num_promote": 0,
"num_flush_mode_high": 0,
"num_flush_mode_low": 0,
"num_evict_mode_some": 0,
"num_evict_mode_full": 0,
"num_objects_pinned": 0,
"num_legacy_snapsets": 0,
"num_large_omap_objects": 0,
"num_objects_manifest": 0,
"num_omap_bytes": 0,
"num_omap_keys": 0,
"num_objects_repaired": 0
},
"up": [
252,
258,
36
],
"acting": [
252,
258,
36
],
"avail_no_missing": [],
"object_location_counts": [],
"blocked_by": [],
"up_primary": 252,
"acting_primary": 252,
"purged_snaps": []
},
"empty": 0,
"dne": 0,
"incomplete": 0,
"last_epoch_started": 346836,
"hit_set_history": {
"current_last_update": "0'0",
"history": []
}
}
],
"recovery_state": [
{
"name": "Started/Primary/Active",
"enter_time": "2022-03-15 14:33:05.173192",
"might_have_unfound": [],
"recovery_progress": {
"backfill_targets": [],
"waiting_on_backfill": [],
"last_backfill_started": "MIN",
"backfill_info": {
"begin": "MIN",
"end": "MIN",
"objects": []
},
"peer_backfill_info": [],
"backfills_in_flight": [],
"recovering": [],
"pg_backend": {
"pull_from_peer": [],
"pushing": []
}
},
"scrub": {
"scrubber.epoch_start": "0",
"scrubber.active": false,
"scrubber.state": "INACTIVE",
"scrubber.start": "MIN",
"scrubber.end": "MIN",
"scrubber.max_end": "MIN",
"scrubber.subset_last_update": "0'0",
"scrubber.deep": false,
"scrubber.waiting_on_whom": []
}
},
{
"name": "Started",
"enter_time": "2022-03-15 14:33:04.151166"
}
],
"agent_state": {}
}
```
----------------
Robert LeBlanc
PGP Fingerprint 79A2 9CA4 6CC4 45DD A904 C70E E654 3BB2 FA62 B9F1
_______________________________________________
Dev mailing list -- dev@xxxxxxx
To unsubscribe send an email to dev-leave@xxxxxxx