TL;DR: These were old, crusty PGs that could be deleted without consequence. Since it looked like we had a full set of OSDs providing the PG, I went ahead and removed the PG from the OSD that was having trouble, and the OSD booted fine. We were able to do this for all 17 OSDs, and the cluster is now online again. I had exported the first PG from the up set, but didn't wind up needing it, so I didn't export it on the rest. ---------------- Robert LeBlanc PGP Fingerprint 79A2 9CA4 6CC4 45DD A904 C70E E654 3BB2 FA62 B9F1 On Tue, Mar 15, 2022 at 10:26 AM Robert LeBlanc <robert@xxxxxxxxxxxxx> wrote: > We had a host that had hung this morning, and after restarting it, 17 OSDs > across the cluster crashed with a past_interval assert. The weird thing is > that the OSD complaining about the PG isn't in the acting, up or > acting_recovery_backfill sets. This is 14.2.22 with BlueStore. > > It appears that it's trying to be in the peering group but maybe it has an > old version of the PG that it wants to catch up. Since the up set is up, > can we just trash the PG on osd.16 and have it resync it? > > This is an example of osd.16 logs.
> ``` > -18> 2022-03-15 16:14:42.410 7f5124396700 1 osd.16 pg_epoch: 346705 > pg[12.19( v 6242'1003 (0'0,6242'1003] local-lis/les=346701/346702 n=19 > ec=4291/4291 lis/c 346701/345120 les/c/f 346702/345121/266364 > 346700/346705/346700) [252,16,2 > 67] r=1 lpr=346705 pi=[346196,346705)/2 crt=6242'1003 lcod 0'0 unknown > mbc={}] start_peering_interval up [252,16,26 > 7] -> [252,16,267], acting [252,16] -> [252,16,267], acting_primary 252 -> > 252, up_primary 252 -> 252, role 1 -> 1, features acting > 4611087854035861503 upacting 4611087854035861503 > -17> 2022-03-15 16:14:42.410 7f5124396700 -1 log_channel(cluster) log > [ERR] : 12.19 past_intervals [346196,346705) start interval does not > contain the required bound [345266,346705) start > -16> 2022-03-15 16:14:42.410 7f5124396700 -1 osd.16 pg_epoch: 346705 > pg[12.19( v 6242'1003 (0'0,6242'1003] local-lis/les=346701/346702 n=19 > ec=4291/4291 lis/c 346701/345120 les/c/f 346702/345121/266364 > 346700/346705/346700) [252,16,2 > 67] r=1 lpr=346705 pi=[346196,346705)/2 crt=6242'1003 lcod 0'0 unknown > NOTIFY mbc={}] 12.19 past_intervals [346196,346705) start interval does > not contain the required bound [345266,346705) start > -15> 2022-03-15 16:14:42.414 7f5124396700 -1 > /build/ceph-14.2.22/src/osd/PG.cc: In function 'void PG::check_ > past_interval_bounds() const' thread 7f5124396700 time 2022-03-15 > 16:14:42.411247 > /build/ceph-14.2.22/src/osd/PG.cc: 956: ceph_abort_msg("past_interval > start interval mismatch") > ``` > ``` > { > "state": "active+clean", > "snap_trimq": "[]", > "snap_trimq_len": 0, > "epoch": 348456, > "up": [ > 252, > 258, > 36 > ], > "acting": [ > 252, > 258, > 36 > ], > "acting_recovery_backfill": [ > "36", > "252", > "258" > ], > "info": { > "pgid": "12.19", > "last_update": "6242'1003", > "last_complete": "6242'1003", > "log_tail": "0'0", > "last_user_version": 651, > "last_backfill": "MAX", > "last_backfill_bitwise": 0, > "purged_snaps": [], > "history": { > "epoch_created": 4291, > 
"epoch_pool_created": 4291, > "last_epoch_started": 346836, > "last_interval_started": 346835, > "last_epoch_clean": 346836, > "last_interval_clean": 346835, > "last_epoch_split": 0, > "last_epoch_marked_full": 266364, > "same_up_since": 346832, > "same_interval_since": 346835, > "same_primary_since": 346700, > "last_scrub": "6242'1003", > "last_scrub_stamp": "2022-03-14 22:53:26.138458", > "last_deep_scrub": "6242'1003", > "last_deep_scrub_stamp": "2022-03-14 22:53:26.138458", > "last_clean_scrub_stamp": "2022-03-14 22:53:26.138458" > }, > "stats": { > "version": "6242'1003", > "reported_seq": "15883", > "reported_epoch": "348456", > "state": "active+clean", > "last_fresh": "2022-03-15 15:52:00.061854", > "last_change": "2022-03-15 14:33:05.179646", > "last_active": "2022-03-15 15:52:00.061854", > "last_peered": "2022-03-15 15:52:00.061854", > "last_clean": "2022-03-15 15:52:00.061854", > "last_became_active": "2022-03-15 14:33:05.179518", > "last_became_peered": "2022-03-15 14:33:05.179518", > "last_unstale": "2022-03-15 15:52:00.061854", > "last_undegraded": "2022-03-15 15:52:00.061854", > "last_fullsized": "2022-03-15 15:52:00.061854", > "mapping_epoch": 346835, > "log_start": "0'0", > "ondisk_log_start": "0'0", > "created": 4291, > "last_epoch_clean": 346836, > "parent": "0.0", > "parent_split_bits": 0, > "last_scrub": "6242'1003", > "last_scrub_stamp": "2022-03-14 22:53:26.138458", > "last_deep_scrub": "6242'1003", > "last_deep_scrub_stamp": "2022-03-14 22:53:26.138458", > "last_clean_scrub_stamp": "2022-03-14 22:53:26.138458", > "log_size": 1003, > "ondisk_log_size": 1003, > "stats_invalid": false, > "dirty_stats_invalid": false, > "omap_stats_invalid": false, > "hitset_stats_invalid": false, > "hitset_bytes_stats_invalid": false, > "pin_stats_invalid": false, > "manifest_stats_invalid": true, > "snaptrimq_len": 0, > "stat_sum": { > "num_bytes": 79691776, > "num_objects": 19, > "num_object_clones": 0, > "num_object_copies": 57, > 
"num_objects_missing_on_primary": 0, > "num_objects_missing": 0, > "num_objects_degraded": 0, > "num_objects_misplaced": 0, > "num_objects_unfound": 0, > "num_objects_dirty": 19, > "num_whiteouts": 0, > "num_read": 512, > "num_read_kb": 524288, > "num_write": 1092, > "num_write_kb": 602112, > "num_scrub_errors": 0, > "num_shallow_scrub_errors": 0, > "num_deep_scrub_errors": 0, > "num_objects_recovered": 133, > "num_bytes_recovered": 557842432, > "num_keys_recovered": 0, > "num_objects_omap": 0, > "num_objects_hit_set_archive": 0, > "num_bytes_hit_set_archive": 0, > "num_flush": 0, > "num_flush_kb": 0, > "num_evict": 0, > "num_evict_kb": 0, > "num_promote": 0, > "num_flush_mode_high": 0, > "num_flush_mode_low": 0, > "num_evict_mode_some": 0, > "num_evict_mode_full": 0, > "num_objects_pinned": 0, > "num_legacy_snapsets": 0, > "num_large_omap_objects": 0, > "num_objects_manifest": 0, > "num_omap_bytes": 0, > "num_omap_keys": 0, > "num_objects_repaired": 0 > }, > "up": [ > 252, > 258, > 36 > ], > "acting": [ > 252, > 258, > 36 > ], > "avail_no_missing": [], > "object_location_counts": [], > "blocked_by": [], > "up_primary": 252, > "acting_primary": 252, > "purged_snaps": [] > }, > "empty": 0, > "dne": 0, > "incomplete": 0, > "last_epoch_started": 346836, > "hit_set_history": { > "current_last_update": "0'0", > "history": [] > } > }, > "peer_info": [ > { > "peer": "36", > "pgid": "12.19", > "last_update": "6242'1003", > "last_complete": "6242'1003", > "log_tail": "0'0", > "last_user_version": 651, > "last_backfill": "MAX", > "last_backfill_bitwise": 0, > "purged_snaps": [], > "history": { > "epoch_created": 4291, > "epoch_pool_created": 4291, > "last_epoch_started": 346836, > "last_interval_started": 346835, > "last_epoch_clean": 346836, > "last_interval_clean": 346835, > "last_epoch_split": 0, > "last_epoch_marked_full": 266364, > "same_up_since": 346832, > "same_interval_since": 346835, > "same_primary_since": 346700, > "last_scrub": "6242'1003", > "last_scrub_stamp": 
"2022-03-14 22:53:26.138458", > "last_deep_scrub": "6242'1003", > "last_deep_scrub_stamp": "2022-03-14 22:53:26.138458", > "last_clean_scrub_stamp": "2022-03-14 22:53:26.138458" > }, > "stats": { > "version": "6242'1003", > "reported_seq": "585", > "reported_epoch": "345119", > "state": "remapped+peering", > "last_fresh": "2022-03-15 12:24:47.243534", > "last_change": "2022-03-15 12:24:46.206723", > "last_active": "2022-03-15 12:24:46.206497", > "last_peered": "2022-03-15 12:23:55.204019", > "last_clean": "2019-12-19 21:22:43.296344", > "last_became_active": "2022-03-15 12:14:42.790929", > "last_became_peered": "2022-03-15 12:14:42.790929", > "last_unstale": "2022-03-15 12:24:47.243534", > "last_undegraded": "2022-03-15 12:24:47.243534", > "last_fullsized": "2022-03-15 12:24:47.243534", > "mapping_epoch": 346835, > "log_start": "0'0", > "ondisk_log_start": "0'0", > "created": 4291, > "last_epoch_clean": 344591, > "parent": "0.0", > "parent_split_bits": 0, > "last_scrub": "6242'1003", > "last_scrub_stamp": "2022-03-14 22:53:26.138458", > "last_deep_scrub": "6242'1003", > "last_deep_scrub_stamp": "2022-03-14 22:53:26.138458", > "last_clean_scrub_stamp": "2022-03-14 22:53:26.138458", > "log_size": 1003, > "ondisk_log_size": 1003, > "stats_invalid": false, > "dirty_stats_invalid": false, > "omap_stats_invalid": false, > "hitset_stats_invalid": false, > "hitset_bytes_stats_invalid": false, > "pin_stats_invalid": false, > "manifest_stats_invalid": true, > "snaptrimq_len": 0, > "stat_sum": { > "num_bytes": 79691776, > "num_objects": 19, > "num_object_clones": 0, > "num_object_copies": 57, > "num_objects_missing_on_primary": 0, > "num_objects_missing": 0, > "num_objects_degraded": 0, > "num_objects_misplaced": 0, > "num_objects_unfound": 0, > "num_objects_dirty": 19, > "num_whiteouts": 0, > "num_read": 512, > "num_read_kb": 524288, > "num_write": 1092, > "num_write_kb": 602112, > "num_scrub_errors": 0, > "num_shallow_scrub_errors": 0, > "num_deep_scrub_errors": 0, > 
"num_objects_recovered": 95, > "num_bytes_recovered": 398458880, > "num_keys_recovered": 0, > "num_objects_omap": 0, > "num_objects_hit_set_archive": 0, > "num_bytes_hit_set_archive": 0, > "num_flush": 0, > "num_flush_kb": 0, > "num_evict": 0, > "num_evict_kb": 0, > "num_promote": 0, > "num_flush_mode_high": 0, > "num_flush_mode_low": 0, > "num_evict_mode_some": 0, > "num_evict_mode_full": 0, > "num_objects_pinned": 0, > "num_legacy_snapsets": 0, > "num_large_omap_objects": 0, > "num_objects_manifest": 0, > "num_omap_bytes": 0, > "num_omap_keys": 0, > "num_objects_repaired": 0 > }, > "up": [ > 252, > 258, > 36 > ], > "acting": [ > 252, > 258, > 36 > ], > "avail_no_missing": [], > "object_location_counts": [], > "blocked_by": [], > "up_primary": 252, > "acting_primary": 252, > "purged_snaps": [] > }, > "empty": 0, > "dne": 0, > "incomplete": 0, > "last_epoch_started": 346836, > "hit_set_history": { > "current_last_update": "0'0", > "history": [] > } > }, > { > "peer": "258", > "pgid": "12.19", > "last_update": "6242'1003", > "last_complete": "6242'1003", > "log_tail": "0'0", > "last_user_version": 651, > "last_backfill": "MAX", > "last_backfill_bitwise": 0, > "purged_snaps": [], > "history": { > "epoch_created": 4291, > "epoch_pool_created": 4291, > "last_epoch_started": 346836, > "last_interval_started": 346835, > "last_epoch_clean": 346836, > "last_interval_clean": 346835, > "last_epoch_split": 0, > "last_epoch_marked_full": 266364, > "same_up_since": 346832, > "same_interval_since": 346835, > "same_primary_since": 346700, > "last_scrub": "6242'1003", > "last_scrub_stamp": "2022-03-14 22:53:26.138458", > "last_deep_scrub": "6242'1003", > "last_deep_scrub_stamp": "2022-03-14 22:53:26.138458", > "last_clean_scrub_stamp": "2022-03-14 22:53:26.138458" > }, > "stats": { > "version": "6242'1003", > "reported_seq": "14206", > "reported_epoch": "346834", > "state": "remapped+peering", > "last_fresh": "2022-03-15 14:33:02.962535", > "last_change": "2022-03-15 
14:33:01.955614", > "last_active": "2022-03-15 14:33:00.948390", > "last_peered": "2022-03-15 14:30:51.921835", > "last_clean": "2022-03-15 14:30:51.921835", > "last_became_active": "2022-03-15 14:23:02.731994", > "last_became_peered": "2022-03-15 14:23:02.731994", > "last_unstale": "2022-03-15 14:33:02.962535", > "last_undegraded": "2022-03-15 14:33:02.962535", > "last_fullsized": "2022-03-15 14:33:02.962535", > "mapping_epoch": 346835, > "log_start": "0'0", > "ondisk_log_start": "0'0", > "created": 4291, > "last_epoch_clean": 346709, > "parent": "0.0", > "parent_split_bits": 0, > "last_scrub": "6242'1003", > "last_scrub_stamp": "2022-03-14 22:53:26.138458", > "last_deep_scrub": "6242'1003", > "last_deep_scrub_stamp": "2022-03-14 22:53:26.138458", > "last_clean_scrub_stamp": "2022-03-14 22:53:26.138458", > "log_size": 1003, > "ondisk_log_size": 1003, > "stats_invalid": false, > "dirty_stats_invalid": false, > "omap_stats_invalid": false, > "hitset_stats_invalid": false, > "hitset_bytes_stats_invalid": false, > "pin_stats_invalid": false, > "manifest_stats_invalid": true, > "snaptrimq_len": 0, > "stat_sum": { > "num_bytes": 79691776, > "num_objects": 19, > "num_object_clones": 0, > "num_object_copies": 57, > "num_objects_missing_on_primary": 0, > "num_objects_missing": 0, > "num_objects_degraded": 0, > "num_objects_misplaced": 0, > "num_objects_unfound": 0, > "num_objects_dirty": 19, > "num_whiteouts": 0, > "num_read": 512, > "num_read_kb": 524288, > "num_write": 1092, > "num_write_kb": 602112, > "num_scrub_errors": 0, > "num_shallow_scrub_errors": 0, > "num_deep_scrub_errors": 0, > "num_objects_recovered": 114, > "num_bytes_recovered": 478150656, > "num_keys_recovered": 0, > "num_objects_omap": 0, > "num_objects_hit_set_archive": 0, > "num_bytes_hit_set_archive": 0, > "num_flush": 0, > "num_flush_kb": 0, > "num_evict": 0, > "num_evict_kb": 0, > "num_promote": 0, > "num_flush_mode_high": 0, > "num_flush_mode_low": 0, > "num_evict_mode_some": 0, > 
"num_evict_mode_full": 0, > "num_objects_pinned": 0, > "num_legacy_snapsets": 0, > "num_large_omap_objects": 0, > "num_objects_manifest": 0, > "num_omap_bytes": 0, > "num_omap_keys": 0, > "num_objects_repaired": 0 > }, > "up": [ > 252, > 258, > 36 > ], > "acting": [ > 252, > 258, > 36 > ], > "avail_no_missing": [], > "object_location_counts": [], > "blocked_by": [], > "up_primary": 252, > "acting_primary": 252, > "purged_snaps": [] > }, > "empty": 0, > "dne": 0, > "incomplete": 0, > "last_epoch_started": 346836, > "hit_set_history": { > "current_last_update": "0'0", > "history": [] > } > } > ], > "recovery_state": [ > { > "name": "Started/Primary/Active", > "enter_time": "2022-03-15 14:33:05.173192", > "might_have_unfound": [], > "recovery_progress": { > "backfill_targets": [], > "waiting_on_backfill": [], > "last_backfill_started": "MIN", > "backfill_info": { > "begin": "MIN", > "end": "MIN", > "objects": [] > }, > "peer_backfill_info": [], > "backfills_in_flight": [], > "recovering": [], > "pg_backend": { > "pull_from_peer": [], > "pushing": [] > } > }, > "scrub": { > "scrubber.epoch_start": "0", > "scrubber.active": false, > "scrubber.state": "INACTIVE", > "scrubber.start": "MIN", > "scrubber.end": "MIN", > "scrubber.max_end": "MIN", > "scrubber.subset_last_update": "0'0", > "scrubber.deep": false, > "scrubber.waiting_on_whom": [] > } > }, > { > "name": "Started", > "enter_time": "2022-03-15 14:33:04.151166" > } > ], > "agent_state": {} > } > ``` > > ---------------- > Robert LeBlanc > PGP Fingerprint 79A2 9CA4 6CC4 45DD A904 C70E E654 3BB2 FA62 B9F1 > _______________________________________________ ceph-users mailing list -- ceph-users@xxxxxxx To unsubscribe send an email to ceph-users-leave@xxxxxxx