We had a host that hung this morning, and after restarting it, 17 OSDs across the cluster crashed with a past_interval assert. The weird thing is that the OSD complaining about the PG isn't in the acting, up, or acting_recovery_backfill sets. This is 14.2.22 with BlueStore. It appears that osd.16 is trying to join the peering group, but maybe it has an old version of the PG that it wants to catch up on. Since the OSDs in the up set are all up, can we just trash the copy of the PG on osd.16 and have it resync? This is an example of the osd.16 logs. ``` -18> 2022-03-15 16:14:42.410 7f5124396700 1 osd.16 pg_epoch: 346705 pg[12.19( v 6242'1003 (0'0,6242'1003] local-lis/les=346701/346702 n=19 ec=4291/4291 lis/c 346701/345120 les/c/f 346702/345121/266364 346700/346705/346700) [252,16,267] r=1 lpr=346705 pi=[346196,346705)/2 crt=6242'1003 lcod 0'0 unknown mbc={}] start_peering_interval up [252,16,267] -> [252,16,267], acting [252,16] -> [252,16,267], acting_primary 252 -> 252, up_primary 252 -> 252, role 1 -> 1, features acting 4611087854035861503 upacting 4611087854035861503 -17> 2022-03-15 16:14:42.410 7f5124396700 -1 log_channel(cluster) log [ERR] : 12.19 past_intervals [346196,346705) start interval does not contain the required bound [345266,346705) start -16> 2022-03-15 16:14:42.410 7f5124396700 -1 osd.16 pg_epoch: 346705 pg[12.19( v 6242'1003 (0'0,6242'1003] local-lis/les=346701/346702 n=19 ec=4291/4291 lis/c 346701/345120 les/c/f 346702/345121/266364 346700/346705/346700) [252,16,267] r=1 lpr=346705 pi=[346196,346705)/2 crt=6242'1003 lcod 0'0 unknown NOTIFY mbc={}] 12.19 past_intervals [346196,346705) start interval does not contain the required bound [345266,346705) start -15> 2022-03-15 16:14:42.414 7f5124396700 -1 /build/ceph-14.2.22/src/osd/PG.cc: In function 'void PG::check_past_interval_bounds() const' thread 7f5124396700 time 2022-03-15 16:14:42.411247 /build/ceph-14.2.22/src/osd/PG.cc: 956: ceph_abort_msg("past_interval start interval mismatch") ``` ``` { "state": "active+clean", 
"snap_trimq": "[]", "snap_trimq_len": 0, "epoch": 348456, "up": [ 252, 258, 36 ], "acting": [ 252, 258, 36 ], "acting_recovery_backfill": [ "36", "252", "258" ], "info": { "pgid": "12.19", "last_update": "6242'1003", "last_complete": "6242'1003", "log_tail": "0'0", "last_user_version": 651, "last_backfill": "MAX", "last_backfill_bitwise": 0, "purged_snaps": [], "history": { "epoch_created": 4291, "epoch_pool_created": 4291, "last_epoch_started": 346836, "last_interval_started": 346835, "last_epoch_clean": 346836, "last_interval_clean": 346835, "last_epoch_split": 0, "last_epoch_marked_full": 266364, "same_up_since": 346832, "same_interval_since": 346835, "same_primary_since": 346700, "last_scrub": "6242'1003", "last_scrub_stamp": "2022-03-14 22:53:26.138458", "last_deep_scrub": "6242'1003", "last_deep_scrub_stamp": "2022-03-14 22:53:26.138458", "last_clean_scrub_stamp": "2022-03-14 22:53:26.138458" }, "stats": { "version": "6242'1003", "reported_seq": "15883", "reported_epoch": "348456", "state": "active+clean", "last_fresh": "2022-03-15 15:52:00.061854", "last_change": "2022-03-15 14:33:05.179646", "last_active": "2022-03-15 15:52:00.061854", "last_peered": "2022-03-15 15:52:00.061854", "last_clean": "2022-03-15 15:52:00.061854", "last_became_active": "2022-03-15 14:33:05.179518", "last_became_peered": "2022-03-15 14:33:05.179518", "last_unstale": "2022-03-15 15:52:00.061854", "last_undegraded": "2022-03-15 15:52:00.061854", "last_fullsized": "2022-03-15 15:52:00.061854", "mapping_epoch": 346835, "log_start": "0'0", "ondisk_log_start": "0'0", "created": 4291, "last_epoch_clean": 346836, "parent": "0.0", "parent_split_bits": 0, "last_scrub": "6242'1003", "last_scrub_stamp": "2022-03-14 22:53:26.138458", "last_deep_scrub": "6242'1003", "last_deep_scrub_stamp": "2022-03-14 22:53:26.138458", "last_clean_scrub_stamp": "2022-03-14 22:53:26.138458", "log_size": 1003, "ondisk_log_size": 1003, "stats_invalid": false, "dirty_stats_invalid": false, "omap_stats_invalid": 
false, "hitset_stats_invalid": false, "hitset_bytes_stats_invalid": false, "pin_stats_invalid": false, "manifest_stats_invalid": true, "snaptrimq_len": 0, "stat_sum": { "num_bytes": 79691776, "num_objects": 19, "num_object_clones": 0, "num_object_copies": 57, "num_objects_missing_on_primary": 0, "num_objects_missing": 0, "num_objects_degraded": 0, "num_objects_misplaced": 0, "num_objects_unfound": 0, "num_objects_dirty": 19, "num_whiteouts": 0, "num_read": 512, "num_read_kb": 524288, "num_write": 1092, "num_write_kb": 602112, "num_scrub_errors": 0, "num_shallow_scrub_errors": 0, "num_deep_scrub_errors": 0, "num_objects_recovered": 133, "num_bytes_recovered": 557842432, "num_keys_recovered": 0, "num_objects_omap": 0, "num_objects_hit_set_archive": 0, "num_bytes_hit_set_archive": 0, "num_flush": 0, "num_flush_kb": 0, "num_evict": 0, "num_evict_kb": 0, "num_promote": 0, "num_flush_mode_high": 0, "num_flush_mode_low": 0, "num_evict_mode_some": 0, "num_evict_mode_full": 0, "num_objects_pinned": 0, "num_legacy_snapsets": 0, "num_large_omap_objects": 0, "num_objects_manifest": 0, "num_omap_bytes": 0, "num_omap_keys": 0, "num_objects_repaired": 0 }, "up": [ 252, 258, 36 ], "acting": [ 252, 258, 36 ], "avail_no_missing": [], "object_location_counts": [], "blocked_by": [], "up_primary": 252, "acting_primary": 252, "purged_snaps": [] }, "empty": 0, "dne": 0, "incomplete": 0, "last_epoch_started": 346836, "hit_set_history": { "current_last_update": "0'0", "history": [] } }, "peer_info": [ { "peer": "36", "pgid": "12.19", "last_update": "6242'1003", "last_complete": "6242'1003", "log_tail": "0'0", "last_user_version": 651, "last_backfill": "MAX", "last_backfill_bitwise": 0, "purged_snaps": [], "history": { "epoch_created": 4291, "epoch_pool_created": 4291, "last_epoch_started": 346836, "last_interval_started": 346835, "last_epoch_clean": 346836, "last_interval_clean": 346835, "last_epoch_split": 0, "last_epoch_marked_full": 266364, "same_up_since": 346832, 
"same_interval_since": 346835, "same_primary_since": 346700, "last_scrub": "6242'1003", "last_scrub_stamp": "2022-03-14 22:53:26.138458", "last_deep_scrub": "6242'1003", "last_deep_scrub_stamp": "2022-03-14 22:53:26.138458", "last_clean_scrub_stamp": "2022-03-14 22:53:26.138458" }, "stats": { "version": "6242'1003", "reported_seq": "585", "reported_epoch": "345119", "state": "remapped+peering", "last_fresh": "2022-03-15 12:24:47.243534", "last_change": "2022-03-15 12:24:46.206723", "last_active": "2022-03-15 12:24:46.206497", "last_peered": "2022-03-15 12:23:55.204019", "last_clean": "2019-12-19 21:22:43.296344", "last_became_active": "2022-03-15 12:14:42.790929", "last_became_peered": "2022-03-15 12:14:42.790929", "last_unstale": "2022-03-15 12:24:47.243534", "last_undegraded": "2022-03-15 12:24:47.243534", "last_fullsized": "2022-03-15 12:24:47.243534", "mapping_epoch": 346835, "log_start": "0'0", "ondisk_log_start": "0'0", "created": 4291, "last_epoch_clean": 344591, "parent": "0.0", "parent_split_bits": 0, "last_scrub": "6242'1003", "last_scrub_stamp": "2022-03-14 22:53:26.138458", "last_deep_scrub": "6242'1003", "last_deep_scrub_stamp": "2022-03-14 22:53:26.138458", "last_clean_scrub_stamp": "2022-03-14 22:53:26.138458", "log_size": 1003, "ondisk_log_size": 1003, "stats_invalid": false, "dirty_stats_invalid": false, "omap_stats_invalid": false, "hitset_stats_invalid": false, "hitset_bytes_stats_invalid": false, "pin_stats_invalid": false, "manifest_stats_invalid": true, "snaptrimq_len": 0, "stat_sum": { "num_bytes": 79691776, "num_objects": 19, "num_object_clones": 0, "num_object_copies": 57, "num_objects_missing_on_primary": 0, "num_objects_missing": 0, "num_objects_degraded": 0, "num_objects_misplaced": 0, "num_objects_unfound": 0, "num_objects_dirty": 19, "num_whiteouts": 0, "num_read": 512, "num_read_kb": 524288, "num_write": 1092, "num_write_kb": 602112, "num_scrub_errors": 0, "num_shallow_scrub_errors": 0, "num_deep_scrub_errors": 0, 
"num_objects_recovered": 95, "num_bytes_recovered": 398458880, "num_keys_recovered": 0, "num_objects_omap": 0, "num_objects_hit_set_archive": 0, "num_bytes_hit_set_archive": 0, "num_flush": 0, "num_flush_kb": 0, "num_evict": 0, "num_evict_kb": 0, "num_promote": 0, "num_flush_mode_high": 0, "num_flush_mode_low": 0, "num_evict_mode_some": 0, "num_evict_mode_full": 0, "num_objects_pinned": 0, "num_legacy_snapsets": 0, "num_large_omap_objects": 0, "num_objects_manifest": 0, "num_omap_bytes": 0, "num_omap_keys": 0, "num_objects_repaired": 0 }, "up": [ 252, 258, 36 ], "acting": [ 252, 258, 36 ], "avail_no_missing": [], "object_location_counts": [], "blocked_by": [], "up_primary": 252, "acting_primary": 252, "purged_snaps": [] }, "empty": 0, "dne": 0, "incomplete": 0, "last_epoch_started": 346836, "hit_set_history": { "current_last_update": "0'0", "history": [] } }, { "peer": "258", "pgid": "12.19", "last_update": "6242'1003", "last_complete": "6242'1003", "log_tail": "0'0", "last_user_version": 651, "last_backfill": "MAX", "last_backfill_bitwise": 0, "purged_snaps": [], "history": { "epoch_created": 4291, "epoch_pool_created": 4291, "last_epoch_started": 346836, "last_interval_started": 346835, "last_epoch_clean": 346836, "last_interval_clean": 346835, "last_epoch_split": 0, "last_epoch_marked_full": 266364, "same_up_since": 346832, "same_interval_since": 346835, "same_primary_since": 346700, "last_scrub": "6242'1003", "last_scrub_stamp": "2022-03-14 22:53:26.138458", "last_deep_scrub": "6242'1003", "last_deep_scrub_stamp": "2022-03-14 22:53:26.138458", "last_clean_scrub_stamp": "2022-03-14 22:53:26.138458" }, "stats": { "version": "6242'1003", "reported_seq": "14206", "reported_epoch": "346834", "state": "remapped+peering", "last_fresh": "2022-03-15 14:33:02.962535", "last_change": "2022-03-15 14:33:01.955614", "last_active": "2022-03-15 14:33:00.948390", "last_peered": "2022-03-15 14:30:51.921835", "last_clean": "2022-03-15 14:30:51.921835", "last_became_active": 
"2022-03-15 14:23:02.731994", "last_became_peered": "2022-03-15 14:23:02.731994", "last_unstale": "2022-03-15 14:33:02.962535", "last_undegraded": "2022-03-15 14:33:02.962535", "last_fullsized": "2022-03-15 14:33:02.962535", "mapping_epoch": 346835, "log_start": "0'0", "ondisk_log_start": "0'0", "created": 4291, "last_epoch_clean": 346709, "parent": "0.0", "parent_split_bits": 0, "last_scrub": "6242'1003", "last_scrub_stamp": "2022-03-14 22:53:26.138458", "last_deep_scrub": "6242'1003", "last_deep_scrub_stamp": "2022-03-14 22:53:26.138458", "last_clean_scrub_stamp": "2022-03-14 22:53:26.138458", "log_size": 1003, "ondisk_log_size": 1003, "stats_invalid": false, "dirty_stats_invalid": false, "omap_stats_invalid": false, "hitset_stats_invalid": false, "hitset_bytes_stats_invalid": false, "pin_stats_invalid": false, "manifest_stats_invalid": true, "snaptrimq_len": 0, "stat_sum": { "num_bytes": 79691776, "num_objects": 19, "num_object_clones": 0, "num_object_copies": 57, "num_objects_missing_on_primary": 0, "num_objects_missing": 0, "num_objects_degraded": 0, "num_objects_misplaced": 0, "num_objects_unfound": 0, "num_objects_dirty": 19, "num_whiteouts": 0, "num_read": 512, "num_read_kb": 524288, "num_write": 1092, "num_write_kb": 602112, "num_scrub_errors": 0, "num_shallow_scrub_errors": 0, "num_deep_scrub_errors": 0, "num_objects_recovered": 114, "num_bytes_recovered": 478150656, "num_keys_recovered": 0, "num_objects_omap": 0, "num_objects_hit_set_archive": 0, "num_bytes_hit_set_archive": 0, "num_flush": 0, "num_flush_kb": 0, "num_evict": 0, "num_evict_kb": 0, "num_promote": 0, "num_flush_mode_high": 0, "num_flush_mode_low": 0, "num_evict_mode_some": 0, "num_evict_mode_full": 0, "num_objects_pinned": 0, "num_legacy_snapsets": 0, "num_large_omap_objects": 0, "num_objects_manifest": 0, "num_omap_bytes": 0, "num_omap_keys": 0, "num_objects_repaired": 0 }, "up": [ 252, 258, 36 ], "acting": [ 252, 258, 36 ], "avail_no_missing": [], "object_location_counts": [], 
"blocked_by": [], "up_primary": 252, "acting_primary": 252, "purged_snaps": [] }, "empty": 0, "dne": 0, "incomplete": 0, "last_epoch_started": 346836, "hit_set_history": { "current_last_update": "0'0", "history": [] } } ], "recovery_state": [ { "name": "Started/Primary/Active", "enter_time": "2022-03-15 14:33:05.173192", "might_have_unfound": [], "recovery_progress": { "backfill_targets": [], "waiting_on_backfill": [], "last_backfill_started": "MIN", "backfill_info": { "begin": "MIN", "end": "MIN", "objects": [] }, "peer_backfill_info": [], "backfills_in_flight": [], "recovering": [], "pg_backend": { "pull_from_peer": [], "pushing": [] } }, "scrub": { "scrubber.epoch_start": "0", "scrubber.active": false, "scrubber.state": "INACTIVE", "scrubber.start": "MIN", "scrubber.end": "MIN", "scrubber.max_end": "MIN", "scrubber.subset_last_update": "0'0", "scrubber.deep": false, "scrubber.waiting_on_whom": [] } }, { "name": "Started", "enter_time": "2022-03-15 14:33:04.151166" } ], "agent_state": {} } ``` ---------------- Robert LeBlanc PGP Fingerprint 79A2 9CA4 6CC4 45DD A904 C70E E654 3BB2 FA62 B9F1 _______________________________________________ ceph-users mailing list -- ceph-users@xxxxxxx To unsubscribe send an email to ceph-users-leave@xxxxxxx