Hi, the mgr service crashes frequently on nodes 2, 3 and 4, with the same condition each time, since the 4th node was added. root@zp3110b001a0104:/# ceph crash stat 19 crashes recorded 16 older than 1 days old: 2023-07-29T03:35:32.006309Z_7b622c2b-a2fc-425a-acb8-dc1673b4c189 2023-07-29T03:35:32.055174Z_a2ee1e23-5f41-4dbe-86ff-643fbf870dc9 2023-07-29T14:34:13.752432Z_39b6a0d9-1bc3-4481-9a14-c92fea6c2710 2023-07-30T03:02:57.510867Z_df595e04-0ac2-4e3d-93be-a7225348ea19 2023-07-30T06:20:09.322530Z_0c2485f8-281c-4440-8b08-89b08a669de4 2023-07-30T10:16:46.798405Z_79082f37-ee08-4a2b-84d1-d96c4026f321 2023-07-30T10:16:46.843441Z_788391d6-3278-48c4-a95b-1934ee3265c1 2023-07-31T02:26:55.903966Z_416a1e94-a8e1-4057-a683-a907faf400a1 2023-07-31T04:40:10.216044Z_bef9d811-4e92-45cd-bcd7-3282962c8dfe 2023-07-31T08:44:20.893344Z_037688ae-266f-4879-932c-2239f4679fd6 2023-07-31T09:22:12.527968Z_f136c93b-7156-4176-a734-66a5a62513a4 2023-07-31T15:22:08.417988Z_b80c6255-5eb3-41dd-b0b1-8bc5b070094f 2023-07-31T23:05:16.589501Z_20ed8ef9-a478-49de-a371-08ea7a9937e5 2023-08-01T01:26:01.911387Z_670f9e3c-7fbe-497f-9f0b-abeaefd8f2b3 2023-08-01T01:51:39.759874Z_ff8206e4-34aa-44fe-82ac-7339e6714bb7 2023-08-01T01:56:21.955706Z_98c86cdd-45ec-47dc-8f0c-2e5e09731db8 7 older than 3 days old: 2023-07-29T03:35:32.006309Z_7b622c2b-a2fc-425a-acb8-dc1673b4c189 2023-07-29T03:35:32.055174Z_a2ee1e23-5f41-4dbe-86ff-643fbf870dc9 2023-07-29T14:34:13.752432Z_39b6a0d9-1bc3-4481-9a14-c92fea6c2710 2023-07-30T03:02:57.510867Z_df595e04-0ac2-4e3d-93be-a7225348ea19 2023-07-30T06:20:09.322530Z_0c2485f8-281c-4440-8b08-89b08a669de4 2023-07-30T10:16:46.798405Z_79082f37-ee08-4a2b-84d1-d96c4026f321 2023-07-30T10:16:46.843441Z_788391d6-3278-48c4-a95b-1934ee3265c1 
root@zp3110b001a0104:/var/lib/ceph/8dbfcd81-fee3-49d2-ac0c-e988c8be7178/crash/posted/2023-07-31T08:44:20.893344Z_037688ae-266f-4879-932c-2239f4679fd6# cat meta { "crash_id": "2023-07-31T08:44:20.893344Z_037688ae-266f-4879-932c-2239f4679fd6", "timestamp": "2023-07-31T08:44:20.893344Z", "process_name": "ceph-mgr", "entity_name": "mgr.zp3110b001a0104.tmbkzq", "ceph_version": "16.2.5", "utsname_hostname": "zp3110b001a0104", "utsname_sysname": "Linux", "utsname_release": "5.4.0-153-generic", "utsname_version": "#170-Ubuntu SMP Fri Jun 16 13:43:31 UTC 2023", "utsname_machine": "x86_64", "os_name": "CentOS Linux", "os_id": "centos", "os_version_id": "8", "os_version": "8", "assert_condition": "pending_service_map.epoch > service_map.epoch", "assert_func": "DaemonServer::got_service_map()::<lambda(const ServiceMap&)>", "assert_file": "/home/jenkins-build/build/workspace/ceph-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos8/DIST/centos8/MACHINE_SIZE/gigantic/release/16.2.5/rpm/el8/BUILD/ceph-16.2.5/src/mgr/DaemonServer.cc", "assert_line": 2932, "assert_thread_name": "ms_dispatch", "assert_msg": "/home/jenkins-build/build/workspace/ceph-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos8/DIST/centos8/MACHINE_SIZE/gigantic/release/16.2.5/rpm/el8/BUILD/ceph-16.2.5/src/mgr/DaemonServer.cc: In function 'DaemonServer::got_service_map()::<lambda(const ServiceMap&)>' thread 7f127440a700 time 2023-07-31T08:44:20.887150+0000\n/home/jenkins-build/build/workspace/ceph-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos8/DIST/centos8/MACHINE_SIZE/gigantic/release/16.2.5/rpm/el8/BUILD/ceph-16.2.5/src/mgr/DaemonServer.cc: 2932: FAILED ceph_assert(pending_service_map.epoch > service_map.epoch)\n", "backtrace": [ "/lib64/libpthread.so.0(+0x12b20) [0x7f127c611b20]", "gsignal()", "abort()", 
"(ceph::__ceph_assert_fail(char const*, char const*, int, char const*)+0x1a9) [0x7f127da26b75]", "/usr/lib64/ceph/libceph-common.so.2(+0x276d3e) [0x7f127da26d3e]", "(DaemonServer::got_service_map()+0xb2d) [0x5625aee23a4d]", "(Mgr::handle_service_map(boost::intrusive_ptr<MServiceMap>)+0x1b6) [0x5625aee527c6]", "(Mgr::ms_dispatch2(boost::intrusive_ptr<Message> const&)+0x894) [0x5625aee55424]", "(MgrStandby::ms_dispatch2(boost::intrusive_ptr<Message> const&)+0xb0) [0x5625aee5ec10]", "(DispatchQueue::entry()+0x126a) [0x7f127dc610ca]", "(DispatchQueue::DispatchThread::entry()+0x11) [0x7f127dd11591]", "/lib64/libpthread.so.0(+0x814a) [0x7f127c60714a]", "clone()" ] } root@zp3110b001a0104:/var/lib/ceph/8dbfcd81-fee3-49d2-ac0c-e988c8be7178/crash/posted/2023-07-31T08:44:20.893344Z_037688ae-266f-4879-932c-2239f4679fd6#<mailto:root@zp3110b001a0104:/var/lib/ceph/8dbfcd81-fee3-49d2-ac0c-e988c8be7178/crash/posted/2023-07-31T08:44:20.893344Z_037688ae-266f-4879-932c-2239f4679fd6#> more log --- begin dump of recent events --- -9999> 2023-07-31T08:27:14.084+0000 7f126fc01700 10 monclient: _send_mon_message to mon.zp3110b001a0104 at v2:XX.XXX.26.4:3300/0 -9998> 2023-07-31T08:27:14.216+0000 7f1272406700 10 monclient: tick -9997> 2023-07-31T08:27:14.216+0000 7f1272406700 10 monclient: _check_auth_rotating have uptodate secrets (they expire after 2023-07-3 1T08:26:44.220044+0000) -9996> 2023-07-31T08:27:15.216+0000 7f1272406700 10 monclient: tick -9995> 2023-07-31T08:27:15.216+0000 7f1272406700 10 monclient: _check_auth_rotating have uptodate secrets (they expire after 2023-07-3 1T08:26:45.220236+0000) -9994> 2023-07-31T08:27:16.108+0000 7f126fc01700 10 monclient: _send_mon_message to mon.zp3110b001a0104 at v2:XX.XXX.26.4:3300/0 -9993> 2023-07-31T08:27:16.216+0000 7f1272406700 10 monclient: tick -9992> 2023-07-31T08:27:16.216+0000 7f1272406700 10 monclient: _check_auth_rotating have uptodate secrets (they expire after 2023-07-3 1T08:26:46.220455+0000) -9991> 2023-07-31T08:27:17.216+0000 
7f1272406700 10 monclient: tick -9990> 2023-07-31T08:27:17.216+0000 7f1272406700 10 monclient: _check_auth_rotating have uptodate secrets (they expire after 2023-07-3 1T08:26:47.220605+0000) -9989> 2023-07-31T08:27:18.132+0000 7f126fc01700 10 monclient: _send_mon_message to mon.zp3110b001a0104 at v2:XX.XXX.26.4:3300/0 -9988> 2023-07-31T08:27:18.216+0000 7f1272406700 10 monclient: tick -9987> 2023-07-31T08:27:18.216+0000 7f1272406700 10 monclient: _check_auth_rotating have uptodate secrets (they expire after 2023-07-3 1T08:26:48.220802+0000) -9986> 2023-07-31T08:27:19.216+0000 7f1272406700 10 monclient: tick -9985> 2023-07-31T08:27:19.216+0000 7f1272406700 10 monclient: _check_auth_rotating have uptodate secrets (they expire after 2023-07-3 1T08:26:49.220991+0000) -9982> 2023-07-31T08:27:20.216+0000 7f1272406700 10 monclient: _check_auth_rotating have uptodate secrets (they expire after 2023-07-3 1T08:26:50.221181+0000) -9981> 2023-07-31T08:27:21.216+0000 7f1272406700 10 monclient: tick -9980> 2023-07-31T08:27:21.216+0000 7f1272406700 10 monclient: _check_auth_rotating have uptodate secrets (they expire after 2023-07-3 1T08:26:51.221376+0000) -9979> 2023-07-31T08:27:22.180+0000 7f126fc01700 10 monclient: _send_mon_message to mon.zp3110b001a0104 at v2:XX.XXX.26.4:3300/0 -9978> 2023-07-31T08:27:22.216+0000 7f1272406700 10 monclient: tick -9977> 2023-07-31T08:27:22.216+0000 7f1272406700 10 monclient: _check_auth_rotating have uptodate secrets (they expire after 2023-07-3 1T08:26:52.221566+0000) -9976> 2023-07-31T08:27:23.216+0000 7f1272406700 10 monclient: tick -9975> 2023-07-31T08:27:23.216+0000 7f1272406700 10 monclient: _check_auth_rotating have uptodate secrets (they expire after 2023-07-3 1T08:26:53.221770+0000) -9974> 2023-07-31T08:27:23.512+0000 7f11664d5700 4 mgr get_config get_config key: mgr/dashboard/AUDIT_API_ENABLED -9973> 2023-07-31T08:27:23.512+0000 7f11664d5700 4 ceph_get_module_option AUDIT_API_ENABLED not found -9972> 2023-07-31T08:27:23.512+0000 
7f11664d5700 4 mgr get_config get_config key: mgr/dashboard/standby_behaviour -9971> 2023-07-31T08:27:23.512+0000 7f11664d5700 4 ceph_get_module_option standby_behaviour not found -9970> 2023-07-31T08:27:24.184+0000 7f126fc01700 10 monclient: _send_mon_message to mon.zp3110b001a0104 at v2:XX.XXX.26.4:3300/0 -9969> 2023-07-31T08:27:24.216+0000 7f1272406700 10 monclient: tick -9968> 2023-07-31T08:27:24.216+0000 7f1272406700 10 monclient: _check_auth_rotating have uptodate secrets (they expire after 2023-07-3 1T08:26:54.221952+0000) -9967> 2023-07-31T08:27:25.216+0000 7f1272406700 10 monclient: tick -9966> 2023-07-31T08:27:25.216+0000 7f1272406700 10 monclient: _check_auth_rotating have uptodate secrets (they expire after 2023-07-3 root@zp3110b0 _______________________________________________ ceph-users mailing list -- ceph-users@xxxxxxx To unsubscribe send an email to ceph-users-leave@xxxxxxx