mgr services frequently crash on nodes 2,3,4

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,

The mgr service crashes frequently on nodes 2, 3, and 4, with the same condition each time, after the 4th node was added.

root@zp3110b001a0104:/# ceph crash stat
19 crashes recorded
16 older than 1 days old:
2023-07-29T03:35:32.006309Z_7b622c2b-a2fc-425a-acb8-dc1673b4c189
2023-07-29T03:35:32.055174Z_a2ee1e23-5f41-4dbe-86ff-643fbf870dc9
2023-07-29T14:34:13.752432Z_39b6a0d9-1bc3-4481-9a14-c92fea6c2710
2023-07-30T03:02:57.510867Z_df595e04-0ac2-4e3d-93be-a7225348ea19
2023-07-30T06:20:09.322530Z_0c2485f8-281c-4440-8b08-89b08a669de4
2023-07-30T10:16:46.798405Z_79082f37-ee08-4a2b-84d1-d96c4026f321
2023-07-30T10:16:46.843441Z_788391d6-3278-48c4-a95b-1934ee3265c1
2023-07-31T02:26:55.903966Z_416a1e94-a8e1-4057-a683-a907faf400a1
2023-07-31T04:40:10.216044Z_bef9d811-4e92-45cd-bcd7-3282962c8dfe
2023-07-31T08:44:20.893344Z_037688ae-266f-4879-932c-2239f4679fd6
2023-07-31T09:22:12.527968Z_f136c93b-7156-4176-a734-66a5a62513a4
2023-07-31T15:22:08.417988Z_b80c6255-5eb3-41dd-b0b1-8bc5b070094f
2023-07-31T23:05:16.589501Z_20ed8ef9-a478-49de-a371-08ea7a9937e5
2023-08-01T01:26:01.911387Z_670f9e3c-7fbe-497f-9f0b-abeaefd8f2b3
2023-08-01T01:51:39.759874Z_ff8206e4-34aa-44fe-82ac-7339e6714bb7
2023-08-01T01:56:21.955706Z_98c86cdd-45ec-47dc-8f0c-2e5e09731db8
7 older than 3 days old:
2023-07-29T03:35:32.006309Z_7b622c2b-a2fc-425a-acb8-dc1673b4c189
2023-07-29T03:35:32.055174Z_a2ee1e23-5f41-4dbe-86ff-643fbf870dc9
2023-07-29T14:34:13.752432Z_39b6a0d9-1bc3-4481-9a14-c92fea6c2710
2023-07-30T03:02:57.510867Z_df595e04-0ac2-4e3d-93be-a7225348ea19
2023-07-30T06:20:09.322530Z_0c2485f8-281c-4440-8b08-89b08a669de4
2023-07-30T10:16:46.798405Z_79082f37-ee08-4a2b-84d1-d96c4026f321
2023-07-30T10:16:46.843441Z_788391d6-3278-48c4-a95b-1934ee3265c1

root@zp3110b001a0104:/var/lib/ceph/8dbfcd81-fee3-49d2-ac0c-e988c8be7178/crash/posted/2023-07-31T08:44:20.893344Z_037688ae-266f-4879-932c-2239f4679fd6# cat meta
{
    "crash_id": "2023-07-31T08:44:20.893344Z_037688ae-266f-4879-932c-2239f4679fd6",
    "timestamp": "2023-07-31T08:44:20.893344Z",
    "process_name": "ceph-mgr",
    "entity_name": "mgr.zp3110b001a0104.tmbkzq",
    "ceph_version": "16.2.5",
    "utsname_hostname": "zp3110b001a0104",
    "utsname_sysname": "Linux",
    "utsname_release": "5.4.0-153-generic",
    "utsname_version": "#170-Ubuntu SMP Fri Jun 16 13:43:31 UTC 2023",
    "utsname_machine": "x86_64",
    "os_name": "CentOS Linux",
    "os_id": "centos",
    "os_version_id": "8",
    "os_version": "8",
    "assert_condition": "pending_service_map.epoch > service_map.epoch",
    "assert_func": "DaemonServer::got_service_map()::<lambda(const ServiceMap&)>",
    "assert_file": "/home/jenkins-build/build/workspace/ceph-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos8/DIST/centos8/MACHINE_SIZE/gigantic/release/16.2.5/rpm/el8/BUILD/ceph-16.2.5/src/mgr/DaemonServer.cc",
    "assert_line": 2932,
    "assert_thread_name": "ms_dispatch",
    "assert_msg": "/home/jenkins-build/build/workspace/ceph-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos8/DIST/centos8/MACHINE_SIZE/gigantic/release/16.2.5/rpm/el8/BUILD/ceph-16.2.5/src/mgr/DaemonServer.cc: In function 'DaemonServer::got_service_map()::<lambda(const ServiceMap&)>' thread 7f127440a700 time 2023-07-31T08:44:20.887150+0000\n/home/jenkins-build/build/workspace/ceph-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos8/DIST/centos8/MACHINE_SIZE/gigantic/release/16.2.5/rpm/el8/BUILD/ceph-16.2.5/src/mgr/DaemonServer.cc: 2932: FAILED ceph_assert(pending_service_map.epoch > service_map.epoch)\n",
    "backtrace": [
        "/lib64/libpthread.so.0(+0x12b20) [0x7f127c611b20]",
        "gsignal()",
        "abort()",
        "(ceph::__ceph_assert_fail(char const*, char const*, int, char const*)+0x1a9) [0x7f127da26b75]",
        "/usr/lib64/ceph/libceph-common.so.2(+0x276d3e) [0x7f127da26d3e]",
        "(DaemonServer::got_service_map()+0xb2d) [0x5625aee23a4d]",
        "(Mgr::handle_service_map(boost::intrusive_ptr<MServiceMap>)+0x1b6) [0x5625aee527c6]",
        "(Mgr::ms_dispatch2(boost::intrusive_ptr<Message> const&)+0x894) [0x5625aee55424]",
        "(MgrStandby::ms_dispatch2(boost::intrusive_ptr<Message> const&)+0xb0) [0x5625aee5ec10]",
        "(DispatchQueue::entry()+0x126a) [0x7f127dc610ca]",
        "(DispatchQueue::DispatchThread::entry()+0x11) [0x7f127dd11591]",
        "/lib64/libpthread.so.0(+0x814a) [0x7f127c60714a]",
        "clone()"
    ]
}
root@zp3110b001a0104:/var/lib/ceph/8dbfcd81-fee3-49d2-ac0c-e988c8be7178/crash/posted/2023-07-31T08:44:20.893344Z_037688ae-266f-4879-932c-2239f4679fd6# more log
--- begin dump of recent events ---
-9999> 2023-07-31T08:27:14.084+0000 7f126fc01700 10 monclient: _send_mon_message to mon.zp3110b001a0104 at v2:XX.XXX.26.4:3300/0
-9998> 2023-07-31T08:27:14.216+0000 7f1272406700 10 monclient: tick
-9997> 2023-07-31T08:27:14.216+0000 7f1272406700 10 monclient: _check_auth_rotating have uptodate secrets (they expire after 2023-07-3
1T08:26:44.220044+0000)
-9996> 2023-07-31T08:27:15.216+0000 7f1272406700 10 monclient: tick
-9995> 2023-07-31T08:27:15.216+0000 7f1272406700 10 monclient: _check_auth_rotating have uptodate secrets (they expire after 2023-07-3
1T08:26:45.220236+0000)
-9994> 2023-07-31T08:27:16.108+0000 7f126fc01700 10 monclient: _send_mon_message to mon.zp3110b001a0104 at v2:XX.XXX.26.4:3300/0
-9993> 2023-07-31T08:27:16.216+0000 7f1272406700 10 monclient: tick
-9992> 2023-07-31T08:27:16.216+0000 7f1272406700 10 monclient: _check_auth_rotating have uptodate secrets (they expire after 2023-07-3
1T08:26:46.220455+0000)
-9991> 2023-07-31T08:27:17.216+0000 7f1272406700 10 monclient: tick
-9990> 2023-07-31T08:27:17.216+0000 7f1272406700 10 monclient: _check_auth_rotating have uptodate secrets (they expire after 2023-07-3
1T08:26:47.220605+0000)
-9989> 2023-07-31T08:27:18.132+0000 7f126fc01700 10 monclient: _send_mon_message to mon.zp3110b001a0104 at v2:XX.XXX.26.4:3300/0
-9988> 2023-07-31T08:27:18.216+0000 7f1272406700 10 monclient: tick
-9987> 2023-07-31T08:27:18.216+0000 7f1272406700 10 monclient: _check_auth_rotating have uptodate secrets (they expire after 2023-07-3
1T08:26:48.220802+0000)
-9986> 2023-07-31T08:27:19.216+0000 7f1272406700 10 monclient: tick
-9985> 2023-07-31T08:27:19.216+0000 7f1272406700 10 monclient: _check_auth_rotating have uptodate secrets (they expire after 2023-07-3
1T08:26:49.220991+0000)
-9982> 2023-07-31T08:27:20.216+0000 7f1272406700 10 monclient: _check_auth_rotating have uptodate secrets (they expire after 2023-07-3
1T08:26:50.221181+0000)
-9981> 2023-07-31T08:27:21.216+0000 7f1272406700 10 monclient: tick
-9980> 2023-07-31T08:27:21.216+0000 7f1272406700 10 monclient: _check_auth_rotating have uptodate secrets (they expire after 2023-07-3
1T08:26:51.221376+0000)
-9979> 2023-07-31T08:27:22.180+0000 7f126fc01700 10 monclient: _send_mon_message to mon.zp3110b001a0104 at v2:XX.XXX.26.4:3300/0
-9978> 2023-07-31T08:27:22.216+0000 7f1272406700 10 monclient: tick
-9977> 2023-07-31T08:27:22.216+0000 7f1272406700 10 monclient: _check_auth_rotating have uptodate secrets (they expire after 2023-07-3
1T08:26:52.221566+0000)
-9976> 2023-07-31T08:27:23.216+0000 7f1272406700 10 monclient: tick
-9975> 2023-07-31T08:27:23.216+0000 7f1272406700 10 monclient: _check_auth_rotating have uptodate secrets (they expire after 2023-07-3
1T08:26:53.221770+0000)
-9974> 2023-07-31T08:27:23.512+0000 7f11664d5700  4 mgr get_config get_config key: mgr/dashboard/AUDIT_API_ENABLED
-9973> 2023-07-31T08:27:23.512+0000 7f11664d5700  4 ceph_get_module_option AUDIT_API_ENABLED not found
-9972> 2023-07-31T08:27:23.512+0000 7f11664d5700  4 mgr get_config get_config key: mgr/dashboard/standby_behaviour
-9971> 2023-07-31T08:27:23.512+0000 7f11664d5700  4 ceph_get_module_option standby_behaviour not found
-9970> 2023-07-31T08:27:24.184+0000 7f126fc01700 10 monclient: _send_mon_message to mon.zp3110b001a0104 at v2:XX.XXX.26.4:3300/0
-9969> 2023-07-31T08:27:24.216+0000 7f1272406700 10 monclient: tick
-9968> 2023-07-31T08:27:24.216+0000 7f1272406700 10 monclient: _check_auth_rotating have uptodate secrets (they expire after 2023-07-3
1T08:26:54.221952+0000)
-9967> 2023-07-31T08:27:25.216+0000 7f1272406700 10 monclient: tick
-9966> 2023-07-31T08:27:25.216+0000 7f1272406700 10 monclient: _check_auth_rotating have uptodate secrets (they expire after 2023-07-3
root@zp3110b0
_______________________________________________
ceph-users mailing list -- ceph-users@xxxxxxx
To unsubscribe send an email to ceph-users-leave@xxxxxxx



[Index of Archives]     [Information on CEPH]     [Linux Filesystem Development]     [Ceph Development]     [Ceph Large]     [Ceph Dev]     [Linux USB Development]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [xfs]


  Powered by Linux