Hi all, I recently deployed Ceph 15.2.8 on 7 hosts in total: 3 running mon, mgr, rgw and mds, and 4 running osd. However, I have been encountering mgr crashes a few times a week, and the crashing mgr can be any one of the three. I couldn’t identify the underlying problem; the crash info is below. I would appreciate any suggestions that could help me narrow it down. Thank you very much. { "assert_condition": "ret == 0", "assert_file": "/home/jenkins-build/build/workspace/ceph-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos8/DIST/centos8/MACHINE_SIZE/gigantic/release/15.2.8/rpm/el8/BUILD/ceph-15.2.8/src/common/Thread.cc", "assert_func": "void Thread::create(const char*, size_t)", "assert_line": 157, "assert_msg": "/home/jenkins-build/build/workspace/ceph-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos8/DIST/centos8/MACHINE_SIZE/gigantic/release/15.2.8/rpm/el8/BUILD/ceph-15.2.8/src/common/Thread.cc: In function 'void Thread::create(const char*, size_t)' thread 7f833addc700 time 2021-02-10T20:00:32.980508+0000\n/home/jenkins-build/build/workspace/ceph-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos8/DIST/centos8/MACHINE_SIZE/gigantic/release/15.2.8/rpm/el8/BUILD/ceph-15.2.8/src/common/Thread.cc: 157: FAILED ceph_assert(ret == 0)\n", "assert_thread_name": "mgr-fin", "backtrace": [ "(()+0x12b20) [0x7f835a51cb20]", "(gsignal()+0x10f) [0x7f8358f6d7ff]", "(abort()+0x127) [0x7f8358f57c35]", "(ceph::__ceph_assert_fail(char const*, char const*, int, char const*)+0x1a9) [0x7f835c07b735]", "(()+0x27a8fe) [0x7f835c07b8fe]", "(()+0x34cef6) [0x7f835c14def6]", "(DispatchQueue::start()+0x3a) [0x7f835c29697a]", "(AsyncMessenger::ready()+0xcd) [0x7f835c3340cd]", "(Messenger::add_dispatcher_head(Dispatcher*)+0x68) [0x7f835c3f8478]", "(MonClient::get_monmap_and_config()+0xbb) [0x7f835c3f66ab]", "(ceph_mount_info::init()+0x4d) [0x7f834298435d]", "(()+0x3680f) [0x7f8342cd280f]", "(()+0x19d421) [0x7f835ba5c421]", "(_PyEval_EvalFrameDefault()+0x498) [0x7f835ba5ce08]", "(()+0x179c78) 
[0x7f835ba38c78]", "(()+0x19d1c7) [0x7f835ba5c1c7]", "(_PyEval_EvalFrameDefault()+0x498) [0x7f835ba5ce08]", "(()+0x179c78) [0x7f835ba38c78]", "(()+0x19d1c7) [0x7f835ba5c1c7]", "(_PyEval_EvalFrameDefault()+0x498) [0x7f835ba5ce08]", "(()+0x1221d4) [0x7f835b9e11d4]", "(()+0x122c55) [0x7f835b9e1c55]", "(()+0x19cf27) [0x7f835ba5bf27]", "(_PyEval_EvalFrameDefault()+0x498) [0x7f835ba5ce08]", "(_PyFunction_FastCallDict()+0x122) [0x7f835b9b9ec2]", "(_PyObject_FastCallDict()+0x70e) [0x7f835b9bac9e]", "(()+0x10dc70) [0x7f835b9ccc70]", "(_PyObject_FastCallDict()+0x6ec) [0x7f835b9bac7c]", "(PyObject_CallFunctionObjArgs()+0xe8) [0x7f835b9dbd48]", "(_PyEval_EvalFrameDefault()+0x2588) [0x7f835ba5eef8]", "(()+0xf99b4) [0x7f835b9b89b4]", "(()+0x179e60) [0x7f835ba38e60]", "(()+0x19d1c7) [0x7f835ba5c1c7]", "(_PyEval_EvalFrameDefault()+0x10d5) [0x7f835ba5da45]", "(()+0x179c78) [0x7f835ba38c78]", "(()+0x19d1c7) [0x7f835ba5c1c7]", "(_PyEval_EvalFrameDefault()+0x498) [0x7f835ba5ce08]", "(()+0xfa326) [0x7f835b9b9326]", "(()+0x179e60) [0x7f835ba38e60]", "(()+0x19d1c7) [0x7f835ba5c1c7]", "(_PyEval_EvalFrameDefault()+0x498) [0x7f835ba5ce08]", "(()+0x179c78) [0x7f835ba38c78]", "(()+0x19d1c7) [0x7f835ba5c1c7]", "(_PyEval_EvalFrameDefault()+0x498) [0x7f835ba5ce08]", "(_PyFunction_FastCallDict()+0x122) [0x7f835b9b9ec2]", "(_PyObject_FastCallDict()+0x70e) [0x7f835b9bac9e]", "(()+0x10dc70) [0x7f835b9ccc70]", "(PyObject_Call()+0x4b) [0x7f835b9c1acb]", "(PyObject_CallMethod()+0x10b) [0x7f835ba5ac6b]", "(ActivePyModule::handle_command(ModuleCommand const&, MgrSession const&, std::map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, boost::variant<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, bool, long, double, std::vector<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::allocator<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > >, std::vector<long, 
std::allocator<long> >, std::vector<double, std::allocator<double> > >, std::less<void>, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, boost::variant<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, bool, long, double, std::vector<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::allocator<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > >, std::vector<long, std::allocator<long> >, std::vector<double, std::allocator<double> > > > > > const&, ceph::buffer::v15_2_0::list const&, std::__cxx11::basic_stringstream<char, std::char_traits<char>, std::allocator<char> >*, std::__cxx11::basic_stringstream<char, std::char_traits<char>, std::allocator<char> >*)+0x222) [0x55bc0b8a0cb2]", "(()+0x1b0fdd) [0x55bc0b8f5fdd]", "(Context::complete(int)+0xd) [0x55bc0b8b0bdd]", "(Finisher::finisher_thread_entry()+0x1a5) [0x7f835c10b465]", "(()+0x814a) [0x7f835a51214a]", "(clone()+0x43) [0x7f8359032f23]" ], "ceph_version": "15.2.8", "crash_id": "2021-02-10T20:00:32.989661Z_201fd5fb-6e0a-4b50-8a95-fdf9ed9aeb81", "entity_name": "mgr.sds01-cp.cwcxek", "os_id": "centos", "os_name": "CentOS Linux", "os_version": "8", "os_version_id": "8", "process_name": "ceph-mgr", "stack_sig": "e1c15d685283e7598b128a37a328ba86ec433dfef97597ac9453b5d52608feda", "timestamp": "2021-02-10T20:00:32.989661Z", "utsname_hostname": "sds01-cp", "utsname_machine": "x86_64", "utsname_release": "4.18.0-240.10.1.el8_3.x86_64", "utsname_sysname": "Linux", "utsname_version": "#1 SMP Wed Dec 16 03:30:52 EST 2020" } _______________________________________________ ceph-users mailing list -- ceph-users@xxxxxxx To unsubscribe send an email to ceph-users-leave@xxxxxxx