15.2.8 mgr keeps crashing every few days

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi all,

I recently deployed Ceph 15.2.8 on 7 hosts in total: 3 hosts running
mon, mgr, rgw and mds, and 4 hosts running osd. However, a mgr crashes a
few times a week, and the crashing mgr can be any one of the 3. I couldn’t
identify the underlying problem. Here is the crash info; I would appreciate
any suggestions that could help me narrow it down.

Thank you very much.

{
    "assert_condition": "ret == 0",
    "assert_file":
"/home/jenkins-build/build/workspace/ceph-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos8/DIST/centos8/MACHINE_SIZE/gigantic/release/15.2.8/rpm/el8/BUILD/ceph-15.2.8/src/common/Thread.cc",
    "assert_func": "void Thread::create(const char*, size_t)",
    "assert_line": 157,
    "assert_msg":
"/home/jenkins-build/build/workspace/ceph-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos8/DIST/centos8/MACHINE_SIZE/gigantic/release/15.2.8/rpm/el8/BUILD/ceph-15.2.8/src/common/Thread.cc:
In function 'void Thread::create(const char*, size_t)' thread 7f833addc700
time
2021-02-10T20:00:32.980508+0000\n/home/jenkins-build/build/workspace/ceph-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos8/DIST/centos8/MACHINE_SIZE/gigantic/release/15.2.8/rpm/el8/BUILD/ceph-15.2.8/src/common/Thread.cc:
157: FAILED ceph_assert(ret == 0)\n",
    "assert_thread_name": "mgr-fin",
    "backtrace": [
        "(()+0x12b20) [0x7f835a51cb20]",
        "(gsignal()+0x10f) [0x7f8358f6d7ff]",
        "(abort()+0x127) [0x7f8358f57c35]",
        "(ceph::__ceph_assert_fail(char const*, char const*, int, char
const*)+0x1a9) [0x7f835c07b735]",
        "(()+0x27a8fe) [0x7f835c07b8fe]",
        "(()+0x34cef6) [0x7f835c14def6]",
        "(DispatchQueue::start()+0x3a) [0x7f835c29697a]",
        "(AsyncMessenger::ready()+0xcd) [0x7f835c3340cd]",
        "(Messenger::add_dispatcher_head(Dispatcher*)+0x68)
[0x7f835c3f8478]",
        "(MonClient::get_monmap_and_config()+0xbb) [0x7f835c3f66ab]",
        "(ceph_mount_info::init()+0x4d) [0x7f834298435d]",
        "(()+0x3680f) [0x7f8342cd280f]",
        "(()+0x19d421) [0x7f835ba5c421]",
        "(_PyEval_EvalFrameDefault()+0x498) [0x7f835ba5ce08]",
        "(()+0x179c78) [0x7f835ba38c78]",
        "(()+0x19d1c7) [0x7f835ba5c1c7]",
        "(_PyEval_EvalFrameDefault()+0x498) [0x7f835ba5ce08]",
        "(()+0x179c78) [0x7f835ba38c78]",
        "(()+0x19d1c7) [0x7f835ba5c1c7]",
        "(_PyEval_EvalFrameDefault()+0x498) [0x7f835ba5ce08]",
        "(()+0x1221d4) [0x7f835b9e11d4]",
        "(()+0x122c55) [0x7f835b9e1c55]",
        "(()+0x19cf27) [0x7f835ba5bf27]",
        "(_PyEval_EvalFrameDefault()+0x498) [0x7f835ba5ce08]",
        "(_PyFunction_FastCallDict()+0x122) [0x7f835b9b9ec2]",
        "(_PyObject_FastCallDict()+0x70e) [0x7f835b9bac9e]",
        "(()+0x10dc70) [0x7f835b9ccc70]",
        "(_PyObject_FastCallDict()+0x6ec) [0x7f835b9bac7c]",
        "(PyObject_CallFunctionObjArgs()+0xe8) [0x7f835b9dbd48]",
        "(_PyEval_EvalFrameDefault()+0x2588) [0x7f835ba5eef8]",
        "(()+0xf99b4) [0x7f835b9b89b4]",
        "(()+0x179e60) [0x7f835ba38e60]",
        "(()+0x19d1c7) [0x7f835ba5c1c7]",
        "(_PyEval_EvalFrameDefault()+0x10d5) [0x7f835ba5da45]",
        "(()+0x179c78) [0x7f835ba38c78]",
        "(()+0x19d1c7) [0x7f835ba5c1c7]",
        "(_PyEval_EvalFrameDefault()+0x498) [0x7f835ba5ce08]",
        "(()+0xfa326) [0x7f835b9b9326]",
        "(()+0x179e60) [0x7f835ba38e60]",
        "(()+0x19d1c7) [0x7f835ba5c1c7]",
        "(_PyEval_EvalFrameDefault()+0x498) [0x7f835ba5ce08]",
        "(()+0x179c78) [0x7f835ba38c78]",
        "(()+0x19d1c7) [0x7f835ba5c1c7]",
        "(_PyEval_EvalFrameDefault()+0x498) [0x7f835ba5ce08]",
        "(_PyFunction_FastCallDict()+0x122) [0x7f835b9b9ec2]",
        "(_PyObject_FastCallDict()+0x70e) [0x7f835b9bac9e]",
        "(()+0x10dc70) [0x7f835b9ccc70]",
        "(PyObject_Call()+0x4b) [0x7f835b9c1acb]",
        "(PyObject_CallMethod()+0x10b) [0x7f835ba5ac6b]",
        "(ActivePyModule::handle_command(ModuleCommand const&, MgrSession
const&, std::map<std::__cxx11::basic_string<char, std::char_traits<char>,
std::allocator<char> >, boost::variant<std::__cxx11::basic_string<char,
std::char_traits<char>, std::allocator<char> >, bool, long, double,
std::vector<std::__cxx11::basic_string<char, std::char_traits<char>,
std::allocator<char> >, std::allocator<std::__cxx11::basic_string<char,
std::char_traits<char>, std::allocator<char> > > >, std::vector<long,
std::allocator<long> >, std::vector<double, std::allocator<double> > >,
std::less<void>, std::allocator<std::pair<std::__cxx11::basic_string<char,
std::char_traits<char>, std::allocator<char> > const,
boost::variant<std::__cxx11::basic_string<char, std::char_traits<char>,
std::allocator<char> >, bool, long, double,
std::vector<std::__cxx11::basic_string<char, std::char_traits<char>,
std::allocator<char> >, std::allocator<std::__cxx11::basic_string<char,
std::char_traits<char>, std::allocator<char> > > >, std::vector<long,
std::allocator<long> >, std::vector<double, std::allocator<double> > > > >
> const&, ceph::buffer::v15_2_0::list const&,
std::__cxx11::basic_stringstream<char, std::char_traits<char>,
std::allocator<char> >*, std::__cxx11::basic_stringstream<char,
std::char_traits<char>, std::allocator<char> >*)+0x222) [0x55bc0b8a0cb2]",
        "(()+0x1b0fdd) [0x55bc0b8f5fdd]",
        "(Context::complete(int)+0xd) [0x55bc0b8b0bdd]",
        "(Finisher::finisher_thread_entry()+0x1a5) [0x7f835c10b465]",
        "(()+0x814a) [0x7f835a51214a]",
        "(clone()+0x43) [0x7f8359032f23]"
    ],
    "ceph_version": "15.2.8",
    "crash_id":
"2021-02-10T20:00:32.989661Z_201fd5fb-6e0a-4b50-8a95-fdf9ed9aeb81",
    "entity_name": "mgr.sds01-cp.cwcxek",
    "os_id": "centos",
    "os_name": "CentOS Linux",
    "os_version": "8",
    "os_version_id": "8",
    "process_name": "ceph-mgr",
    "stack_sig":
"e1c15d685283e7598b128a37a328ba86ec433dfef97597ac9453b5d52608feda",
    "timestamp": "2021-02-10T20:00:32.989661Z",
    "utsname_hostname": "sds01-cp",
    "utsname_machine": "x86_64",
    "utsname_release": "4.18.0-240.10.1.el8_3.x86_64",
    "utsname_sysname": "Linux",
    "utsname_version": "#1 SMP Wed Dec 16 03:30:52 EST 2020"
}
_______________________________________________
ceph-users mailing list -- ceph-users@xxxxxxx
To unsubscribe send an email to ceph-users-leave@xxxxxxx




[Index of Archives]     [Information on CEPH]     [Linux Filesystem Development]     [Ceph Development]     [Ceph Large]     [Ceph Dev]     [Linux USB Development]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [xfs]


  Powered by Linux