OSDs crash - Since Pacific

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Dear All,

After updating our Ceph cluster from Octopus to Pacific, we got a lot of slow_ops on many OSDs (which caused the cluster to become very slow).
We investigated and searched the ceph-users list, and we found that rebuilding all OSDs can improve (or fix) the issue (we suspect that the cause is a fragmented BlueStore filesystem). Unfortunately, while we are still rebuilding the OSDs one by one, other OSDs have recently crashed with no clear reason. Below is the crash report for one OSD; maybe you have some information?

Many Thanks



{
    "crash_id": "2022-08-22T09:21:34.369579Z_9527bc17-d981-4a14-a66f-33f8357e8644",
    "timestamp": "2022-08-22T09:21:34.369579Z",
    "process_name": "ceph-osd",
    "entity_name": "osd.17",
    "ceph_version": "16.2.9",
    "utsname_hostname": "xxxxxxxxx",
    "utsname_sysname": "Linux",
    "utsname_release": "4.15.0-162-generic",
    "utsname_version": "#170-Ubuntu SMP Mon Oct 18 11:38:05 UTC 2021",
    "utsname_machine": "x86_64",
    "os_name": "Ubuntu",
    "os_id": "ubuntu",
    "os_version_id": "18.04",
    "os_version": "18.04.6 LTS (Bionic Beaver)",
    "backtrace": [
        "/lib/x86_64-linux-gnu/libpthread.so.0(+0x12980) [0x7fda0ddbf980]",
        "madvise()",
        "(TCMalloc_SystemRelease(void*, unsigned long)+0x8a) [0x7fda0e8d580a]",
        "(tcmalloc::PageHeap::DecommitSpan(tcmalloc::Span*)+0x20) [0x7fda0e8d6e80]",
        "(tcmalloc::PageHeap::MergeIntoFreeList(tcmalloc::Span*)+0x21b) [0x7fda0e8d72bb]",
        "(tcmalloc::PageHeap::Delete(tcmalloc::Span*)+0x23) [0x7fda0e8d7513]",
        "(tcmalloc::CentralFreeList::ReleaseToSpans(void*)+0x11d) [0x7fda0e8d629d]",
       "(tcmalloc::CentralFreeList::ReleaseListToSpans(void*)+0x1b) [0x7fda0e8d635b]",
        "(tcmalloc::CentralFreeList::InsertRange(void*, void*, int)+0x3f) [0x7fda0e8d65ff]",
        "(tcmalloc::ThreadCache::ReleaseToCentralCache(tcmalloc::ThreadCache::FreeList*, unsigned long, int)+0x110) [0x7fda0e8d9c00]",
        "(tcmalloc::ThreadCache::ListTooLong(tcmalloc::ThreadCache::FreeList*, unsigned long)+0x1b) [0x7fda0e8d9c9b]",
        "cfree()",
      "(std::_Rb_tree<ghobject_t, std::pair<ghobject_t const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::_Select1st<st
d::pair<ghobject_t const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > >, std::less<ghobject_t>, std::allocator<std::pair<ghobje
ct_t const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > >::_M_erase(std::_Rb_tree_node<std::pair<ghobject_t const, std::__cxx
11::basic_string<char, std::char_traits<char>, std::allocator<char> > > >*)+0x82) [0x55b9c4e63642]",
        "(std::_Rb_tree<ghobject_t, std::pair<ghobject_t const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::_Select1st<st
d::pair<ghobject_t const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > >, std::less<ghobject_t>, std::allocator<std::pair<ghobje
ct_t const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > >::_M_erase(std::_Rb_tree_node<std::pair<ghobject_t const, std::__cxx
11::basic_string<char, std::char_traits<char>, std::allocator<char> > > >*)+0x1f) [0x55b9c4e635df]",
        "(std::_Rb_tree<ghobject_t, std::pair<ghobject_t const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::_Select1st<st
d::pair<ghobject_t const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > >, std::less<ghobject_t>, std::allocator<std::pair<ghobje
ct_t const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > >::_M_erase(std::_Rb_tree_node<std::pair<ghobject_t const, std::__cxx
11::basic_string<char, std::char_traits<char>, std::allocator<char> > > >*)+0x1f) [0x55b9c4e635df]",
        "(std::_Rb_tree<ghobject_t, std::pair<ghobject_t const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::_Select1st<st
d::pair<ghobject_t const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > >, std::less<ghobject_t>, std::allocator<std::pair<ghobje
ct_t const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > >::_M_erase(std::_Rb_tree_node<std::pair<ghobject_t const, std::__cxx
11::basic_string<char, std::char_traits<char>, std::allocator<char> > > >*)+0x1f) [0x55b9c4e635df]",
        "(std::_Rb_tree<ghobject_t, std::pair<ghobject_t const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::_Select1st<st
d::pair<ghobject_t const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > >, std::less<ghobject_t>, std::allocator<std::pair<ghobje
ct_t const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > >::_M_erase(std::_Rb_tree_node<std::pair<ghobject_t const, std::__cxx
11::basic_string<char, std::char_traits<char>, std::allocator<char> > > >*)+0x1f) [0x55b9c4e635df]",
        "(std::_Rb_tree<ghobject_t, std::pair<ghobject_t const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::_Select1st<st
d::pair<ghobject_t const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > >, std::less<ghobject_t>, std::allocator<std::pair<ghobje
ct_t const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > >::_M_erase(std::_Rb_tree_node<std::pair<ghobject_t const, std::__cxx
11::basic_string<char, std::char_traits<char>, std::allocator<char> > > >*)+0x1f) [0x55b9c4e635df]",
        "/usr/bin/ceph-osd(+0xf93501) [0x55b9c4db6501]",
        "(BlueStore::_collection_list(BlueStore::Collection*, ghobject_t const&, ghobject_t const&, int, bool, std::vector<ghobject_t, std::allocator<ghobject_t> >*,
ghobject_t*)+0x49c) [0x55b9c4df426c]",
        "(BlueStore::collection_list(boost::intrusive_ptr<ObjectStore::CollectionImpl>&, ghobject_t const&, ghobject_t const&, int, std::vector<ghobject_t, std::allo
cator<ghobject_t> >*, ghobject_t*)+0xad) [0x55b9c4df5f2d]",
        "(PGBackend::objects_list_partial(hobject_t const&, int, int, std::vector<hobject_t, std::allocator<hobject_t> >*, hobject_t*)+0x68d) [0x55b9c4abfe4d]",
        "(PgScrubber::select_range()+0x2c2) [0x55b9c4c41ba2]",
        "(PgScrubber::select_range_n_notify()+0x24) [0x55b9c4c426d4]",
        "(Scrub::NewChunk::NewChunk(boost::statechart::state<Scrub::NewChunk, Scrub::ActiveScrubbing, boost::mpl::list<mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::
na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, m
pl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na>, (boost::statechart::history_mode)0>::my_context)+0xf8) [0x55b9c4c5b888]",
        "(boost::statechart::simple_state<Scrub::PendingTimer, Scrub::ActiveScrubbing, boost::mpl::list<mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, m
pl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::
na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na>, (boost::statechart::history_mode)0>::react_impl(boost::statechart::event_base const&, void
const*)+0x16a) [0x55b9c4c6542a]",
        "(boost::statechart::state_machine<Scrub::ScrubMachine, Scrub::NotActive, std::allocator<boost::statechart::none>, boost::statechart::null_exception_translat
or>::process_event(boost::statechart::event_base const&)+0x6b) [0x55b9c4c5763b]",
        "(PgScrubber::send_scrub_resched(unsigned int)+0xef) [0x55b9c4c4ec4f]",
        "(PG::forward_scrub_event(void (ScrubPgIF::*)(unsigned int), unsigned int, std::basic_string_view<char, std::char_traits<char> >)+0x78) [0x55b9c499cf48]",
        "(ceph::osd::scheduler::PGScrubResched::run(OSD*, OSDShard*, boost::intrusive_ptr<PG>&, ThreadPool::TPHandle&)+0x32) [0x55b9c4b4a2f2]",
        "(OSD::ShardedOpWQ::_process(unsigned int, ceph::heartbeat_handle_d*)+0xd1e) [0x55b9c4905dbe]",
        "(ShardedThreadPool::shardedthreadpool_worker(unsigned int)+0x4ac) [0x55b9c4f8b75c]",
        "(ShardedThreadPool::WorkThreadSharded::entry()+0x10) [0x55b9c4f8ec20]",
        "/lib/x86_64-linux-gnu/libpthread.so.0(+0x76db) [0x7fda0ddb46db]",
        "clone()"
    ]

_______________________________________________
ceph-users mailing list -- ceph-users@xxxxxxx
To unsubscribe send an email to ceph-users-leave@xxxxxxx



[Index of Archives]     [Information on CEPH]     [Linux Filesystem Development]     [Ceph Development]     [Ceph Large]     [Ceph Dev]     [Linux USB Development]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [xfs]


  Powered by Linux