On Sun, Oct 23, 2022 at 11:04 PM can zhu <zhucan.k8s@xxxxxxxxx> wrote:
>
> crash info:
>
> {
>     "backtrace": [
>         "/lib64/libpthread.so.0(+0x12ce0) [0x7f82e87cece0]",
>         "(BlueStore::Onode::put()+0x1a3) [0x55bd21a422e3]",
>         "(std::_Hashtable<ghobject_t, std::pair<ghobject_t const, boost::intrusive_ptr<BlueStore::Onode> >, mempool::pool_allocator<(mempool::pool_index_t)4, std::pair<ghobject_t const, boost::intrusive_ptr<BlueStore::Onode> > >, std::__detail::_Select1st, std::equal_to<ghobject_t>, std::hash<ghobject_t>, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, std::__detail::_Prime_rehash_policy, std::__detail::_Hashtable_traits<true, false, true> >::_M_erase(unsigned long, std::__detail::_Hash_node_base*, std::__detail::_Hash_node<std::pair<ghobject_t const, boost::intrusive_ptr<BlueStore::Onode> >, true>*)+0x68) [0x55bd21af7bc8]",
>         "(BlueStore::OnodeSpace::_remove(ghobject_t const&)+0x29b) [0x55bd21a420eb]",
>         "(LruOnodeCacheShard::_trim_to(unsigned long)+0xeb) [0x55bd21afce6b]",
>         "(BlueStore::OnodeSpace::add(ghobject_t const&, boost::intrusive_ptr<BlueStore::Onode>&)+0x49d) [0x55bd21a42dfd]",
>         "(BlueStore::Collection::get_onode(ghobject_t const&, bool, bool)+0x46a) [0x55bd21a86e1a]",
>         "(BlueStore::_txc_add_transaction(BlueStore::TransContext*, ceph::os::Transaction*)+0x1124) [0x55bd21aab2e4]",
>         "(BlueStore::queue_transactions(boost::intrusive_ptr<ObjectStore::CollectionImpl>&, std::vector<ceph::os::Transaction, std::allocator<ceph::os::Transaction> >&, boost::intrusive_ptr<TrackedOp>, ThreadPool::TPHandle*)+0x316) [0x55bd21ac76d6]",
>         "(non-virtual thunk to PrimaryLogPG::queue_transactions(std::vector<ceph::os::Transaction, std::allocator<ceph::os::Transaction> >&, boost::intrusive_ptr<OpRequest>)+0x58) [0x55bd2170b878]",
>         "(ReplicatedBackend::do_repop(boost::intrusive_ptr<OpRequest>)+0xeb0) [0x55bd218fdff0]",
>         "(ReplicatedBackend::_handle_message(boost::intrusive_ptr<OpRequest>)+0x267) [0x55bd2190e357]",
>         "(PGBackend::handle_message(boost::intrusive_ptr<OpRequest>)+0x52) [0x55bd2173ed52]",
>         "(PrimaryLogPG::do_request(boost::intrusive_ptr<OpRequest>&, ThreadPool::TPHandle&)+0x5de) [0x55bd216e268e]",
>         "(OSD::dequeue_op(boost::intrusive_ptr<PG>, boost::intrusive_ptr<OpRequest>, ThreadPool::TPHandle&)+0x309) [0x55bd21569fc9]",
>         "(ceph::osd::scheduler::PGOpItem::run(OSD*, OSDShard*, boost::intrusive_ptr<PG>&, ThreadPool::TPHandle&)+0x68) [0x55bd217c8e78]",
>         "(OSD::ShardedOpWQ::_process(unsigned int, ceph::heartbeat_handle_d*)+0xc28) [0x55bd215874c8]",
>         "(ShardedThreadPool::shardedthreadpool_worker(unsigned int)+0x5c4) [0x55bd21c042a4]",
>         "(ShardedThreadPool::WorkThreadSharded::entry()+0x14) [0x55bd21c07184]",
>         "/lib64/libpthread.so.0(+0x81ca) [0x7f82e87c41ca]",
>         "clone()"
>     ],
>     "ceph_version": "16.2.10",
>     "crash_id": "2022-10-21T22:15:37.801458Z_ded9e80e-6c08-4502-8887-0438cdcd4a1c",
>     "entity_name": "osd.21",
>     "os_id": "centos",
>     "os_name": "CentOS Stream",
>     "os_version": "8",
>     "os_version_id": "8",
>     "process_name": "ceph-osd",
>     "stack_sig": "a43cb3d3dcfcda8bffe97d30a1cb9c244ba20d595a2c8759a5cc8274781b0020",
>     "timestamp": "2022-10-21T22:15:37.801458Z",
>     "utsname_hostname": "node07",
>     "utsname_machine": "x86_64",
>     "utsname_release": "3.10.0-1160.45.1.el7.x86_64",
>     "utsname_sysname": "Linux",
>     "utsname_version": "#1 SMP Wed Oct 13 17:20:51 UTC 2021"
> }
>
> message info:
> [root@node07 ~]# dmesg -T | grep osd
> [Sat Oct 22 06:18:46 2022] tp_osd_tp[23817]: segfault at
> 55bd00000001 ip 00007f82e931cd6a sp 00007f82c6592f08 error 4 in libtcmalloc.so.4.5.3[7f82e92e1000+4d000]

There is currently a known race condition in onode reference counting that affects all versions of Ceph [1][2]. Your backtrace is different from everything else I've seen so far, though the crash hitting Onode::put() during a cache trim is very suspicious. If you can reproduce this, please follow the tracker and report back once the fix is available; that would be helpful to the community.

Cheers,
Tyler

[1] https://tracker.ceph.com/issues/57895
[2] https://tracker.ceph.com/issues/56382
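
For readers less familiar with this failure mode, below is a minimal, hypothetical C++ sketch (not Ceph source; the Node and CacheShard names and layout are invented for illustration) of the general shape of a reference-counting race like the one the trackers above describe: an object that frees itself on its last put() while a cache trimmer can still find a raw pointer to it and touch its refcount.

// Hypothetical sketch, NOT Ceph code: an intrusively refcounted object that
// frees itself on the last put(), plus a cache shard that holds raw pointers.
#include <atomic>
#include <iostream>
#include <string>
#include <unordered_map>

struct Node {                                    // stand-in for an onode
  std::atomic<int> nref{0};
  void get() { nref.fetch_add(1, std::memory_order_relaxed); }
  void put() {
    // If this was the last reference, the object frees itself.
    if (nref.fetch_sub(1, std::memory_order_acq_rel) == 1)
      delete this;
  }
};

// If lookups/trimming and a caller's final put() are not serialized by one
// lock, this interleaving becomes possible:
//
//   op thread:   node->put();           // nref 1 -> 0, "delete this" begins
//   trim thread: Node* n = shard.map[oid]; // pointer is still in the map
//                n->get();              // nref 0 -> 1, reference "resurrected"
//                ...                    // use-after-free once delete completes
struct CacheShard {
  std::unordered_map<std::string, Node*> map;

  void add(const std::string& key, Node* n) {
    n->get();                                    // cache holds one reference
    map[key] = n;
  }

  void trim_all() {
    for (auto& kv : map)
      kv.second->put();                          // may delete the node
    map.clear();
  }
};

int main() {
  // Single-threaded use is fine; the danger is only in the concurrent
  // interleaving sketched in the comment above.
  CacheShard shard;
  Node* n = new Node;
  n->get();                                      // caller's reference
  shard.add("obj1", n);
  n->put();                                      // caller done, cache still owns it
  shard.trim_all();                              // last put(), node is freed
  std::cout << "done\n";
  return 0;
}

A common way to close this kind of window, in general, is to make the final reference drop and the cache erase happen under one lock (or as a single atomic state transition), so a trimmer can never take a new reference to an object whose destruction has already begun.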