Hi Ceph, We upgraded last night from 14.2.1 to 14.2.2 and have 36 OSDs with old stats. We’re still repairing the stats one OSD at a time, but one repair failed. Hope this helps. CentOS version: Linux CEPH006 3.10.0-957.10.1.el7.x86_64 #1 SMP Mon Mar 18 15:06:45 UTC 2019 x86_64 x86_64 x86_64 GNU/Linux [root@CEPH006 ~]# ceph-bluestore-tool repair --path /var/lib/ceph/osd/ceph-10 src/central_freelist.cc:333] tcmalloc: allocation failed 8192 terminate called after throwing an instance of 'ceph::buffer::bad_alloc' what(): buffer::bad_alloc *** Caught signal (Aborted) ** in thread 7f823c8e3f00 thread_name:ceph-bluestore- ceph version 14.2.2 (4f8fa0a0024755aae7d95567c63f11d6862d55be) nautilus (stable) 1: (()+0xf5d0) [0x7f8230dab5d0] 2: (gsignal()+0x37) [0x7f822f5762c7] 3: (abort()+0x148) [0x7f822f5779b8] 4: (__gnu_cxx::__verbose_terminate_handler()+0x165) [0x7f822fe857d5] 5: (()+0x5e746) [0x7f822fe83746] 6: (()+0x5e773) [0x7f822fe83773] 7: (()+0x5e993) [0x7f822fe83993] 8: (()+0x250478) [0x7f82328c7478] 9: (ceph::buffer::create_aligned_in_mempool(unsigned int, unsigned int, int)+0x2b1) [0x7f8232bf6791] 10: (ceph::buffer::create_aligned(unsigned int, unsigned int)+0x22) [0x7f8232bf6812] 11: (ceph::buffer::copy(char const*, unsigned int)+0x2c) [0x7f8232bf71cc] 12: (BlueStore::Blob::decode(BlueStore::Collection*, ceph::buffer::v14_2_0::ptr::iterator_impl<true>&, unsigned long, unsigned long*, bool)+0x23e) [0x55ba137eafce] 13: (BlueStore::ExtentMap::decode_some(ceph::buffer::v14_2_0::list&)+0x8d6) [0x55ba137f3536] 14: (BlueStore::ExtentMap::fault_range(KeyValueDB*, unsigned int, unsigned int)+0x2b2) [0x55ba137f3c82] 15: (BlueStore::_fsck(bool, bool)+0x22a5) [0x55ba138577e5] 16: (main()+0x107e) [0x55ba136b3ece] 17: (__libc_start_main()+0xf5) [0x7f822f562495] 18: (()+0x27321f) [0x55ba1379b21f] 2019-07-23 10:14:57.156 7f823c8e3f00 -1 *** Caught signal (Aborted) ** in thread 7f823c8e3f00 thread_name:ceph-bluestore- ceph version 14.2.2 (4f8fa0a0024755aae7d95567c63f11d6862d55be) nautilus (stable) 1: (()+0xf5d0) 
[0x7f8230dab5d0] 2: (gsignal()+0x37) [0x7f822f5762c7] 3: (abort()+0x148) [0x7f822f5779b8] 4: (__gnu_cxx::__verbose_terminate_handler()+0x165) [0x7f822fe857d5] 5: (()+0x5e746) [0x7f822fe83746] 6: (()+0x5e773) [0x7f822fe83773] 7: (()+0x5e993) [0x7f822fe83993] 8: (()+0x250478) [0x7f82328c7478] 9: (ceph::buffer::create_aligned_in_mempool(unsigned int, unsigned int, int)+0x2b1) [0x7f8232bf6791] 10: (ceph::buffer::create_aligned(unsigned int, unsigned int)+0x22) [0x7f8232bf6812] 11: (ceph::buffer::copy(char const*, unsigned int)+0x2c) [0x7f8232bf71cc] 12: (BlueStore::Blob::decode(BlueStore::Collection*, ceph::buffer::v14_2_0::ptr::iterator_impl<true>&, unsigned long, unsigned long*, bool)+0x23e) [0x55ba137eafce] 13: (BlueStore::ExtentMap::decode_some(ceph::buffer::v14_2_0::list&)+0x8d6) [0x55ba137f3536] 14: (BlueStore::ExtentMap::fault_range(KeyValueDB*, unsigned int, unsigned int)+0x2b2) [0x55ba137f3c82] 15: (BlueStore::_fsck(bool, bool)+0x22a5) [0x55ba138577e5] 16: (main()+0x107e) [0x55ba136b3ece] 17: (__libc_start_main()+0xf5) [0x7f822f562495] 18: (()+0x27321f) [0x55ba1379b21f] NOTE: a copy of the executable, or `objdump -rdS <executable>` is needed to interpret this. terminate called recursively Aborted ----------------------- Starting Ceph also fails: OSD 10 fails to start. 
ceph version 14.2.2 (4f8fa0a0024755aae7d95567c63f11d6862d55be) nautilus (stable) 1: (()+0xf5d0) [0x7f00ee9045d0] 2: (gsignal()+0x37) [0x7f00ed6f42c7] 3: (abort()+0x148) [0x7f00ed6f59b8] 4: (__gnu_cxx::__verbose_terminate_handler()+0x165) [0x7f00ee0037d5] 5: (()+0x5e746) [0x7f00ee001746] 6: (()+0x5e773) [0x7f00ee001773] 7: (__cxa_rethrow()+0x49) [0x7f00ee0019e9] 8: (std::_Hashtable<osd_reqid_t, std::pair<osd_reqid_t const, pg_log_dup_t*>, std::allocator<std::pair<osd_reqid_t const, pg_log_dup_t*> >, std::__detail::_Select1st, std::equal_to<osd_reqid_t>, std::hash<osd_reqid_t>, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, std::__detail::_Prime_rehash_policy, std::__detail::_Hashtable_traits<true, false, true> >::_M_insert_unique_node(unsigned long, unsigned long, std::__detail::_Hash_node<std::pair<osd_reqid_t const, pg_log_dup_t*>, true>*)+0xfd) [0x55e0c6412a8d] 9: (std::__detail::_Map_base<osd_reqid_t, std::pair<osd_reqid_t const, pg_log_dup_t*>, std::allocator<std::pair<osd_reqid_t const, pg_log_dup_t*> >, std::__detail::_Select1st, std::equal_to<osd_reqid_t>, std::hash<osd_reqid_t>, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, std::__detail::_Prime_rehash_policy, std::__detail::_Hashtable_traits<true, false, true>, true>::operator[](osd_reqid_t const&)+0x99) [0x55e0c64478c9] 10: (PGLog::merge_log_dups(pg_log_t const&)+0x328) [0x55e0c6441d28] 11: (PGLog::merge_log(pg_info_t&, pg_log_t&, pg_shard_t, pg_info_t&, PGLog::LogEntryHandler*, bool&, bool&)+0xf6e) [0x55e0c644353e] 12: (PG::merge_log(ObjectStore::Transaction&, pg_info_t&, pg_log_t&, pg_shard_t)+0x64) [0x55e0c63a0804] 13: (PG::proc_master_log(ObjectStore::Transaction&, pg_info_t&, pg_log_t&, pg_missing_set<false>&, pg_shard_t)+0x94) [0x55e0c63d1a54] 14: (PG::RecoveryState::GetLog::react(PG::RecoveryState::GotLog const&)+0x97) [0x55e0c63ed567] 15: (boost::statechart::simple_state<PG::RecoveryState::GetLog, PG::RecoveryState::Peering, 
boost::mpl::list<mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na, mpl_::na>, (boost::statechart::history_mode)0>::react_impl(boost::statechart::event_base const&, void const*)+0x43) [0x55e0c642ec83] 16: (boost::statechart::state_machine<PG::RecoveryState::RecoveryMachine, PG::RecoveryState::Initial, std::allocator<void>, boost::statechart::null_exception_translator>::process_event(boost::statechart::event_base const&)+0x136) [0x55e0c640e316] 17: (PG::do_peering_event(std::shared_ptr<PGPeeringEvent>, PG::RecoveryCtx*)+0x119) [0x55e0c63fd909] 18: (OSD::dequeue_peering_evt(OSDShard*, PG*, std::shared_ptr<PGPeeringEvent>, ThreadPool::TPHandle&)+0x1b4) [0x55e0c6336774] 19: (PGPeeringItem::run(OSD*, OSDShard*, boost::intrusive_ptr<PG>&, ThreadPool::TPHandle&)+0x50) [0x55e0c65ab490] 20: (OSD::ShardedOpWQ::_process(unsigned int, ceph::heartbeat_handle_d*)+0x9f4) [0x55e0c632aef4] 21: (ShardedThreadPool::shardedthreadpool_worker(unsigned int)+0x433) [0x55e0c6929ce3] 22: (ShardedThreadPool::WorkThreadSharded::entry()+0x10) [0x55e0c692cd80] 23: (()+0x7dd5) [0x7f00ee8fcdd5] 24: (clone()+0x6d) [0x7f00ed7bc02d] NOTE: a copy of the executable, or `objdump -rdS <executable>` is needed to interpret this. --- begin dump of recent events --- 2019-07-23 10:25:38.003 7f00eb5e0700 -1 --2- [v2:172.16.2.15:6827/4125016,v1:172.16.2.15:6830/4125016] >> 172.16.2.8:0/2945059631 conn(0x55e12ff66400 0x55e12ff64c00 crc :-1 s=THROTTLE_DONE pgs=1761 cs=0 l=1 rx=0 tx=0).run_continuation failed decoding of frame header: buffer::bad_alloc KR, Manuel |
_______________________________________________ ceph-users mailing list ceph-users@xxxxxxxxxxxxxx http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com