We have a small Pacific 16.2.7 test cluster that has been ticking over
for a couple of years with no problems whatever. The last "event" was 14
days ago when I was testing some OSD replacement procedures - nothing
remarkable.
At 01:46 this morning, though, mon03 signalled an abort in the RocksDB
code. The monitor crashed, and systemd successfully restarted it 10
seconds later. Although it's difficult to tell whether any RBD VMs were
doing much at the time, it is unlikely. Deep-scrubs are likely to have
been running. There is nothing of interest in the OS logs.
Config summary:
* Built manually (not using cephadm)
* Debian 10 (buster)
* Host mon01 - mon, mgr, mds, rgw
* Host mon02 - mon, mgr, mds, rgw
* Host mon03 - mon
* Hosts osd01-03 - each has 2 Optane NVMe for HDD DB/WAL, 24 x HDD, 2
x NVMe OSD
* Running at very low load and capacity utilisation
It's working fine now, but I am wondering if anyone knows what might have
happened and whether there is any lurking problem that should be looked at.
Crash info (slightly sanitised) below...
Thanks, Chris
ceph@xxxxxxmon03:~$ ceph crash info 2022-02-24T01:46:41.241025Z_7bcaa4fa-d202-4e48-91ac-f3070493bc73
{
"backtrace": [
"/lib/x86_64-linux-gnu/libpthread.so.0(+0x12730) [0x7fb338e7d730]",
"gsignal()",
"abort()",
"/lib/x86_64-linux-gnu/libc.so.6(+0x2240f) [0x7fb33894b40f]",
"/lib/x86_64-linux-gnu/libc.so.6(+0x30102) [0x7fb338959102]",
"(rocksdb::BlockBasedTableBuilder::Add(rocksdb::Slice const&, rocksdb::Slice const&)+0x119) [0x5633d27a792f]",
"(rocksdb::CompactionJob::ProcessKeyValueCompaction(rocksdb::CompactionJob::SubcompactionState*)+0xaf8) [0x5633d275cf04]",
"(rocksdb::CompactionJob::Run()+0x235) [0x5633d275adfb]",
"(rocksdb::DBImpl::BackgroundCompaction(bool*, rocksdb::JobContext*, rocksdb::LogBuffer*, rocksdb::DBImpl::PrepickedCompaction*, rocksdb::Env::Priority)+0x248a) [0x5633d248a74a]",
"(rocksdb::DBImpl::BackgroundCallCompaction(rocksdb::DBImpl::PrepickedCompaction*, rocksdb::Env::Priority)+0x20d) [0x5633d2487a93]",
"(rocksdb::DBImpl::BGWorkCompaction(void*)+0xc5) [0x5633d248637d]",
"(void std::__invoke_impl<void, void (*&)(void*), void*&>(std::__invoke_other, void (*&)(void*), void*&)+0x34) [0x5633d26e7f6e]",
"(std::__invoke_result<void (*&)(void*), void*&>::type std::__invoke<void (*&)(void*), void*&>(void (*&)(void*), void*&)+0x37) [0x5633d26e7ad3]",
"(void std::_Bind<void (*(void*))(void*)>::__call<void, , 0ul>(std::tuple<>&&, std::_Index_tuple<0ul>)+0x48) [0x5633d26e71c2]",
"(void std::_Bind<void (*(void*))(void*)>::operator()<, void>()+0x24) [0x5633d26e6318]",
"(std::_Function_handler<void (), std::_Bind<void (*(void*))(void*)> >::_M_invoke(std::_Any_data const&)+0x20) [0x5633d26e5404]",
"(std::function<void ()>::operator()() const+0x32) [0x5633d242c58c]",
"(rocksdb::ThreadPoolImpl::Impl::BGThread(unsigned long)+0x26b) [0x5633d26e1941]",
"(rocksdb::ThreadPoolImpl::Impl::BGThreadWrapper(void*)+0x108) [0x5633d26e1aa4]",
"(void std::__invoke_impl<void, void (*)(void*), rocksdb::BGThreadMetadata*>(std::__invoke_other, void (*&&)(void*), rocksdb::BGThreadMetadata*&&)+0x34) [0x5633d26e4bdf]",
"(std::__invoke_result<void (*)(void*), rocksdb::BGThreadMetadata*>::type std::__invoke<void (*)(void*), rocksdb::BGThreadMetadata*>(void (*&&)(void*), rocksdb::BGThreadMetadata*&&)+0x37) [0x5633d26e3dbf]",
"(decltype (__invoke((_S_declval<0ul>)(), (_S_declval<1ul>)())) std::thread::_Invoker<std::tuple<void (*)(void*), rocksdb::BGThreadMetadata*> >::_M_invoke<0ul, 1ul>(std::_Index_tuple<0ul, 1ul>)+0x43) [0x5633d26e8779]",
"(std::thread::_Invoker<std::tuple<void (*)(void*), rocksdb::BGThreadMetadata*> >::operator()()+0x18) [0x5633d26e8734]",
"(std::thread::_State_impl<std::thread::_Invoker<std::tuple<void (*)(void*), rocksdb::BGThreadMetadata*> > >::_M_run()+0x1c) [0x5633d26e8718]",
"/lib/x86_64-linux-gnu/libstdc++.so.6(+0xbbb2f) [0x7fb338d44b2f]",
"/lib/x86_64-linux-gnu/libpthread.so.0(+0x7fa3) [0x7fb338e72fa3]",
"clone()"
],
"ceph_version": "16.2.7",
"crash_id": "2022-02-24T01:46:41.241025Z_7bcaa4fa-d202-4e48-91ac-f3070493bc73",
"entity_name": "mon.xxxxxxmon03",
"os_id": "10",
"os_name": "Debian GNU/Linux 10 (buster)",
"os_version": "10 (buster)",
"os_version_id": "10",
"process_name": "ceph-mon",
"stack_sig": "f5274691c6982e320f630eb9e025f3db660bd3a110bd7ec1400c7ae121feebb7",
"timestamp": "2022-02-24T01:46:41.241025Z",
"utsname_hostname": "xxxxxxmon03.x.y.z",
"utsname_machine": "x86_64",
"utsname_release": "4.19.0-18-amd64",
"utsname_sysname": "Linux",
"utsname_version": "#1 SMP Debian 4.19.208-1 (2021-09-29)"
}
_______________________________________________
ceph-users mailing list -- ceph-users@xxxxxxx
To unsubscribe send an email to ceph-users-leave@xxxxxxx