Hi Luke,
This is highly likely caused by the issue covered in
https://tracker.ceph.com/issues/53906
Unfortunately, it looks like we missed a proper backport to Pacific.
You can apparently work around the issue by setting the
'bluestore_volume_selection_policy' config parameter to rocksdb_original.
The potential implication of that "tuning" is less effective use of free
space on the DB volume - RocksDB/BlueFS might initiate data spillover
to the main (slow) device despite free space being available on the
standalone DB volume, which in turn might cause some performance regression.
A relevant alert will pop up if such a spillover takes place.
The above consequences are not very likely to occur, though, and they
are rather minor most of the time, so I would encourage you to try that
if OSD crashes are that common.
Thanks,
Igor
On 21/09/2023 17:48, Luke Hall wrote:
Hi,
Since the recent update to 16.2.14-1~bpo11+1 on Debian Bullseye I've
started seeing OSD crashes being registered almost daily across all
six physical machines (6xOSD disks per machine). There's a --block-db
for each osd on a LV from an NVMe.
If anyone has any idea what might be causing these I'd appreciate some
insight. Happy to provide any other info which might be useful.
Thanks,
Luke
{
"assert_condition": "cur2 >= p.length",
"assert_file": "./src/os/bluestore/BlueStore.h",
"assert_func": "virtual void
RocksDBBlueFSVolumeSelector::sub_usage(void*, const bluefs_fnode_t&)",
"assert_line": 3875,
"assert_msg": "./src/os/bluestore/BlueStore.h: In function
'virtual void RocksDBBlueFSVolumeSelector::sub_usage(void*, const
bluefs_fnode_t&)' thread 7f7f54f25700 time
2023-09-20T14:24:00.455721+0100\n./src/os/bluestore/BlueStore.h: 3875:
FAILED ceph_assert(cur2 >= p.length)\n",
"assert_thread_name": "bstore_kv_sync",
"backtrace": [
"/lib/x86_64-linux-gnu/libpthread.so.0(+0x13140)
[0x7f7f68632140]",
"gsignal()",
"abort()",
"(ceph::__ceph_assert_fail(char const*, char const*, int, char
const*)+0x16e) [0x55b22a49b5fa]",
"/usr/bin/ceph-osd(+0xac673b) [0x55b22a49b73b]",
"(RocksDBBlueFSVolumeSelector::sub_usage(void*, bluefs_fnode_t
const&)+0x11e) [0x55b22ab0077e]",
"(BlueFS::_flush_range_F(BlueFS::FileWriter*, unsigned long,
unsigned long)+0x5bd) [0x55b22ab9b8ed]",
"(BlueFS::_flush_F(BlueFS::FileWriter*, bool, bool*)+0x9a)
[0x55b22ab9bd7a]",
"(BlueFS::fsync(BlueFS::FileWriter*)+0x79) [0x55b22aba97a9]",
"(BlueRocksWritableFile::Sync()+0x15) [0x55b22abbf405]",
"(rocksdb::LegacyWritableFileWrapper::Sync(rocksdb::IOOptions const&,
rocksdb::IODebugContext*)+0x3f) [0x55b22b0914d1]",
"(rocksdb::WritableFileWriter::SyncInternal(bool)+0x1f4)
[0x55b22b26b7c6]",
"(rocksdb::WritableFileWriter::Sync(bool)+0x18c)
[0x55b22b26b1f8]",
"(rocksdb::DBImpl::WriteToWAL(rocksdb::WriteThread::WriteGroup const&,
rocksdb::log::Writer*, unsigned long*, bool, bool, unsigned
long)+0x366) [0x55b22b0e4a98]",
"(rocksdb::DBImpl::WriteImpl(rocksdb::WriteOptions const&,
rocksdb::WriteBatch*, rocksdb::WriteCallback*, unsigned long*,
unsigned long, bool, unsigned long*, unsigned long,
rocksdb::PreReleaseCallback*)+0x12cc) [0x55b22b0e0c5a]",
"(rocksdb::DBImpl::Write(rocksdb::WriteOptions const&,
rocksdb::WriteBatch*)+0x4a) [0x55b22b0df92a]",
"(RocksDBStore::submit_common(rocksdb::WriteOptions&,
std::shared_ptr<KeyValueDB::TransactionImpl>)+0x82) [0x55b22b036c42]",
"(RocksDBStore::submit_transaction_sync(std::shared_ptr<KeyValueDB::TransactionImpl>)+0x96)
[0x55b22b037cc6]",
"(BlueStore::_kv_sync_thread()+0x1201) [0x55b22aafc891]",
"(BlueStore::KVSyncThread::entry()+0xd) [0x55b22ab2792d]",
"/lib/x86_64-linux-gnu/libpthread.so.0(+0x7ea7)
[0x7f7f68626ea7]",
"clone()"
],
"ceph_version": "16.2.14",
"crash_id":
"2023-09-20T13:24:00.562318Z_beb5c664-9ffb-4a4e-8c61-166865fd4e0b",
"entity_name": "osd.8",
"os_id": "11",
"os_name": "Debian GNU/Linux 11 (bullseye)",
"os_version": "11 (bullseye)",
"os_version_id": "11",
"process_name": "ceph-osd",
"stack_sig":
"90d1fb6954f0f5b1e98659a93a1b9ce5a5a42cd5e0b2990a65dc336567adcb26",
"timestamp": "2023-09-20T13:24:00.562318Z",
"utsname_hostname": "cphosd02",
"utsname_machine": "x86_64",
"utsname_release": "5.10.0-23-amd64",
"utsname_sysname": "Linux",
"utsname_version": "#1 SMP Debian 5.10.179-1 (2023-05-12)"
}
_______________________________________________
ceph-users mailing list -- ceph-users@xxxxxxx
To unsubscribe send an email to ceph-users-leave@xxxxxxx