Hi Folks, I'm having trouble getting some of my OSDs to boot. At some point, these disks got very full. I fixed the rule that was causing that, and they are on average ~30% full now. I'm getting the following in my logs: -1> 2019-05-08 16:05:18.956 7fdc7adbbf00 -1 /home/jenkins-build/build/workspace/ceph-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos7/DIST/centos7/MACHINE_SIZE/huge/release/14.2.1/rpm/el7/BUILD/ceph-14.2.1/src/include/interval_set.h: In function 'void interval_set<T, Map>::insert(T, T, T*, T*) [with T = long unsigned int; Map = std::map<long unsigned int, long unsigned int, std::less<long unsigned int>, std::allocator<std::pair<const long unsigned int, long unsigned int> > >]' thread 7fdc7adbbf00 time 2019-05-08 16:05:18.953372 /home/jenkins-build/build/workspace/ceph-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos7/DIST/centos7/MACHINE_SIZE/huge/release/14.2.1/rpm/el7/BUILD/ceph-14.2.1/src/include/interval_set.h: 490: FAILED ceph_assert(p->first > start+len) ceph version 14.2.1 (d555a9489eb35f84f2e1ef49b77e19da9d113972) nautilus (stable) 1: (ceph::__ceph_assert_fail(char const*, char const*, int, char const*)+0x14a) [0x7fdc70daa676] 2: (ceph::__ceph_assertf_fail(char const*, char const*, int, char const*, char const*, ...)+0) [0x7fdc70daa844] 3: (interval_set<unsigned long, std::map<unsigned long, unsigned long, std::less<unsigned long>, std::allocator<std::pair<unsigned long const, unsigned long> > > >::insert(unsigned long, unsigned long, unsigned long*, unsigned long*)+0x45f) [0x55b8960e03df] 4: (BlueStore::allocate_bluefs_freespace(unsigned long, unsigned long, std::vector<bluestore_pextent_t, mempool::pool_allocator<(mempool::pool_index_t)4, bluestore_pextent_t> >*)+0x74e) [0x55b89611d13e] 5: (BlueFS::_expand_slow_device(unsigned long, std::vector<bluestore_pextent_t, mempool::pool_allocator<(mempool::pool_index_t)4, bluestore_pextent_t> >&)+0x111) [0x55b8960c8211] 6: (BlueFS::_allocate(unsigned char, unsigned 
long, bluefs_fnode_t*)+0x68b) [0x55b8960c8f7b] 7: (BlueFS::_allocate(unsigned char, unsigned long, bluefs_fnode_t*)+0x362) [0x55b8960c8c52] 8: (BlueFS::_flush_range(BlueFS::FileWriter*, unsigned long, unsigned long)+0xe5) [0x55b8960c95d5] 9: (BlueFS::_flush(BlueFS::FileWriter*, bool)+0x10b) [0x55b8960cb43b] 10: (BlueRocksWritableFile::Flush()+0x3d) [0x55b8962bdfcd] 11: (rocksdb::WritableFileWriter::Flush()+0x19e) [0x55b896531a4e] 12: (rocksdb::WritableFileWriter::Sync(bool)+0x2e) [0x55b896531d2e] 13: (rocksdb::BuildTable(std::string const&, rocksdb::Env*, rocksdb::ImmutableCFOptions const&, rocksdb::MutableCFOptions const&, rocksdb::EnvOptions const&, rocksdb::TableCache*, rocksdb::InternalIteratorBase<rocksdb::Slice>*, std::unique_ptr<rocksdb::InternalIteratorBase<rocksdb::Slice>, std::default_delete<rocksdb::InternalIteratorBase<rocksdb::Slice> > >, rocksdb::FileMetaData*, rocksdb::InternalKeyComparator const&, std::vector<std::unique_ptr<rocksdb::IntTblPropCollectorFactory, std::default_delete<rocksdb::IntTblPropCollectorFactory> >, std::allocator<std::unique_ptr<rocksdb::IntTblPropCollectorFactory, std::default_delete<rocksdb::IntTblPropCollectorFactory> > > > const*, unsigned int, std::string const&, std::vector<unsigned long, std::allocator<unsigned long> >, unsigned long, rocksdb::SnapshotChecker*, rocksdb::CompressionType, rocksdb::CompressionOptions const&, bool, rocksdb::InternalStats*, rocksdb::TableFileCreationReason, rocksdb::EventLogger*, int, rocksdb::Env::IOPriority, rocksdb::TableProperties*, int, unsigned long, unsigned long, rocksdb::Env::WriteLifeTimeHint)+0x2368) [0x55b89655fb68] 14: (rocksdb::DBImpl::WriteLevel0TableForRecovery(int, rocksdb::ColumnFamilyData*, rocksdb::MemTable*, rocksdb::VersionEdit*)+0xc66) [0x55b8963d48c6] 15: (rocksdb::DBImpl::RecoverLogFiles(std::vector<unsigned long, std::allocator<unsigned long> > const&, unsigned long*, bool)+0x1dce) [0x55b8963d6f1e] 16: 
(rocksdb::DBImpl::Recover(std::vector<rocksdb::ColumnFamilyDescriptor, std::allocator<rocksdb::ColumnFamilyDescriptor> > const&, bool, bool, bool)+0x809) [0x55b8963d7db9] 17: (rocksdb::DBImpl::Open(rocksdb::DBOptions const&, std::string const&, std::vector<rocksdb::ColumnFamilyDescriptor, std::allocator<rocksdb::ColumnFamilyDescriptor> > const&, std::vector<rocksdb::ColumnFamilyHandle*, std::allocator<rocksdb::ColumnFamilyHandle*> >*, rocksdb::DB**, bool, bool)+0x658) [0x55b8963d8bc8] 18: (rocksdb::DB::Open(rocksdb::DBOptions const&, std::string const&, std::vector<rocksdb::ColumnFamilyDescriptor, std::allocator<rocksdb::ColumnFamilyDescriptor> > const&, std::vector<rocksdb::ColumnFamilyHandle*, std::allocator<rocksdb::ColumnFamilyHandle*> >*, rocksdb::DB**)+0x24) [0x55b8963da3a4] 19: (RocksDBStore::do_open(std::ostream&, bool, bool, std::vector<KeyValueDB::ColumnFamily, std::allocator<KeyValueDB::ColumnFamily> > const*)+0x1660) [0x55b8961c2a80] 20: (BlueStore::_open_db(bool, bool, bool)+0xf8e) [0x55b89611b37e] 21: (BlueStore::_open_db_and_around(bool)+0x165) [0x55b8961388b5] 22: (BlueStore::_fsck(bool, bool)+0xe5c) [0x55b8961692dc] 23: (main()+0x107e) [0x55b895fc682e] 24: (__libc_start_main()+0xf5) [0x7fdc6da4e3d5] 25: (()+0x2718cf) [0x55b8960ac8cf] 0> 2019-05-08 16:05:18.960 7fdc7adbbf00 -1 *** Caught signal (Aborted) ** in thread 7fdc7adbbf00 thread_name:ceph-bluestore- ceph version 14.2.1 (d555a9489eb35f84f2e1ef49b77e19da9d113972) nautilus (stable) 1: (()+0xf5d0) [0x7fdc6f2905d0] 2: (gsignal()+0x37) [0x7fdc6da62207] 3: (abort()+0x148) [0x7fdc6da638f8] 4: (ceph::__ceph_assert_fail(char const*, char const*, int, char const*)+0x199) [0x7fdc70daa6c5] 5: (ceph::__ceph_assertf_fail(char const*, char const*, int, char const*, char const*, ...)+0) [0x7fdc70daa844] 6: (interval_set<unsigned long, std::map<unsigned long, unsigned long, std::less<unsigned long>, std::allocator<std::pair<unsigned long const, unsigned long> > > >::insert(unsigned long, unsigned long, 
unsigned long*, unsigned long*)+0x45f) [0x55b8960e03df] 7: (BlueStore::allocate_bluefs_freespace(unsigned long, unsigned long, std::vector<bluestore_pextent_t, mempool::pool_allocator<(mempool::pool_index_t)4, bluestore_pextent_t> >*)+0x74e) [0x55b89611d13e] 8: (BlueFS::_expand_slow_device(unsigned long, std::vector<bluestore_pextent_t, mempool::pool_allocator<(mempool::pool_index_t)4, bluestore_pextent_t> >&)+0x111) [0x55b8960c8211] 9: (BlueFS::_allocate(unsigned char, unsigned long, bluefs_fnode_t*)+0x68b) [0x55b8960c8f7b] 10: (BlueFS::_allocate(unsigned char, unsigned long, bluefs_fnode_t*)+0x362) [0x55b8960c8c52] 11: (BlueFS::_flush_range(BlueFS::FileWriter*, unsigned long, unsigned long)+0xe5) [0x55b8960c95d5] 12: (BlueFS::_flush(BlueFS::FileWriter*, bool)+0x10b) [0x55b8960cb43b] 13: (BlueRocksWritableFile::Flush()+0x3d) [0x55b8962bdfcd] 14: (rocksdb::WritableFileWriter::Flush()+0x19e) [0x55b896531a4e] 15: (rocksdb::WritableFileWriter::Sync(bool)+0x2e) [0x55b896531d2e] 16: (rocksdb::BuildTable(std::string const&, rocksdb::Env*, rocksdb::ImmutableCFOptions const&, rocksdb::MutableCFOptions const&, rocksdb::EnvOptions const&, rocksdb::TableCache*, rocksdb::InternalIteratorBase<rocksdb::Slice>*, std::unique_ptr<rocksdb::InternalIteratorBase<rocksdb::Slice>, std::default_delete<rocksdb::InternalIteratorBase<rocksdb::Slice> > >, rocksdb::FileMetaData*, rocksdb::InternalKeyComparator const&, std::vector<std::unique_ptr<rocksdb::IntTblPropCollectorFactory, std::default_delete<rocksdb::IntTblPropCollectorFactory> >, std::allocator<std::unique_ptr<rocksdb::IntTblPropCollectorFactory, std::default_delete<rocksdb::IntTblPropCollectorFactory> > > > const*, unsigned int, std::string const&, std::vector<unsigned long, std::allocator<unsigned long> >, unsigned long, rocksdb::SnapshotChecker*, rocksdb::CompressionType, rocksdb::CompressionOptions const&, bool, rocksdb::InternalStats*, rocksdb::TableFileCreationReason, rocksdb::EventLogger*, int, rocksdb::Env::IOPriority, 
rocksdb::TableProperties*, int, unsigned long, unsigned long, rocksdb::Env::WriteLifeTimeHint)+0x2368) [0x55b89655fb68] 17: (rocksdb::DBImpl::WriteLevel0TableForRecovery(int, rocksdb::ColumnFamilyData*, rocksdb::MemTable*, rocksdb::VersionEdit*)+0xc66) [0x55b8963d48c6] 18: (rocksdb::DBImpl::RecoverLogFiles(std::vector<unsigned long, std::allocator<unsigned long> > const&, unsigned long*, bool)+0x1dce) [0x55b8963d6f1e] 19: (rocksdb::DBImpl::Recover(std::vector<rocksdb::ColumnFamilyDescriptor, std::allocator<rocksdb::ColumnFamilyDescriptor> > const&, bool, bool, bool)+0x809) [0x55b8963d7db9] 20: (rocksdb::DBImpl::Open(rocksdb::DBOptions const&, std::string const&, std::vector<rocksdb::ColumnFamilyDescriptor, std::allocator<rocksdb::ColumnFamilyDescriptor> > const&, std::vector<rocksdb::ColumnFamilyHandle*, std::allocator<rocksdb::ColumnFamilyHandle*> >*, rocksdb::DB**, bool, bool)+0x658) [0x55b8963d8bc8] 21: (rocksdb::DB::Open(rocksdb::DBOptions const&, std::string const&, std::vector<rocksdb::ColumnFamilyDescriptor, std::allocator<rocksdb::ColumnFamilyDescriptor> > const&, std::vector<rocksdb::ColumnFamilyHandle*, std::allocator<rocksdb::ColumnFamilyHandle*> >*, rocksdb::DB**)+0x24) [0x55b8963da3a4] 22: (RocksDBStore::do_open(std::ostream&, bool, bool, std::vector<KeyValueDB::ColumnFamily, std::allocator<KeyValueDB::ColumnFamily> > const*)+0x1660) [0x55b8961c2a80] 23: (BlueStore::_open_db(bool, bool, bool)+0xf8e) [0x55b89611b37e] 24: (BlueStore::_open_db_and_around(bool)+0x165) [0x55b8961388b5] 25: (BlueStore::_fsck(bool, bool)+0xe5c) [0x55b8961692dc] 26: (main()+0x107e) [0x55b895fc682e] 27: (__libc_start_main()+0xf5) [0x7fdc6da4e3d5] 28: (()+0x2718cf) [0x55b8960ac8cf] NOTE: a copy of the executable, or `objdump -rdS <executable>` is needed to interpret this. 
The OSDs were provisioned using `ceph-volume prepare --dmcrypt --data /dev/sdx`. It looks to me like one of the db/wal/main claimed all the space while the OSD was full and hasn't released it -- leaving none for the others. I've tried extracting PGs with ceph-objectstore-tool, but I get the same stack trace. Any help would be greatly appreciated! -Paul _______________________________________________ ceph-users mailing list ceph-users@xxxxxxxxxxxxxx http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com