Hi,
Today some of our OSDs crashed after a scrub. Version: 14.2.1
2019-05-17 12:49:40.955
7fd980d8fd80 4 rocksdb: EVENT_LOG_v1 {"time_micros":
1558090180955778, "job": 1, "event": "recovery_finished"}
2019-05-17 12:49:40.967
7fd980d8fd80 4 rocksdb:
[/home/jenkins-build/build/workspace/ceph-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos7/DIST/centos7/MACHINE_SIZE/huge/release/14.2.1/rpm/el7/BUILD/ceph-14.2.1/src/rocksdb/db/db_impl_open.cc:1287]
DB pointer 0x55cbfcfc9000
2019-05-17 12:49:40.967
7fd980d8fd80 1 bluestore(/var/lib/ceph/osd/ceph-7) _open_db
opened rocksdb path db options
compression=kNoCompression,max_write_buffer_number=4,min_write_buffer_number_to_merge=1,recycle_log_file_num=4,write_buffer_size=268435456,writable_file_max_buffer_size=0,compaction_readahead_size=2097152
2019-05-17 12:49:40.967
7fd980d8fd80 1 bluestore(/var/lib/ceph/osd/ceph-7)
_upgrade_super from 2, latest 2
2019-05-17 12:49:40.967
7fd980d8fd80 1 bluestore(/var/lib/ceph/osd/ceph-7)
_upgrade_super done
2019-05-17 12:49:41.090
7fd980d8fd80 0 <cls>
/home/jenkins-build/build/workspace/ceph-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos7/DIST/centos7/MACHINE_SIZE/huge/release/14.2.1/rpm/el7/BUILD/ceph-14.2.1/src/cls/cephfs/cls_cephfs.cc:197:
loading cephfs
2019-05-17 12:49:41.092
7fd980d8fd80 0 <cls>
/home/jenkins-build/build/workspace/ceph-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos7/DIST/centos7/MACHINE_SIZE/huge/release/14.2.1/rpm/el7/BUILD/ceph-14.2.1/src/cls/hello/cls_hello.cc:296:
loading cls_hello
2019-05-17 12:49:41.093
7fd980d8fd80 0 _get_class not permitted to load kvs
2019-05-17 12:49:41.096
7fd980d8fd80 0 _get_class not permitted to load lua
2019-05-17 12:49:41.121
7fd980d8fd80 0 _get_class not permitted to load sdk
2019-05-17 12:49:41.124
7fd980d8fd80 0 osd.7 135670 crush map has features
283675107524608, adjusting msgr requires for clients
2019-05-17 12:49:41.124
7fd980d8fd80 0 osd.7 135670 crush map has features
283675107524608 was 8705, adjusting msgr requires for mons
2019-05-17 12:49:41.124
7fd980d8fd80 0 osd.7 135670 crush map has features
3026702624700514304, adjusting msgr requires for osds
2019-05-17 12:49:50.430
7fd980d8fd80 0 osd.7 135670 load_pgs
2019-05-17 12:50:09.302
7fd980d8fd80 0 osd.7 135670 load_pgs opened 201 pgs
2019-05-17 12:50:09.303
7fd980d8fd80 0 osd.7 135670 using weightedpriority op queue
with priority op cut off at 64.
2019-05-17 12:50:09.324
7fd980d8fd80 -1 osd.7 135670 log_to_monitors {default=true}
2019-05-17 12:50:09.361
7fd980d8fd80 -1 osd.7 135670 mon_cmd_maybe_osd_create fail:
'osd.7 has already bound to class 'archive', can not reset
class to 'hdd'; use 'ceph osd crush rm-device-class
<id>' to remove old class first': (16) Device or
resource busy
2019-05-17 12:50:09.365
7fd980d8fd80 0 osd.7 135670 done with init, starting boot
process
2019-05-17 12:50:09.371
7fd97339d700 -1 osd.7 135670 set_numa_affinity unable to
identify public interface 'vlan.4094' numa node: (2) No such
file or directory
2019-05-17 12:50:16.443
7fd95f375700 -1 bdev(0x55cbfcec4e00
/var/lib/ceph/osd/ceph-7/block) read_random
0x5428527b5be~15b3 error: (14) Bad address
2019-05-17 12:50:16.467
7fd95f375700 -1
/home/jenkins-build/build/workspace/ceph-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos7/DIST/centos7/MACHINE_SIZE/huge/release/14.2.1/rpm/el7/BUILD/ceph-14.2.1/src/os/bluestore/BlueFS.cc:
In function 'int BlueFS::_read_random(BlueFS::FileReader*,
uint64_t, size_t, char*)' thread 7fd95f375700 time
2019-05-17 12:50:16.445954
/home/jenkins-build/build/workspace/ceph-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos7/DIST/centos7/MACHINE_SIZE/huge/release/14.2.1/rpm/el7/BUILD/ceph-14.2.1/src/os/bluestore/BlueFS.cc:
1337: FAILED ceph_assert(r == 0)
ceph version
14.2.1 (d555a9489eb35f84f2e1ef49b77e19da9d113972) nautilus
(stable)
1:
(ceph::__ceph_assert_fail(char const*, char const*, int,
char const*)+0x14a) [0x55cbf14e265c]
2:
(ceph::__ceph_assertf_fail(char const*, char const*, int,
char const*, char const*, ...)+0) [0x55cbf14e282a]
3:
(BlueFS::_read_random(BlueFS::FileReader*, unsigned long,
unsigned long, char*)+0x71a) [0x55cbf1b8fd6a]
4:
(BlueRocksRandomAccessFile::Read(unsigned long, unsigned
long, rocksdb::Slice*, char*) const+0x20) [0x55cbf1bb8440]
5:
(rocksdb::RandomAccessFileReader::Read(unsigned long,
unsigned long, rocksdb::Slice*, char*) const+0x960)
[0x55cbf21e3ba0]
6:
(rocksdb::BlockFetcher::ReadBlockContents()+0x3e7)
[0x55cbf219dc27]
7: (()+0x11146a4)
[0x55cbf218a6a4]
8:
(rocksdb::BlockBasedTable::MaybeLoadDataBlockToCache(rocksdb::FilePrefetchBuffer*,
rocksdb::BlockBasedTable::Rep*, rocksdb::ReadOptions
const&, rocksdb::BlockHandle const&, rocksdb::Slice,
rocksdb::BlockBasedTable::CachableEntry<rocksdb::Block>*, bool,
rocksdb::GetContext*)+0x2cc) [0x55cbf218c63c]
9:
(rocksdb::DataBlockIter*
rocksdb::BlockBasedTable::NewDataBlockIterator<rocksdb::DataBlockIter>(rocksdb::BlockBasedTable::Rep*,
rocksdb::ReadOptions const&, rocksdb::BlockHandle
const&, rocksdb::DataBlockIter*, bool, bool, bool,
rocksdb::GetContext*, rocksdb::Status,
rocksdb::FilePrefetchBuffer*)+0x169) [0x55cbf2199b29]
10:
(rocksdb::BlockBasedTableIterator<rocksdb::DataBlockIter,
rocksdb::Slice>::InitDataBlock()+0xc8) [0x55cbf219b588]
11:
(rocksdb::BlockBasedTableIterator<rocksdb::DataBlockIter,
rocksdb::Slice>::FindKeyForward()+0x8d) [0x55cbf219b89d]
12: (()+0x10adde9)
[0x55cbf2123de9]
13:
(rocksdb::MergingIterator::Next()+0x44) [0x55cbf21b27c4]
14:
(rocksdb::DBIter::Next()+0xdf) [0x55cbf20a85cf]
15:
(RocksDBStore::RocksDBWholeSpaceIteratorImpl::next()+0x2d)
[0x55cbf1b1ca8d]
16:
(BlueStore::_collection_list(BlueStore::Collection*,
ghobject_t const&, ghobject_t const&, int,
std::vector<ghobject_t, std::allocator<ghobject_t>
>*, ghobject_t*)+0xe26) [0x55cbf1a8f496]
17:
(BlueStore::collection_list(boost::intrusive_ptr<ObjectStore::CollectionImpl>&,
ghobject_t const&, ghobject_t const&, int,
std::vector<ghobject_t, std::allocator<ghobject_t>
>*, ghobject_t*)+0x9b) [0x55cbf1a9093b]
18:
(PGBackend::objects_list_range(hobject_t const&,
hobject_t const&, std::vector<hobject_t,
std::allocator<hobject_t> >*,
std::vector<ghobject_t, std::allocator<ghobject_t>
>*)+0x147) [0x55cbf182f3f7]
19:
(PG::build_scrub_map_chunk(ScrubMap&,
ScrubMapBuilder&, hobject_t, hobject_t, bool,
ThreadPool::TPHandle&)+0x28a) [0x55cbf16dcfba]
20:
(PG::chunky_scrub(ThreadPool::TPHandle&)+0x169c)
[0x55cbf170b6bc]
21: (PG::scrub(unsigned
int, ThreadPool::TPHandle&)+0xaf) [0x55cbf170c6ff]
22: (PGScrub::run(OSD*,
OSDShard*, boost::intrusive_ptr<PG>&,
ThreadPool::TPHandle&)+0x12) [0x55cbf18b94d2]
23:
(OSD::ShardedOpWQ::_process(unsigned int,
ceph::heartbeat_handle_d*)+0x9f4) [0x55cbf163cb44]
24:
(ShardedThreadPool::shardedthreadpool_worker(unsigned
int)+0x433) [0x55cbf1c36e93]
25:
(ShardedThreadPool::WorkThreadSharded::entry()+0x10)
[0x55cbf1c39f30]
26: (()+0x7dd5)
[0x7fd97d9d9dd5]
27: (clone()+0x6d)
[0x7fd97c898ead]
Regards,
Manuel