Last week I asked about a rogue inode that was causing ceph-mds to segfault during replay. We didn't get any suggestions from this list, so we have been familiarizing ourselves with the Ceph source code and have added the following patch:
-2> 2019-10-18 16:19:06.934 7faefa470700 0 mds.0.cache.ino(0x10000995e63) "/unimportant/path/we/can/tolerate/losing/compat.py"10995216789470"2018-03-24 03:18:17.621969""2018-03-24 03:18:17.620969"3318855521001{
"dir_hash": 0
}
{
"stripe_unit": 4194304,
"stripe_count": 1,
"object_size": 4194304,
"pool_id": 1,
"pool_ns": ""
}
[]
3411844674407370955161500"2015-01-27 16:01:52.467669""2018-03-24 03:18:17.621969"21-1[]
{
"version": 0,
"mtime": "0.000000",
"num_files": 0,
"num_subdirs": 0
}
{
"version": 0,
"rbytes": 34,
"rfiles": 1,
"rsubdirs": 0,
"rsnaps": 0,
"rctime": "0.000000"
}
{
"version": 0,
"rbytes": 34,
"rfiles": 1,
"rsubdirs": 0,
"rsnaps": 0,
"rctime": "0.000000"
}
2540123""""[]
{
"splits": []
}
true{
"replicas": {}
}
{
"authority": [
0,
-2
],
"replica_nonce": 0
}
0falsefalse{}
0{
"gather_set": [],
"state": "lock",
"is_leased": false,
"num_rdlocks": 0,
"num_wrlocks": 0,
"num_xlocks": 0,
"xlock_by": {}
}
{}
{}
{}
{}
{}
{}
{}
{}
{}
[
"auth"
]
[]
-1-1[]
[]
-1> 2019-10-18 16:19:06.964 7faefa470700 -1 /opt/app-root/src/ceph/src/mds/CInode.cc: In function 'CDir* CInode::get_or_open_dirfrag(MDCache*, frag_t)' thread 7faefa470700 time 2019-10-18 16:19:06.934662
/opt/app-root/src/ceph/src/mds/CInode.cc: 746: FAILED ceph_assert(is_dir())
ceph version 14.2.4 (75f4de193b3ea58512f204623e6c5a16e6c1e1ba) nautilus (stable)
1: (ceph::__ceph_assert_fail(char const*, char const*, int, char const*)+0x1aa) [0x7faf0a9ce39e]
2: (()+0x12a8620) [0x7faf0a9ce620]
3: (CInode::get_or_open_dirfrag(MDCache*, frag_t)+0x253) [0x557562a4b1ad]
4: (OpenFileTable::_prefetch_dirfrags()+0x4db) [0x557562b63d63]
5: (OpenFileTable::_open_ino_finish(inodeno_t, int)+0x16a) [0x557562b63720]
6: (C_OFT_OpenInoFinish::finish(int)+0x2d) [0x557562b67699]
7: (Context::complete(int)+0x27) [0x557562657fbf]
8: (MDSContext::complete(int)+0x152) [0x557562b04aa4]
9: (void finish_contexts<std::vector<MDSContext*, std::allocator<MDSContext*> > >(CephContext*, std::vector<MDSContext*, std::allocator<MDSContext*> >&, int)+0x2c8) [0x557562660e36]
10: (MDCache::open_ino_finish(inodeno_t, MDCache::open_ino_info_t&, int)+0x185) [0x557562844c4d]
11: (MDCache::_open_ino_backtrace_fetched(inodeno_t, ceph::buffer::v14_2_0::list&, int)+0xbbf) [0x557562842785]
12: (C_IO_MDC_OpenInoBacktraceFetched::finish(int)+0x37) [0x557562886a31]
13: (Context::complete(int)+0x27) [0x557562657fbf]
14: (MDSContext::complete(int)+0x152) [0x557562b04aa4]
15: (MDSIOContextBase::complete(int)+0x345) [0x557562b0522d]
16: (Finisher::finisher_thread_entry()+0x38b) [0x7faf0a9033e1]
17: (Finisher::FinisherThread::entry()+0x1c) [0x5575626a2772]
18: (Thread::entry_wrapper()+0x78) [0x7faf0a97203c]
19: (Thread::_entry_func(void*)+0x18) [0x7faf0a971fba]
20: (()+0x7dd5) [0x7faf07844dd5]
21: (clone()+0x6d) [0x7faf064f502d]