Hi,

I've deployed a Ceph Quincy cluster for HPC. Recently I keep running into ceph-fuse crashes. The kernel version is 4.18.0-348.el8.0.2.x86_64. Here is part of the ceph-fuse log:

-59> 2023-06-28T09:51:00.452+0800 155546ff7700 3 client.159239 ll_lookup 0x200017f674a.head anaconda3
-58> 2023-06-28T09:51:00.452+0800 15554cc49700 3 client.159239 ll_opendir 0x10003e1408d.head
-57> 2023-06-28T09:51:00.452+0800 15554cc49700 3 client.159239 may_open 0x1554e79123d0 = 0
-56> 2023-06-28T09:51:00.452+0800 15554cc49700 3 client.159239 ll_opendir 0x10003e1408d.head = 0 (0x155328079380)
-55> 2023-06-28T09:51:00.453+0800 1555473f9700 3 client.159239 seekdir(0x155328079380, 0)
-54> 2023-06-28T09:51:00.452+0800 155546bf5700 5 client.159239 put_cap_ref dropped last FILE_CACHE ref on 0x20004a6e548.head(faked_ino=0 nref=14 ll_ref=2 cap_refs={1024=0,2048=1} open={1=1} mode=100644 size=5626/0 nlink=1 btime=2023-06-05T14:38:36.471178+0800 mtime=2023-06-05T14:38:36.471178+0800 ctime=2023-06-05T14:38:36.471178+0800 change_attr=1 caps=pAsLsXsFscr(0=pAsLsXsFscr) objectset[0x20004a6e548 ts 0/0 objects 1 dirty_or_tx 0] 0x1554e7902300)
-53> 2023-06-28T09:51:00.453+0800 155546bf5700 3 client.159239 ll_read 0x15531806b970 0~8192 = 5626
-52> 2023-06-28T09:51:00.453+0800 155546ff7700 3 client.159239 may_lookup 0x155420004840 = 0
-51> 2023-06-28T09:51:00.453+0800 1554a5dee700 3 client.159239 ll_lookup 0x10003e1406f.head html
-50> 2023-06-28T09:51:00.453+0800 155546ff7700 3 client.159239 ll_lookup 0x200017f674a.head anaconda3 -> 0 (100019b7a43)
-49> 2023-06-28T09:51:00.453+0800 1554a5dee700 3 client.159239 may_lookup 0x1554c1a89e40 = 0
-48> 2023-06-28T09:51:00.453+0800 1554a7dfe700 3 client.159239 ll_flush 0x15531806b970 0x20004a6e548
-47> 2023-06-28T09:51:00.453+0800 1555469f4700 3 client.159239 ll_lookup 0x100019b7a43.head envs
-46> 2023-06-28T09:51:00.453+0800 1555469f4700 3 client.159239 may_lookup 0x155420343eb0 = 0
-45> 2023-06-28T09:51:00.453+0800 1555469f4700 3 client.159239 ll_lookup 0x100019b7a43.head envs -> 0 (200035de5d8)
-44> 2023-06-28T09:51:00.453+0800 155545fef700 3 client.159239 ll_release (fh)0x15531806b970 0x20004a6e548
-43> 2023-06-28T09:51:00.453+0800 155546bf5700 3 client.159239 seekdir(0x15544c054010, 1152360438801891331)
-42> 2023-06-28T09:51:00.453+0800 155546ff7700 3 client.159239 seekdir(0x15544c029ee0, 1152690945930559491)
-41> 2023-06-28T09:51:00.453+0800 15554cc49700 3 client.159239 ll_releasedir 0x15544c029ee0
-40> 2023-06-28T09:51:00.453+0800 1555452da700 3 client.159239 ll_lookup 0x20004a66459.head tests
-39> 2023-06-28T09:51:00.453+0800 15554c244700 3 client.159239 ll_lookup 0x100040bd04e.head att5410-w -> 0 (200047cbc02)
-38> 2023-06-28T09:51:00.453+0800 1554a7dfe700 3 client.159239 ll_lookup 0x200035de5d8.head steven-colossal
-37> 2023-06-28T09:51:00.453+0800 1554a7dfe700 3 client.159239 may_lookup 0x1554c20229c0 = 0
-36> 2023-06-28T09:51:00.453+0800 1554a7dfe700 3 client.159239 ll_lookup 0x200035de5d8.head steven-colossal -> 0 (100040bcf3a)
-35> 2023-06-28T09:51:00.453+0800 1555452da700 3 client.159239 may_lookup 0x155533c970f0 = 0
-34> 2023-06-28T09:51:00.453+0800 1555452da700 3 client.159239 ll_lookup 0x20004a66459.head tests -> 0 (20004a6e3e0)
-33> 2023-06-28T09:51:00.453+0800 155545bed700 3 client.159239 ll_releasedir 0x15544c054010
-32> 2023-06-28T09:51:00.453+0800 155546bf5700 3 client.159239 ll_getattr 0x200047cbc02.head = 0
-31> 2023-06-28T09:51:00.453+0800 1555452da700 3 client.159239 ll_lookup 0x200017f674a.head anaconda3
-30> 2023-06-28T09:51:00.453+0800 1555452da700 3 client.159239 may_lookup 0x155420004840 = 0
-29> 2023-06-28T09:51:00.453+0800 1555452da700 3 client.159239 ll_lookup 0x200017f674a.head anaconda3 -> 0 (100019b7a43)
-28> 2023-06-28T09:51:00.453+0800 155545fef700 3 client.159239 ll_lookup 0x100040bcf3a.head lib
-27> 2023-06-28T09:51:00.453+0800 155545fef700 3 client.159239 may_lookup 0x1554d7d1d240 = 0
-26> 2023-06-28T09:51:00.453+0800 155545fef700 3 client.159239 ll_lookup 0x100040bcf3a.head lib -> 0 (100040bd018)
-25> 2023-06-28T09:51:00.453+0800 1555469f4700 3 client.159239 ll_lookup 0x20004a6e3e0.head test_trainer
-24> 2023-06-28T09:51:00.453+0800 1555469f4700 3 client.159239 may_lookup 0x155531e5a6c0 = 0
-23> 2023-06-28T09:51:00.453+0800 15554cc49700 3 client.159239 ll_lookup 0x200017f674a.head anaconda3
-22> 2023-06-28T09:51:00.453+0800 15554cc49700 3 client.159239 may_lookup 0x155420004840 = 0
-21> 2023-06-28T09:51:00.453+0800 15554cc49700 3 client.159239 ll_lookup 0x200017f674a.head anaconda3 -> 0 (100019b7a43)
-20> 2023-06-28T09:51:00.453+0800 15554c244700 3 client.159239 ll_lookup 0x100019b7a43.head envs
-19> 2023-06-28T09:51:00.453+0800 15554c244700 3 client.159239 may_lookup 0x155420343eb0 = 0
-18> 2023-06-28T09:51:00.453+0800 15554c244700 3 client.159239 ll_lookup 0x100019b7a43.head envs -> 0 (200035de5d8)
-17> 2023-06-28T09:51:00.453+0800 155546ff7700 3 client.159239 ll_lookup 0x100019b7a43.head envs
-16> 2023-06-28T09:51:00.453+0800 155546ff7700 3 client.159239 may_lookup 0x155420343eb0 = 0
-15> 2023-06-28T09:51:00.453+0800 155546ff7700 3 client.159239 ll_lookup 0x100019b7a43.head envs -> 0 (200035de5d8)
-14> 2023-06-28T09:51:00.453+0800 155545bed700 3 client.159239 ll_lookup 0x100040bd018.head terminfo
-13> 2023-06-28T09:51:00.453+0800 1555475fa700 3 client.159239 ll_lookup 0x1000383ca0a.head train.py -> 0 (1000383caf7)
-12> 2023-06-28T09:51:00.453+0800 1555465f2700 3 client.159239 ll_lookup 0x200017f674a.head anaconda3
-11> 2023-06-28T09:51:00.453+0800 1555465f2700 3 client.159239 may_lookup 0x155420004840 = 0
-10> 2023-06-28T09:51:00.453+0800 1555465f2700 3 client.159239 ll_lookup 0x200017f674a.head anaconda3 -> 0 (100019b7a43)
-9> 2023-06-28T09:51:00.453+0800 1555452da700 3 client.159239 ll_lookup 0x200035de5d8.head steven-colossal
-8> 2023-06-28T09:51:00.453+0800 1555452da700 3 client.159239 may_lookup 0x1554c20229c0 = 0
-7> 2023-06-28T09:51:00.453+0800 1555452da700 3 client.159239 ll_lookup 0x200035de5d8.head steven-colossal -> 0 (100040bcf3a)
-6> 2023-06-28T09:51:00.453+0800 15554cc49700 3 client.159239 ll_lookup 0x100019b7a43.head envs
-5> 2023-06-28T09:51:00.453+0800 1555465f2700 3 client.159239 ll_getattr 0x1000383caf7.head = 0
-4> 2023-06-28T09:51:00.453+0800 155546bf5700 3 client.159239 ll_lookup 0x200035de5d8.head llmzoo
-3> 2023-06-28T09:51:00.453+0800 155546bf5700 3 client.159239 may_lookup 0x1554c20229c0 = 0
-2> 2023-06-28T09:51:00.453+0800 155546bf5700 3 client.159239 ll_lookup 0x200035de5d8.head llmzoo -> 0 (10003e11a9e)
-1> 2023-06-28T09:51:00.453+0800 15554c646700 3 client.159239 ll_lookup 0x10003e11a9e.head lib
0> 2023-06-28T09:51:00.458+0800 1554a77fb700 -1 *** Caught signal (Segmentation fault) **
in thread 1554a77fb700 thread_name:ceph-fuse

ceph version 17.2.6 (d7ff0d10654d2280e08f1ab989c7cdf3064446a5) quincy (stable)
1: /lib64/libpthread.so.0(+0x12ce0) [0x1555535eece0]
2: (Client::_readdir_cache_cb(dir_result_t*, int (*)(void*, dirent*, ceph_statx*, long, Inode*), void*, int, bool)+0x2f4) [0x555555647d64]
3: (Client::readdir_r_cb(dir_result_t*, int (*)(void*, dirent*, ceph_statx*, long, Inode*), void*, unsigned int, unsigned int, bool)+0xae7) [0x55555564cd37]
4: ceph-fuse(+0xadbf8) [0x555555601bf8]
5: /lib64/libfuse.so.2(+0x16706) [0x1555550fd706]
6: /lib64/libfuse.so.2(+0x17868) [0x1555550fe868]
7: /lib64/libfuse.so.2(+0x14440) [0x1555550fb440]
8: /lib64/libpthread.so.0(+0x81cf) [0x1555535e41cf]
9: clone()
NOTE: a copy of the executable, or `objdump -rdS <executable>` is needed to interpret this.

--- logging levels ---
0/ 5 none
0/ 1 lockdep
0/ 1 context
1/ 1 crush
1/ 5 mds
1/ 5 mds_balancer
1/ 5 mds_locker
1/ 5 mds_log
1/ 5 mds_log_expire
1/ 5 mds_migrator
0/ 1 buffer
0/ 1 timer
0/ 1 filer
0/ 1 striper
0/ 1 objecter
0/ 5 rados
0/ 5 rbd
0/ 5 rbd_mirror
0/ 5 rbd_replay
0/ 5 rbd_pwl
0/ 5 journaler
0/ 5 objectcacher
0/ 5 immutable_obj_cache
1/ 5 client
1/ 5 osd
0/ 5 optracker
0/ 5 objclass
1/ 3 filestore
1/ 3 journal
0/ 0 ms
1/ 5 mon
0/10 monc
1/ 5 paxos
0/ 5 tp
1/ 5 auth
1/ 5 crypto
1/ 1 finisher
1/ 1 reserver
1/ 5 heartbeatmap
1/ 5 perfcounter
1/ 5 rgw
1/ 5 rgw_sync
1/ 5 rgw_datacache
1/10 civetweb
1/ 5 javaclient
1/ 5 asok
1/ 1 throttle
0/ 0 refs
1/ 5 compressor
1/ 5 bluestore
1/ 5 bluefs
1/ 3 bdev
1/ 5 kstore
4/ 5 rocksdb
4/ 5 leveldb
4/ 5 memdb
1/ 5 fuse
2/ 5 mgr
1/ 5 mgrc
1/ 5 dpdk
1/ 5 eventtrace
1/ 5 prioritycache
0/ 5 test
0/ 5 cephfs_mirror
0/ 5 cephsqlite
0/ 5 seastore
0/ 5 seastore_onode
0/ 5 seastore_odata
0/ 5 seastore_omap
0/ 5 seastore_tm
0/ 5 seastore_cleaner
0/ 5 seastore_lba
0/ 5 seastore_cache
0/ 5 seastore_journal
0/ 5 seastore_device
0/ 5 alienstore
1/ 5 mclock
1/ 5 ceph_exporter
-2/-2 (syslog threshold)
-1/-1 (stderr threshold)

--- pthread ID / name mapping for recent threads ---
1554947e3700 / ceph-fuse
155494fe7700 / ceph-fuse
1554975fa700 / ceph-fuse
1554a57eb700 / ceph-fuse
1554a59ec700 / ceph-fuse
1554a5dee700 / ceph-fuse
1554a5fef700 / ceph-fuse
1554a61f0700 /
1554a63f1700 / ceph-fuse
1554a65f2700 / ceph-fuse
1554a67f3700 / ceph-fuse
1554a6bf5700 / ceph-fuse
1554a6df6700 / ceph-fuse
1554a71f8700 / ceph-fuse
1554a73f9700 / ceph-fuse
1554a75fa700 /
1554a77fb700 / ceph-fuse
1554a7dfe700 / ceph-fuse
1554a7fff700 / ceph-fuse
1555452da700 / ceph-fuse
1555455ea700 / ceph-fuse
1555459ec700 / ceph-fuse
155545bed700 / ceph-fuse
155545fef700 / ceph-fuse
1555465f2700 / ceph-fuse
1555469f4700 / ceph-fuse
155546bf5700 / ceph-fuse
155546df6700 / ceph-fuse
155546ff7700 / ceph-fuse
1555471f8700 / ceph-fuse
1555473f9700 / ceph-fuse
1555475fa700 / ceph-fuse
1555477fb700 /
155547dfe700 / ceph-fuse
155547fff700 / ceph-fuse
15554c244700 / ceph-fuse
15554c646700 / ceph-fuse
15554c847700 / ceph-fuse
15554cc49700 / ceph-fuse
15554ce4a700 / ceph-fuse
15554da50700 / ms_dispatch

max_recent 10000
max_new 10000
log_file /var/lib/ceph/crash/2023-06-28T01:51:00.459615Z_3ddbaa44-d8cd-437b-908b-c3772520c7a6/log
--- end dump of recent events ---

Has anyone encountered this kind of problem?
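In case it helps with interpreting the trace, the NOTE in the dump says the executable (or `objdump -rdS <executable>`) is needed. Below is a rough sketch of what I plan to run on the client node to symbolize the frames and pull the crash metadata. It is only a sketch: the /usr/bin/ceph-fuse path, the debuginfo package name, and the repos being available are my assumptions; the crash ID is taken from the log_file path above.

    # Install debug symbols matching the running build (assumes EL8 with debuginfo repos enabled)
    dnf debuginfo-install ceph-fuse-17.2.6

    # Resolve a ceph-fuse frame offset from the dump, e.g. frame 4 "ceph-fuse(+0xadbf8)"
    addr2line -Cfe /usr/bin/ceph-fuse 0xadbf8

    # Or disassemble with interleaved source, as the NOTE suggests
    objdump -rdS /usr/bin/ceph-fuse > ceph-fuse.objdump

    # Crash metadata recorded by the crash module (ID from the log_file path above)
    ceph crash ls
    ceph crash info 2023-06-28T01:51:00.459615Z_3ddbaa44-d8cd-437b-908b-c3772520c7a6

I can also reproduce with a higher client log level (e.g. debug_client = 20) and share the full log if that would help.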