Hey, I recently found the RDMA feature of Ceph, which I'm currently trying out. #rdma dev 0: mlx4_0: node_type ca fw 2.42.5000 node_guid 0010:e000:0189:1984 sys_image_guid 0010:e000:0189:1987 rdma_server and rdma_ping work, as does "udaddy". I stopped one of my OSDs and added the following lines to ceph.conf: ms_type = async+rdma ms_cluster_type = async+rdma ms_async_rdma_device_name = mlx4_0 ms_async_rdma_polling_us = 0 Then I restarted the OSD, and it crashes with: ** Caught signal (Segmentation fault) ** in thread 7f2c8dc132c0 thread_name:ceph-osd ceph version 16.2.6 (ee28fb57e47e9f88813e24bbf4c14496ca299d31) pacific (stable) 1: /lib/x86_64-linux-gnu/libc.so.6(+0x46520) [0x7f2c8de65520] 2: (RDMAStack::RDMAStack(ceph::common::CephContext*)+0x1c3) [0x5648024abdf3] 3: (NetworkStack::create(ceph::common::CephContext*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)+0x298) [0x5648024a3ae8] 4: (AsyncMessenger::AsyncMessenger(ceph::common::CephContext*, entity_name_t, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, unsigned long)+0x796) [0x564802496ed6] 5: (Messenger::create(ceph::common::CephContext*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, entity_name_t, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, unsigned long)+0xd8) [0x564802489f98] 6: (Messenger::create_client_messenger(ceph::common::CephContext*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >)+0x168) [0x56480248a3a8] 7: (MonClient::get_monmap_and_config()+0x138) [0x5648024f0048] 8: (global_init(std::map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> 
> >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > > const*, std::vector<char const*, std::allocator<char const*> >&, unsigned int, code_environment_t, int, bool)+0x45f) [0x5648022abbef] 9: main() 10: /lib/x86_64-linux-gnu/libc.so.6(+0x2dfd0) [0x7f2c8de4cfd0] 11: __libc_start_main() 12: _start() 2022-02-01T12:21:56.261+0000 7f2c8dc132c0 -1 *** Caught signal (Segmentation fault) ** in thread 7f2c8dc132c0 thread_name:ceph-osd ceph version 16.2.6 (ee28fb57e47e9f88813e24bbf4c14496ca299d31) pacific (stable) 1: /lib/x86_64-linux-gnu/libc.so.6(+0x46520) [0x7f2c8de65520] 2: (RDMAStack::RDMAStack(ceph::common::CephContext*)+0x1c3) [0x5648024abdf3] 3: (NetworkStack::create(ceph::common::CephContext*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)+0x298) [0x5648024a3ae8] 4: (AsyncMessenger::AsyncMessenger(ceph::common::CephContext*, entity_name_t, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, unsigned long)+0x796) [0x564802496ed6] 5: (Messenger::create(ceph::common::CephContext*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, entity_name_t, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, unsigned long)+0xd8) [0x564802489f98] 6: (Messenger::create_client_messenger(ceph::common::CephContext*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >)+0x168) [0x56480248a3a8] 7: (MonClient::get_monmap_and_config()+0x138) [0x5648024f0048] 8: (global_init(std::map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::less<std::__cxx11::basic_string<char, 
std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > > const*, std::vector<char const*, std::allocator<char const*> >&, unsigned int, code_environment_t, int, bool)+0x45f) [0x5648022abbef] 9: main() 10: /lib/x86_64-linux-gnu/libc.so.6(+0x2dfd0) [0x7f2c8de4cfd0] 11: __libc_start_main() 12: _start() NOTE: a copy of the executable, or `objdump -rdS <executable>` is needed to interpret this. -43> 2022-02-01T12:21:56.261+0000 7f2c8dc132c0 -1 *** Caught signal (Segmentation fault) ** Here's the gdb debug output: Thread 1 "ceph-osd" received signal SIGSEGV, Segmentation fault. 0x00005555563c9df3 in RDMAStack::RDMAStack(ceph::common::CephContext*) () (gdb) backtrace #0 0x00005555563c9df3 in RDMAStack::RDMAStack(ceph::common::CephContext*) () #1 0x00005555563c1ae8 in NetworkStack::create(ceph::common::CephContext*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&) () #2 0x00005555563b4ed6 in AsyncMessenger::AsyncMessenger(ceph::common::CephContext*, entity_name_t, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, unsigned long) () #3 0x00005555563a7f98 in Messenger::create(ceph::common::CephContext*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, entity_name_t, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, unsigned long) () #4 0x00005555563a83a8 in Messenger::create_client_messenger(ceph::common::CephContext*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >) () #5 0x000055555640e048 in MonClient::get_monmap_and_config() () #6 0x00005555561c9bef in global_init(std::map<std::__cxx11::basic_string<char, std::char_traits<char>, 
std::allocator<char> >, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::less<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > > const*, std::vector<char const*, std::allocator<char const*> >&, unsigned int, code_environment_t, int, bool) () #7 0x0000555555ae299b in main () I'm using Ceph 16.2.6 on Ubuntu 21.10 (impish). Has anyone run into this problem? Thanks! _______________________________________________ ceph-users mailing list -- ceph-users@xxxxxxx To unsubscribe send an email to ceph-users-leave@xxxxxxx