Hey, I gave up on this topic; Ceph does not properly support it, even though it seems really promising. I tested a ping over 40gbit with rdma ud, which took 6us for the round trip (both ways). I could not get rdma rd running (maybe my cards need some special treatment). A tcp-ping at mtu 9000 took 20us and a tcp-ping at mtu 1500 took 36us, all measured with a 4k blocksize. So the transport is up to ~3-4 times faster when using rdma. David Yang <gmydw1118@xxxxxxxxx> schrieb am Mo., 7. Feb. 2022, 10:17: > Did you add the configuration directly to the conf? > I see that other people's posts need to be recompiled after adding rdma. > I'm also going to try rdma mode now, but haven't found any more info. > > sascha a. <sascha.arthur@xxxxxxxxx> 于2022年2月1日周二 20:31写道: > >> Hey, >> >> I Recently found this RDMA feature of ceph. Which I'm currently trying >> out. >> >> #rdma dev >> 0: mlx4_0: node_type ca fw 2.42.5000 node_guid 0010:e000:0189:1984 >> sys_image_guid 0010:e000:0189:1987 >> >> rdma_server and rdma_ping works as well as "udaddy". 
>> >> Stopped one of my osds, added following lines to ceph.conf >> >> ms_type = async+rdma >> ms_cluster_type = async+rdma >> ms_async_rdma_device_name = mlx4_0 >> ms_async_rdma_polling_us = 0 >> >> restarted the ceph osd and it crashes with: >> >> ** Caught signal (Segmentation fault) ** >> in thread 7f2c8dc132c0 thread_name:ceph-osd >> ceph version 16.2.6 (ee28fb57e47e9f88813e24bbf4c14496ca299d31) pacific >> (stable) >> 1: /lib/x86_64-linux-gnu/libc.so.6(+0x46520) [0x7f2c8de65520] >> 2: (RDMAStack::RDMAStack(ceph::common::CephContext*)+0x1c3) >> [0x5648024abdf3] >> 3: (NetworkStack::create(ceph::common::CephContext*, >> std::__cxx11::basic_string<char, std::char_traits<char>, >> std::allocator<char> > const&)+0x298) [0x5648024a3ae8] >> 4: (AsyncMessenger::AsyncMessenger(ceph::common::CephContext*, >> entity_name_t, std::__cxx11::basic_string<char, std::char_traits<char>, >> std::allocator<char> > const&, std::__cxx11::basic_string<char, >> std::char_traits<char>, std::allocator<char> >, unsigned long)+0x796) >> [0x564802496ed6] >> 5: (Messenger::create(ceph::common::CephContext*, >> std::__cxx11::basic_string<char, std::char_traits<char>, >> std::allocator<char> > const&, entity_name_t, >> std::__cxx11::basic_string<char, std::char_traits<char>, >> std::allocator<char> >, unsigned long)+0xd8) [0x564802489f98] >> 6: (Messenger::create_client_messenger(ceph::common::CephContext*, >> std::__cxx11::basic_string<char, std::char_traits<char>, >> std::allocator<char> >)+0x168) [0x56480248a3a8] >> 7: (MonClient::get_monmap_and_config()+0x138) [0x5648024f0048] >> 8: (global_init(std::map<std::__cxx11::basic_string<char, >> std::char_traits<char>, std::allocator<char> >, >> std::__cxx11::basic_string<char, std::char_traits<char>, >> std::allocator<char> >, std::less<std::__cxx11::basic_string<char, >> std::char_traits<char>, std::allocator<char> > >, >> std::allocator<std::pair<std::__cxx11::basic_string<char, >> std::char_traits<char>, std::allocator<char> > const, >> 
std::__cxx11::basic_string<char, std::char_traits<char>, >> std::allocator<char> > > > > const*, std::vector<char const*, >> std::allocator<char const*> >&, unsigned int, code_environment_t, int, >> bool)+0x45f) [0x5648022abbef] >> 9: main() >> 10: /lib/x86_64-linux-gnu/libc.so.6(+0x2dfd0) [0x7f2c8de4cfd0] >> 11: __libc_start_main() >> 12: _start() >> 2022-02-01T12:21:56.261+0000 7f2c8dc132c0 -1 *** Caught signal >> (Segmentation fault) ** >> in thread 7f2c8dc132c0 thread_name:ceph-osd >> >> ceph version 16.2.6 (ee28fb57e47e9f88813e24bbf4c14496ca299d31) pacific >> (stable) >> 1: /lib/x86_64-linux-gnu/libc.so.6(+0x46520) [0x7f2c8de65520] >> 2: (RDMAStack::RDMAStack(ceph::common::CephContext*)+0x1c3) >> [0x5648024abdf3] >> 3: (NetworkStack::create(ceph::common::CephContext*, >> std::__cxx11::basic_string<char, std::char_traits<char>, >> std::allocator<char> > const&)+0x298) [0x5648024a3ae8] >> 4: (AsyncMessenger::AsyncMessenger(ceph::common::CephContext*, >> entity_name_t, std::__cxx11::basic_string<char, std::char_traits<char>, >> std::allocator<char> > const&, std::__cxx11::basic_string<char, >> std::char_traits<char>, std::allocator<char> >, unsigned long)+0x796) >> [0x564802496ed6] >> 5: (Messenger::create(ceph::common::CephContext*, >> std::__cxx11::basic_string<char, std::char_traits<char>, >> std::allocator<char> > const&, entity_name_t, >> std::__cxx11::basic_string<char, std::char_traits<char>, >> std::allocator<char> >, unsigned long)+0xd8) [0x564802489f98] >> 6: (Messenger::create_client_messenger(ceph::common::CephContext*, >> std::__cxx11::basic_string<char, std::char_traits<char>, >> std::allocator<char> >)+0x168) [0x56480248a3a8] >> 7: (MonClient::get_monmap_and_config()+0x138) [0x5648024f0048] >> 8: (global_init(std::map<std::__cxx11::basic_string<char, >> std::char_traits<char>, std::allocator<char> >, >> std::__cxx11::basic_string<char, std::char_traits<char>, >> std::allocator<char> >, std::less<std::__cxx11::basic_string<char, >> 
std::char_traits<char>, std::allocator<char> > >, >> std::allocator<std::pair<std::__cxx11::basic_string<char, >> std::char_traits<char>, std::allocator<char> > const, >> std::__cxx11::basic_string<char, std::char_traits<char>, >> std::allocator<char> > > > > const*, std::vector<char const*, >> std::allocator<char const*> >&, unsigned int, code_environment_t, int, >> bool)+0x45f) [0x5648022abbef] >> 9: main() >> 10: /lib/x86_64-linux-gnu/libc.so.6(+0x2dfd0) [0x7f2c8de4cfd0] >> 11: __libc_start_main() >> 12: _start() >> NOTE: a copy of the executable, or `objdump -rdS <executable>` is needed >> to interpret this. >> >> -43> 2022-02-01T12:21:56.261+0000 7f2c8dc132c0 -1 *** Caught signal >> (Segmentation fault) ** >> >> Heres the gdb debug output >> >> Thread 1 "ceph-osd" received signal SIGSEGV, Segmentation fault. >> 0x00005555563c9df3 in RDMAStack::RDMAStack(ceph::common::CephContext*) () >> (gdb) backtrace >> #0 0x00005555563c9df3 in RDMAStack::RDMAStack(ceph::common::CephContext*) >> () >> #1 0x00005555563c1ae8 in NetworkStack::create(ceph::common::CephContext*, >> std::__cxx11::basic_string<char, std::char_traits<char>, >> std::allocator<char> > const&) () >> #2 0x00005555563b4ed6 in >> AsyncMessenger::AsyncMessenger(ceph::common::CephContext*, entity_name_t, >> std::__cxx11::basic_string<char, std::char_traits<char>, >> std::allocator<char> > const&, std::__cxx11::basic_string<char, >> std::char_traits<char>, std::allocator<char> >, unsigned long) () >> #3 0x00005555563a7f98 in Messenger::create(ceph::common::CephContext*, >> std::__cxx11::basic_string<char, std::char_traits<char>, >> std::allocator<char> > const&, entity_name_t, >> std::__cxx11::basic_string<char, std::char_traits<char>, >> std::allocator<char> >, unsigned long) () >> #4 0x00005555563a83a8 in >> Messenger::create_client_messenger(ceph::common::CephContext*, >> std::__cxx11::basic_string<char, std::char_traits<char>, >> std::allocator<char> >) () >> #5 0x000055555640e048 in 
MonClient::get_monmap_and_config() () >> #6 0x00005555561c9bef in >> global_init(std::map<std::__cxx11::basic_string<char, >> std::char_traits<char>, std::allocator<char> >, >> std::__cxx11::basic_string<char, std::char_traits<char>, >> std::allocator<char> >, std::less<std::__cxx11::basic_string<char, >> std::char_traits<char>, std::allocator<char> > >, >> std::allocator<std::pair<std::__cxx11::basic_string<char, >> std::char_traits<char>, std::allocator<char> > const, >> std::__cxx11::basic_string<char, std::char_traits<char>, >> std::allocator<char> > > > > const*, std::vector<char const*, >> std::allocator<char const*> >&, unsigned int, code_environment_t, int, >> bool) () >> #7 0x0000555555ae299b in main () >> >> Using ceph 16.2.6 on ubuntu 21.10 impish ... anyone run into this problem? >> >> Thanks! >> _______________________________________________ >> ceph-users mailing list -- ceph-users@xxxxxxx >> To unsubscribe send an email to ceph-users-leave@xxxxxxx >> > _______________________________________________ ceph-users mailing list -- ceph-users@xxxxxxx To unsubscribe send an email to ceph-users-leave@xxxxxxx