Hi,

I encountered some strange PMTUD-related behaviour that I need help understanding.

Setup:

+-----------+        +---+        +--------+
| 10.0.0.10 |--------| X |--------|10.0.0.3|
+-----------+        +---+        +--------+

A one-to-many socket is set up at 10.0.0.10. Two instances of the lksctp sctp_darn application were run at 10.0.0.3, listening on ports 8001 and 8002. 10.0.0.3 was also set up to generate ICMP "frag needed" messages for incoming messages over 600 bytes. The same issue also occurs when a router on the path is set up to generate the ICMP message instead.

Test 1: Two associations were connected from 10.0.0.10 to 10.0.0.3, one to port 8001 and the other to port 8002. Then a too-large message was sent on the association to 8001, triggering ICMP generation. When checking the MTU reported in the spinfo_mtu field of SCTP_GET_PEER_ADDR_INFO, the association to 8001 now reports 600. The association to 8002 reports 1500 until traffic is sent on it, at which point it also adjusts to 600, which I think makes sense since the destination IP is the same. When reopening the associations, the value of 600 is remembered for about 10 minutes, which I also think makes sense since net.ipv4.route.mtu_expires is 600.

Test 2: Again the same two associations were connected to 10.0.0.3, but in addition an attempt was made to connect a third association to a non-existent IP; this attempt fails with a timeout after a while. After that, an ICMP-triggering large message was again sent to 8001. Now the behaviour is different from before. The association to 8001 reports a spinfo_mtu of 600, but only for a brief moment; it does not stay at 600 for 10 minutes. In addition, the spinfo_mtu of the association to 8002 never changes; it stays at the original 1500.

The only difference between the two tests is the attempt to connect to a non-responding IP at the beginning of test 2. Any ideas why the behaviour changes? Is this a bug, or is there some other reason for this?
I have attached the sample application used for reproducing this.

BR,
-Peter

------ ver_linux output ------
Linux esalipe-test 4.4.0-93-generic #116-Ubuntu SMP Fri Aug 11 21:17:51 UTC 2017 x86_64 x86_64 x86_64 GNU/Linux

GNU C                   5.4.0
GNU Make                4.1
Binutils                2.26.1
Util-linux              2.27.1
Mount                   2.27.1
Module-init-tools       22
E2fsprogs               1.42.13
Xfsprogs                4.3.0
Linux C Library         2.23
Dynamic linker (ldd)    2.23
Linux C++ Library       6.0.21
Procps                  3.3.10
Net-tools               1.60
Kbd                     1.15.5
Console-tools           1.15.5
Sh-utils                8.25
Udev                    229
Modules Loaded          ablk_helper aes_x86_64 aesni_intel async_memcpy async_pq async_raid6_recov async_tx async_xor autofs4 binfmt_misc btrfs crc32_pclmul crct10dif_pclmul cryptd floppy gf128mul ghash_clmulni_intel glue_helper hid hid_generic ib_addr ib_cm ib_core ib_iser ib_mad ib_sa input_leds irqbypass iscsi_tcp iw_cm joydev kvm kvm_intel libcrc32c libiscsi libiscsi_tcp linear lrw multipath parport parport_pc ppdev psmouse raid0 raid1 raid10 raid456 raid6_pq rdma_cm scsi_transport_iscsi sctp serio_raw usbhid xor
#include <cstring> #include <ctime> #include <iomanip> #include <iostream> #include <errno.h> #include <unistd.h> #include <arpa/inet.h> #include <net/if.h> #include <netinet/in.h> #include <netinet/sctp.h> #include <sys/ioctl.h> #include <sys/socket.h> using namespace std; static const int ERROR_BUFLEN = 64; static const char* SCTP_INTERFACE_NAME = "ens4"; static string data100 = "01234567890123456789012345678901234567890123456789" "01234567890123456789012345678901234567890123456789"; static string data1000 = "01234567890123456789012345678901234567890123456789" "01234567890123456789012345678901234567890123456789" "01234567890123456789012345678901234567890123456789" "01234567890123456789012345678901234567890123456789" "01234567890123456789012345678901234567890123456789" "01234567890123456789012345678901234567890123456789" "01234567890123456789012345678901234567890123456789" "01234567890123456789012345678901234567890123456789" "01234567890123456789012345678901234567890123456789" "01234567890123456789012345678901234567890123456789" "01234567890123456789012345678901234567890123456789" "01234567890123456789012345678901234567890123456789" "01234567890123456789012345678901234567890123456789" "01234567890123456789012345678901234567890123456789" "01234567890123456789012345678901234567890123456789" "01234567890123456789012345678901234567890123456789" "01234567890123456789012345678901234567890123456789" "01234567890123456789012345678901234567890123456789" "01234567890123456789012345678901234567890123456789" "01234567890123456789012345678901234567890123456789"; void printError(const string& msg, const string& funcName) { char errorMessage[ERROR_BUFLEN] {}; char* errMsg = ::strerror_r(errno, errorMessage, sizeof(errorMessage)); cerr << "::" << funcName << ": " << msg << ": " << errMsg << endl; } int createSocket() { int sockFd = socket (AF_INET, SOCK_SEQPACKET, IPPROTO_SCTP); if (sockFd == -1) { printError("Creation of socket failed", __FUNCTION__); return -1; } // Enable 
address reuse int enable = 1; int err = setsockopt(sockFd, SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(enable)); if (err) { printError("Error setting socket option SO_REUSEADDR", __FUNCTION__); close(sockFd); return -1; } // Configure SCTP sctp_initmsg initmsg{}; initmsg.sinit_num_ostreams = 3; initmsg.sinit_max_instreams = 3; initmsg.sinit_max_attempts = 2; initmsg.sinit_max_init_timeo = 0; err = setsockopt(sockFd, IPPROTO_SCTP, SCTP_INITMSG, &initmsg, sizeof(initmsg)); if (err) { printError("Configuring SCTP socket failed", __FUNCTION__); close(sockFd); return -1; } struct sctp_paddrparams paddr_params{}; memset(&paddr_params, 0, sizeof(paddr_params)); socklen_t size_of_sctp_paddr_params = sizeof(paddr_params); paddr_params.spp_flags = SPP_HB_ENABLE | SPP_PMTUD_ENABLE | SPP_SACKDELAY_ENABLE; err = setsockopt(sockFd, IPPROTO_SCTP, SCTP_PEER_ADDR_PARAMS, &paddr_params, size_of_sctp_paddr_params); if (err) { printError("Configuring SCTP params failed", __FUNCTION__); close(sockFd); return -1; } return sockFd; } bool bindSocket(const int sockFd, const int localPort) { // Get IP of ethernet interface string localAddress = ""; ifreq ifr{}; ifr.ifr_addr.sa_family = AF_INET; strncpy(ifr.ifr_name, SCTP_INTERFACE_NAME, IFNAMSIZ - 1); const int ioctlStatus = ioctl(sockFd, SIOCGIFADDR, &ifr); if (ioctlStatus == -1) { printError("Failed to get local address", __FUNCTION__); return false; } char ipAddrBuffer[INET_ADDRSTRLEN] {}; inet_ntop(AF_INET, &reinterpret_cast<sockaddr_in*>(&(ifr.ifr_addr))->sin_addr, ipAddrBuffer, sizeof(ipAddrBuffer)); localAddress.assign(ipAddrBuffer); // Bind to found ip address sockaddr_in serv_addr{}; serv_addr.sin_family = AF_INET; inet_pton(AF_INET, localAddress.c_str(), &serv_addr.sin_addr); serv_addr.sin_port = htons(localPort); if (bind(sockFd, reinterpret_cast<sockaddr*>(&serv_addr), sizeof(serv_addr))) { printError("Failed to bind socket to local address", __FUNCTION__); localAddress.clear(); close(sockFd); return false; } cout << "Local 
endpoint succussfully bound to local address: " << localAddress << endl; return true; } bool openAssociation(const int sockFd, const string &remoteAddress, std::uint16_t remotePort) { sockaddr_in address{}; address.sin_family = AF_INET; inet_pton(AF_INET, remoteAddress.c_str(), &address.sin_addr); address.sin_port = htons(remotePort); int connectError = connect(sockFd, reinterpret_cast<sockaddr *>(&address), sizeof(address)); if (connectError) { printError("Error connecting association", __FUNCTION__); return false; } cout << "Association connected to address: " << remoteAddress << ":" << remotePort << endl; return true; } void sendReq(const int sockFd, const string& remoteAddress, const uint16_t remotePort, const std::string& data) { struct sockaddr_in remoteAddr {}; remoteAddr.sin_family = AF_INET; remoteAddr.sin_port = htons(remotePort); uint32_t payloadProtId = 7; uint16_t streamId = 0; uint32_t dataLength = data.size(); sockaddr* servaddr = reinterpret_cast<sockaddr*>(&remoteAddr); inet_pton(AF_INET, remoteAddress.c_str(), &remoteAddr.sin_addr); const std::string ipaddr = inet_ntoa(reinterpret_cast<sockaddr_in*>(servaddr)->sin_addr); cout << "Sending SCTP req to " << remoteAddress << ":" << remotePort; cout << ", len=" << dataLength << endl; const int bytesSent = sctp_sendmsg(sockFd, data.c_str(), (size_t)dataLength, servaddr, sizeof(sockaddr_in), htonl(payloadProtId), SCTP_ADDR_OVER, streamId, 200, 0); if (bytesSent == -1) { printError("SCTP send failed", __FUNCTION__); } return; } sctp_assoc_t getSocketAssociationId(const int sockFd, const string &remoteIpAddress, std::uint16_t remotePort) { sockaddr_in socket_address_in{}; socket_address_in.sin_family = AF_INET; socket_address_in.sin_port = htons(remotePort); inet_pton(AF_INET, remoteIpAddress.c_str(), &socket_address_in.sin_addr); struct sockaddr *socket_address = reinterpret_cast<sockaddr*>(&socket_address_in); socklen_t salen = sizeof(&socket_address); struct sctp_paddrinfo peer_address_info{}; socklen_t 
size_of_sctp_paddrinfo = sizeof peer_address_info; std::memcpy(&peer_address_info.spinfo_address, socket_address, salen); const int sctpOptInfoError = sctp_opt_info(sockFd, 0, SCTP_GET_PEER_ADDR_INFO, &peer_address_info, &size_of_sctp_paddrinfo); if (sctpOptInfoError) { printError("Failed to get association id", __FUNCTION__); } return peer_address_info.spinfo_assoc_id; } std::uint32_t getAssociationPathMtu(const int sockFd, const string &remoteIpAddress, const std::uint16_t remotePort) { sockaddr_in socket_address_in{}; socket_address_in.sin_family = AF_INET; socket_address_in.sin_port = htons(remotePort); inet_pton(AF_INET, remoteIpAddress.c_str(), &socket_address_in.sin_addr); struct sockaddr *socket_address = reinterpret_cast<sockaddr*>(&socket_address_in); socklen_t salen = sizeof(&socket_address); struct sctp_paddrinfo peer_address_info{}; socklen_t size_of_sctp_paddrinfo = sizeof(peer_address_info); std::memcpy(&peer_address_info.spinfo_address, socket_address, salen); sctp_assoc_t sctpAssociationId = getSocketAssociationId(sockFd, remoteIpAddress, remotePort); const int sctpOptInfoError = sctp_opt_info(sockFd, sctpAssociationId, SCTP_GET_PEER_ADDR_INFO, &peer_address_info, &size_of_sctp_paddrinfo); if (sctpOptInfoError) { printError("Failed to get pmtu", __FUNCTION__); } auto t = std::time(nullptr); auto tm = *std::localtime(&t); std::cout << std::put_time(&tm, "%H:%M:%S ") << remoteIpAddress << ":" << remotePort; cout << " currently has a PMTU of " << peer_address_info.spinfo_mtu << endl; return peer_address_info.spinfo_mtu; } void test1(const string& data) { int localPort = 2944; string remoteIp1 = "10.0.0.3"; uint16_t remotePort1 = 8001; uint16_t remotePort2 = 8002; int sockFd = createSocket(); bindSocket(sockFd, localPort); cout << "### Test 1: 2 assocs" << endl; openAssociation(sockFd, remoteIp1, remotePort1); openAssociation(sockFd, remoteIp1, remotePort2); getAssociationPathMtu(sockFd, remoteIp1, remotePort1); getAssociationPathMtu(sockFd, remoteIp1, 
remotePort2); sendReq(sockFd, remoteIp1, remotePort1, data); for (int i = 0; i < 10; i++) { sleep(10); getAssociationPathMtu(sockFd, remoteIp1, remotePort1); getAssociationPathMtu(sockFd, remoteIp1, remotePort2); } } void test2(const string& data) { int localPort = 2944; string remoteIp1 = "10.0.0.3"; uint16_t remotePort1 = 8001; uint16_t remotePort2 = 8002; string remoteIpFake = "10.52.96.204"; uint16_t remotePortFake = 3239; int sockFd = createSocket(); bindSocket(sockFd, localPort); cout << "### Test 2: 2 assocs + 1 unreachable assoc" << endl; openAssociation(sockFd, remoteIp1, remotePort1); openAssociation(sockFd, remoteIp1, remotePort2); openAssociation(sockFd, remoteIpFake, remotePortFake); getAssociationPathMtu(sockFd, remoteIp1, remotePort1); getAssociationPathMtu(sockFd, remoteIp1, remotePort2); sendReq(sockFd, remoteIp1, remotePort1, data); for (int i = 0; i < 10; i++) { sleep(10); getAssociationPathMtu(sockFd, remoteIp1, remotePort1); getAssociationPathMtu(sockFd, remoteIp1, remotePort2); } } int main(int argc, char** argv) { string testNr = "1"; string& testData = data1000; if (argc >= 2) { testNr = argv[1]; } if (argc >= 3) { testData = data100; } if (testNr == "1") { test1(testData); } else { test2(testData); } return 0; }