David Howells wrote: > Hi Willem, > > Here's a reduced testcase. It doesn't require anything special; the key is > the amount of data placed in the packet by the send() - it's related to > the MTU size. It needs to stuff in sufficient data to go over the > fragmentation limit (I think). > > In this case, my interface's MTU is 8192. send() is sticking in 8161 bytes of > data and then the output from the aforeposted debugging patch is: > > ==>splice_to_socket() 6630 > udp_sendmsg(8,8) > __ip_append_data(copy=-1,len=8, mtu=8192 skblen=8189 maxfl=8188) > pagedlen 9 = 9 - 0 > copy -1 = 9 - 0 - 1 - 9 > length 8 -= -1 + 0 > __ip_append_data(copy=8172,len=9, mtu=8192 skblen=20 maxfl=8188) > copy=8172 len=9 > skb_splice_from_iter(8,9) > __ip_append_data(copy=8164,len=1, mtu=8192 skblen=28 maxfl=8188) > copy=8164 len=1 > skb_splice_from_iter(0,1) > __ip_append_data(copy=8164,len=1, mtu=8192 skblen=28 maxfl=8188) > copy=8164 len=1 > skb_splice_from_iter(0,1) > __ip_append_data(copy=8164,len=1, mtu=8192 skblen=28 maxfl=8188) > copy=8164 len=1 > skb_splice_from_iter(0,1) > __ip_append_data(copy=8164,len=1, mtu=8192 skblen=28 maxfl=8188) > copy=8164 len=1 > skb_splice_from_iter(0,1) > copy=8164 len=1 > skb_splice_from_iter(0,1) > > It looks like send() pushes 1 byte over the fragmentation limit, then the > splice sees -1 crop up, the length to be copied is increased by 1, but > insufficient data is available and we go into an endless loop. 
> > --- > #define _GNU_SOURCE > #include <arpa/inet.h> > #include <fcntl.h> > #include <netinet/in.h> > #include <stdarg.h> > #include <stdbool.h> > #include <stdio.h> > #include <stdlib.h> > #include <string.h> > #include <unistd.h> > #include <sys/socket.h> > #include <sys/mman.h> > #include <sys/uio.h> > > #define OSERROR(R, S) do { if ((long)(R) == -1L) { perror((S)); exit(1); } } while(0) > > int main() > { > struct sockaddr_storage ss; > struct sockaddr_in sin; > void *buffer; > unsigned int tmp; > int pfd[2], sfd; > int res; > > OSERROR(pipe(pfd), "pipe"); > > sfd = socket(AF_INET, SOCK_DGRAM, 0); > OSERROR(sfd, "socket/2"); > > memset(&sin, 0, sizeof(sin)); > sin.sin_family = AF_INET; > sin.sin_port = htons(0); > sin.sin_addr.s_addr = htonl(0xc0a80601); > #warning you might want to set the address here - this is 192.168.6.1 > OSERROR(connect(sfd, (struct sockaddr *)&sin, sizeof(sin)), "connect"); > > buffer = mmap(NULL, 1024*1024, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0); > OSERROR(buffer, "mmap"); > > OSERROR(send(sfd, buffer, 8161, MSG_CONFIRM|MSG_MORE), "send"); > #warning you need to adjust the length on the above line to match your MTU > > OSERROR(write(pfd[1], buffer, 8), "write"); > > OSERROR(splice(pfd[0], 0, sfd, 0, 0x4ffe0ul, 0), "splice"); > return 0; > } That's helpful. Is the MSG_CONFIRM needed to trigger this? Appending to a MSG_MORE datagram that previously fit within MTU, but no longer, triggers the copy from skb_prev to skb in if (fraggap). I did not see how that would cause issues, but maybe something in how that second skb is setup makes none of the cases in the while loop successfully append, yet also not fail and exit. It would be helpful to know which path it takes (I assume skb_splice_from_iter) and what that returns (0?). Is this indeed trivially sidestepped if downgrading from splicing to regular copying with fragmentation? 
@@ -1042,7 +1042,7 @@ static int __ip_append_data(struct sock *sk, if (inet->hdrincl) return -EPERM; if (rt->dst.dev->features & NETIF_F_SG && - getfrag == ip_generic_getfrag) + getfrag == ip_generic_getfrag && transhdrlen)