Backport of dad3a9314ac95dedc007bc7dacacb396ea10e376:

tcp_fragment() might be called for skbs in the write queue.

Memory limits might have been exceeded because tcp_sendmsg()
only checks limits at full skb (64KB) boundaries.

Therefore, we need to make sure tcp_fragment() won't punish
applications that might have set up very low SO_SNDBUF values.

Backport notes:
The initial upstream version used the tcp_queue type, which is not
present in older kernels, so a new arg was added to tcp_fragment() to
determine whether or not this is a retransmit.

Fixes: 9daf226ff926 ("tcp: tcp_fragment() should apply sane memory limits")
Signed-off-by: Josh Hunt <johunt@xxxxxxxxxx>
Reviewed-by: Jason Baron <jbaron@xxxxxxxxxx>
---
Eric/Greg - This applies on top of v4.14.130. I did not see anything
come through for the older (<4.19) stable kernels yet. Without this
change Christoph Paasch's packetdrill script
(https://lore.kernel.org/netdev/CALMXkpYVRxgeqarp4gnmX7GqYh1sWOAt6UaRFqYBOaaNFfZ5sw@xxxxxxxxxxxxxx/)
fails on 4.14 stable kernels, but passes with this change.

 include/net/tcp.h     |  3 ++-
 net/ipv4/tcp_input.c  |  4 ++--
 net/ipv4/tcp_output.c | 16 ++++++++--------
 3 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 1179ef4f0768..9d69fefa365c 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -554,7 +554,8 @@ void tcp_xmit_retransmit_queue(struct sock *);
 void tcp_simple_retransmit(struct sock *);
 void tcp_enter_recovery(struct sock *sk, bool ece_ack);
 int tcp_trim_head(struct sock *, struct sk_buff *, u32);
-int tcp_fragment(struct sock *, struct sk_buff *, u32, unsigned int, gfp_t);
+int tcp_fragment(struct sock *, struct sk_buff *, u32, unsigned int, gfp_t,
+		 bool retrans);
 
 void tcp_send_probe0(struct sock *);
 void tcp_send_partial(struct sock *);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 8e080f3b75bd..0fd629587104 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1202,7 +1202,7 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
 		if (pkt_len >= skb->len && !in_sack)
 			return 0;
 
-		err = tcp_fragment(sk, skb, pkt_len, mss, GFP_ATOMIC);
+		err = tcp_fragment(sk, skb, pkt_len, mss, GFP_ATOMIC, true);
 		if (err < 0)
 			return err;
 	}
@@ -2266,7 +2266,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 			/* If needed, chop off the prefix to mark as lost. */
 			lost = (packets - oldcnt) * mss;
 			if (lost < skb->len &&
-			    tcp_fragment(sk, skb, lost, mss, GFP_ATOMIC) < 0)
+			    tcp_fragment(sk, skb, lost, mss, GFP_ATOMIC, true) < 0)
 				break;
 			cnt = packets;
 		}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index a8772e11dc1c..ca14770dd7ba 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1259,7 +1259,7 @@ static void tcp_skb_fragment_eor(struct sk_buff *skb, struct sk_buff *skb2)
  * Remember, these are still headerless SKBs at this point.
  */
 int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
-		 unsigned int mss_now, gfp_t gfp)
+		 unsigned int mss_now, gfp_t gfp, bool retrans)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *buff;
@@ -1274,7 +1274,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
 	if (nsize < 0)
 		nsize = 0;
 
-	if (unlikely((sk->sk_wmem_queued >> 1) > sk->sk_sndbuf)) {
+	if (unlikely((sk->sk_wmem_queued >> 1) > sk->sk_sndbuf && retrans)) {
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPWQUEUETOOBIG);
 		return -ENOMEM;
 	}
@@ -1834,7 +1834,7 @@ static bool tcp_snd_wnd_test(const struct tcp_sock *tp,
  * packet has never been sent out before (and thus is not cloned).
  */
 static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
-			unsigned int mss_now, gfp_t gfp)
+			unsigned int mss_now, gfp_t gfp, bool retrans)
 {
 	struct sk_buff *buff;
 	int nlen = skb->len - len;
@@ -1842,7 +1842,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
 
 	/* All of a TSO frame must be composed of paged data.  */
 	if (skb->len != skb->data_len)
-		return tcp_fragment(sk, skb, len, mss_now, gfp);
+		return tcp_fragment(sk, skb, len, mss_now, gfp, retrans);
 
 	buff = sk_stream_alloc_skb(sk, 0, gfp, true);
 	if (unlikely(!buff))
@@ -2361,7 +2361,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 						    nonagle);
 
 		if (skb->len > limit &&
-		    unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
+		    unlikely(tso_fragment(sk, skb, limit, mss_now, gfp, false)))
 			break;
 
 		if (test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags))
@@ -2514,7 +2514,7 @@ void tcp_send_loss_probe(struct sock *sk)
 
 	if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) {
 		if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss,
-					  GFP_ATOMIC)))
+					  GFP_ATOMIC, true)))
 			goto rearm_timer;
 		skb = tcp_write_queue_next(sk, skb);
 	}
@@ -2874,7 +2874,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
 
 	len = cur_mss * segs;
 	if (skb->len > len) {
-		if (tcp_fragment(sk, skb, len, cur_mss, GFP_ATOMIC))
+		if (tcp_fragment(sk, skb, len, cur_mss, GFP_ATOMIC, true))
 			return -ENOMEM; /* We'll try again later. */
 	} else {
 		if (skb_unclone(skb, GFP_ATOMIC))
@@ -3696,7 +3696,7 @@ int tcp_write_wakeup(struct sock *sk, int mib)
 		    skb->len > mss) {
 			seg_size = min(seg_size, mss);
 			TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
-			if (tcp_fragment(sk, skb, seg_size, mss, GFP_ATOMIC))
+			if (tcp_fragment(sk, skb, seg_size, mss, GFP_ATOMIC, false))
 				return -1;
 		} else if (!tcp_skb_pcount(skb))
 			tcp_set_skb_tso_segs(skb, mss);
-- 
2.7.4
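
A note for anyone reading along: the case the relaxed check is meant to
tolerate is an ordinary application that shrinks its send buffer far below
the default. The user-space sketch below is only an illustration of that
setup; it is not Christoph Paasch's packetdrill reproducer, and the
destination address, port, and buffer sizes are arbitrary examples. Because
tcp_sendmsg() re-checks memory limits only at full-skb (64KB) boundaries,
sk_wmem_queued can legitimately exceed twice such a tiny SO_SNDBUF, and
without the retrans argument tcp_fragment() could then return -ENOMEM when
splitting an skb that is still in the write queue.

/* Illustrative only: the kind of low-SO_SNDBUF socket the commit message
 * refers to.  Destination address, port, and sizes are made-up examples.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <sys/socket.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_STREAM, 0);
	if (fd < 0) {
		perror("socket");
		return 1;
	}

	/* Ask for a very small send buffer; the kernel doubles and clamps it,
	 * but it still ends up far below the 64KB skb size used by TSO.
	 */
	int sndbuf = 4096;
	if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sndbuf, sizeof(sndbuf)) < 0)
		perror("setsockopt(SO_SNDBUF)");

	struct sockaddr_in dst = {
		.sin_family = AF_INET,
		.sin_port   = htons(8080),		/* example port */
	};
	inet_pton(AF_INET, "192.0.2.1", &dst.sin_addr);	/* example address */

	if (connect(fd, (struct sockaddr *)&dst, sizeof(dst)) < 0) {
		perror("connect");
		close(fd);
		return 1;
	}

	/* tcp_sendmsg() only re-checks memory limits at full-skb (64KB)
	 * boundaries, so sk_wmem_queued can overshoot this tiny SO_SNDBUF by
	 * more than 2x without the application doing anything abusive.
	 */
	char buf[64 * 1024];
	memset(buf, 'x', sizeof(buf));
	if (send(fd, buf, sizeof(buf), 0) < 0)
		perror("send");

	close(fd);
	return 0;
}

With the new argument, the sk_wmem_queued limit in tcp_fragment() is only
enforced for the retransmit/SACK callers (retrans == true), while the
write-queue callers in tcp_write_xmit() and tcp_write_wakeup() pass false,
matching the intent of the upstream tcp_queue check.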