From: Eric Dumazet <edumazet@xxxxxxxxxx> This refactors ip_expire() since one indentation level is removed. Note: in the future, we should try hard to avoid the skb_clone() since this is a serious performance cost. Under DDOS, the ICMP message wont be sent because of rate limits. Fact that ip6_expire_frag_queue() does not use skb_clone() is disturbing too. Presumably IPv6 should have the same issue than the one we fixed in commit ec4fbd64751d ("inet: frag: release spinlock before calling icmp_send()") Signed-off-by: Eric Dumazet <edumazet@xxxxxxxxxx> Signed-off-by: David S. Miller <davem@xxxxxxxxxxxxx> (cherry picked from commit 399d1404be660d355192ff4df5ccc3f4159ec1e4) --- include/net/inet_frag.h | 5 ---- net/ipv4/ip_fragment.c | 65 ++++++++++++++++++++--------------------- net/ipv6/reassembly.c | 4 --- 3 files changed, 32 insertions(+), 42 deletions(-) diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index a49cdf25cef0..7e984045b2b7 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -118,11 +118,6 @@ static inline void inet_frag_put(struct inet_frag_queue *q) inet_frag_destroy(q); } -static inline bool inet_frag_evicting(struct inet_frag_queue *q) -{ - return false; -} - /* Memory Tracking Functions. */ static inline int frag_mem_limit(struct netns_frags *nf) diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 8c4072b19296..308592a8ba97 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -141,8 +141,11 @@ static bool frag_expire_skip_icmp(u32 user) */ static void ip_expire(unsigned long arg) { - struct ipq *qp; + struct sk_buff *clone, *head; + const struct iphdr *iph; struct net *net; + struct ipq *qp; + int err; qp = container_of((struct inet_frag_queue *) arg, struct ipq, q); net = container_of(qp->q.net, struct net, ipv4.frags); @@ -156,45 +159,41 @@ static void ip_expire(unsigned long arg) ipq_kill(qp); __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); - if (!inet_frag_evicting(&qp->q)) { - struct sk_buff *clone, *head = qp->q.fragments; - const struct iphdr *iph; - int err; + head = qp->q.fragments; - __IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT); + __IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT); - if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !qp->q.fragments) - goto out; + if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !head) + goto out; - head->dev = dev_get_by_index_rcu(net, qp->iif); - if (!head->dev) - goto out; + head->dev = dev_get_by_index_rcu(net, qp->iif); + if (!head->dev) + goto out; - /* skb has no dst, perform route lookup again */ - iph = ip_hdr(head); - err = ip_route_input_noref(head, iph->daddr, iph->saddr, + /* skb has no dst, perform route lookup again */ + iph = ip_hdr(head); + err = ip_route_input_noref(head, iph->daddr, iph->saddr, iph->tos, head->dev); - if (err) - goto out; + if (err) + goto out; - /* Only an end host needs to send an ICMP - * "Fragment Reassembly Timeout" message, per RFC792. - */ - if (frag_expire_skip_icmp(qp->q.key.v4.user) && - (skb_rtable(head)->rt_type != RTN_LOCAL)) - goto out; - - clone = skb_clone(head, GFP_ATOMIC); - - /* Send an ICMP "Fragment Reassembly Timeout" message. */ - if (clone) { - spin_unlock(&qp->q.lock); - icmp_send(clone, ICMP_TIME_EXCEEDED, - ICMP_EXC_FRAGTIME, 0); - consume_skb(clone); - goto out_rcu_unlock; - } + /* Only an end host needs to send an ICMP + * "Fragment Reassembly Timeout" message, per RFC792. + */ + if (frag_expire_skip_icmp(qp->q.key.v4.user) && + (skb_rtable(head)->rt_type != RTN_LOCAL)) + goto out; + + clone = skb_clone(head, GFP_ATOMIC); + + /* Send an ICMP "Fragment Reassembly Timeout" message. */ + if (clone) { + spin_unlock(&qp->q.lock); + icmp_send(clone, ICMP_TIME_EXCEEDED, + ICMP_EXC_FRAGTIME, 0); + consume_skb(clone); + goto out_rcu_unlock; } out: spin_unlock(&qp->q.lock); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 8fe3836d1410..629a45a4c79f 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -106,10 +106,6 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq) goto out_rcu_unlock; __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); - - if (inet_frag_evicting(&fq->q)) - goto out_rcu_unlock; - __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); /* Don't send error if the first segment did not arrive. */ -- 2.17.1