In this commit, we make ip_route_input_noref() return drop reasons, which come from ip_route_input_rcu(). We need adjust the callers of ip_route_input_noref() to make sure the return value of ip_route_input_noref() is used properly. The errno that ip_route_input_noref() returns comes from ip_route_input and bpf_lwt_input_reroute in the origin logic, and we make them return -EINVAL on error instead. In the following patch, we will make ip_route_input() returns drop reasons too. Signed-off-by: Menglong Dong <dongml2@xxxxxxxxxxxxxxx> --- v5: - remove the unneeded "else" in ip_expire() v4: - introduce the variable "reason" in bpf_lwt_input_reroute() to make things clear --- include/net/route.h | 15 ++++++++------- net/core/lwt_bpf.c | 6 ++++-- net/ipv4/ip_fragment.c | 11 ++++++----- net/ipv4/ip_input.c | 7 ++++--- net/ipv4/route.c | 7 ++++--- 5 files changed, 26 insertions(+), 20 deletions(-) diff --git a/include/net/route.h b/include/net/route.h index a828a17a6313..11674f7c6be6 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -203,8 +203,9 @@ enum skb_drop_reason ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, dscp_t dscp, struct net_device *dev, struct in_device *in_dev, u32 *itag); -int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, - dscp_t dscp, struct net_device *dev); +enum skb_drop_reason +ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, + dscp_t dscp, struct net_device *dev); int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr, dscp_t dscp, struct net_device *dev, const struct sk_buff *hint); @@ -212,18 +213,18 @@ int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr, static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, dscp_t dscp, struct net_device *devin) { - int err; + enum skb_drop_reason reason; rcu_read_lock(); - err = ip_route_input_noref(skb, dst, src, dscp, devin); - if (!err) { + reason = ip_route_input_noref(skb, dst, src, dscp, devin); + if (!reason) { skb_dst_force(skb); if (!skb_dst(skb)) - err = -EINVAL; + reason = SKB_DROP_REASON_NOT_SPECIFIED; } rcu_read_unlock(); - return err; + return reason ? -EINVAL : 0; } void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, int oif, diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index e0ca24a58810..8a78bff53b2c 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -88,6 +88,7 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt, static int bpf_lwt_input_reroute(struct sk_buff *skb) { + enum skb_drop_reason reason; int err = -EINVAL; if (skb->protocol == htons(ETH_P_IP)) { @@ -96,8 +97,9 @@ static int bpf_lwt_input_reroute(struct sk_buff *skb) dev_hold(dev); skb_dst_drop(skb); - err = ip_route_input_noref(skb, iph->daddr, iph->saddr, - ip4h_dscp(iph), dev); + reason = ip_route_input_noref(skb, iph->daddr, iph->saddr, + ip4h_dscp(iph), dev); + err = reason ? -EINVAL : 0; dev_put(dev); } else if (skb->protocol == htons(ETH_P_IPV6)) { skb_dst_drop(skb); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 48e2810f1f27..07036a2943c1 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -132,12 +132,12 @@ static bool frag_expire_skip_icmp(u32 user) */ static void ip_expire(struct timer_list *t) { + enum skb_drop_reason reason = SKB_DROP_REASON_FRAG_REASM_TIMEOUT; struct inet_frag_queue *frag = from_timer(frag, t, timer); const struct iphdr *iph; struct sk_buff *head = NULL; struct net *net; struct ipq *qp; - int err; qp = container_of(frag, struct ipq, q); net = qp->q.fqdir->net; @@ -175,14 +175,15 @@ static void ip_expire(struct timer_list *t) /* skb has no dst, perform route lookup again */ iph = ip_hdr(head); - err = ip_route_input_noref(head, iph->daddr, iph->saddr, ip4h_dscp(iph), - head->dev); - if (err) + reason = ip_route_input_noref(head, iph->daddr, iph->saddr, + ip4h_dscp(iph), head->dev); + if (reason) goto out; /* Only an end host needs to send an ICMP * "Fragment Reassembly Timeout" message, per RFC792. */ + reason = SKB_DROP_REASON_FRAG_REASM_TIMEOUT; if (frag_expire_skip_icmp(qp->q.key.v4.user) && (skb_rtable(head)->rt_type != RTN_LOCAL)) goto out; @@ -195,7 +196,7 @@ static void ip_expire(struct timer_list *t) spin_unlock(&qp->q.lock); out_rcu_unlock: rcu_read_unlock(); - kfree_skb_reason(head, SKB_DROP_REASON_FRAG_REASM_TIMEOUT); + kfree_skb_reason(head, reason); ipq_put(qp); } diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index c40a26972884..513eb0c6435a 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -362,10 +362,11 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk, * how the packet travels inside Linux networking. */ if (!skb_valid_dst(skb)) { - err = ip_route_input_noref(skb, iph->daddr, iph->saddr, - ip4h_dscp(iph), dev); - if (unlikely(err)) + drop_reason = ip_route_input_noref(skb, iph->daddr, iph->saddr, + ip4h_dscp(iph), dev); + if (unlikely(drop_reason)) goto drop_error; + drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; } else { struct in_device *in_dev = __in_dev_get_rcu(dev); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 1926a8a1a83a..ce1201dbf464 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2487,8 +2487,9 @@ ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr, return ip_route_input_slow(skb, daddr, saddr, dscp, dev, res); } -int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, - dscp_t dscp, struct net_device *dev) +enum skb_drop_reason ip_route_input_noref(struct sk_buff *skb, __be32 daddr, + __be32 saddr, dscp_t dscp, + struct net_device *dev) { enum skb_drop_reason reason; struct fib_result res; @@ -2497,7 +2498,7 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, reason = ip_route_input_rcu(skb, daddr, saddr, dscp, dev, &res); rcu_read_unlock(); - return reason ? -EINVAL : 0; + return reason; } EXPORT_SYMBOL(ip_route_input_noref); -- 2.39.5