On 9/25/20 12:12 AM, David Ahern wrote:
On 9/24/20 12:21 PM, Daniel Borkmann wrote:
diff --git a/net/core/filter.c b/net/core/filter.c
index 0f913755bcba..19caa2fc21e8 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2160,6 +2160,205 @@ static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev,
return __bpf_redirect_no_mac(skb, dev, flags);
}
+#if IS_ENABLED(CONFIG_IPV6)
+static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+ struct net_device *dev = dst->dev;
+ const struct in6_addr *nexthop;
+ struct neighbour *neigh;
+
+ if (dev_xmit_recursion())
+ goto out_rec;
+ skb->dev = dev;
+ rcu_read_lock_bh();
+ nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
+ neigh = __ipv6_neigh_lookup_noref_stub(dev, nexthop);
+ if (unlikely(!neigh))
+ neigh = __neigh_create(ipv6_stub->nd_tbl, nexthop, dev, false);
The last 3 lines (the neighbour lookup and the __neigh_create() fallback) can be replaced with ip_neigh_gw6().
Ah, nice, I wasn't aware of that one. I'll take it. :)
+ if (likely(!IS_ERR(neigh))) {
+ int ret;
+
+ sock_confirm_neigh(skb, neigh);
+ dev_xmit_recursion_inc();
+ ret = neigh_output(neigh, skb, false);
+ dev_xmit_recursion_dec();
+ rcu_read_unlock_bh();
+ return ret;
+ }
+ rcu_read_unlock_bh();
+ IP6_INC_STATS(dev_net(dst->dev),
+ ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
+out_drop:
+ kfree_skb(skb);
+ return -EINVAL;
+out_rec:
+ net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
+ goto out_drop;
+}
+
...
+#if IS_ENABLED(CONFIG_INET)
+static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+ struct rtable *rt = (struct rtable *)dst;
Please use container_of() here; I'd like to see the typecasts removed.
Will do, thx!
+ struct net_device *dev = dst->dev;
+ u32 hh_len = LL_RESERVED_SPACE(dev);
+ struct neighbour *neigh;
+ bool is_v6gw = false;
+
+ if (dev_xmit_recursion())
+ goto out_rec;