On 10/15/20 9:46 AM, Toke Høiland-Jørgensen wrote: > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index bf5a99d803e4..980cc1363be8 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -3677,15 +3677,19 @@ union bpf_attr { > * Return > * The id is returned or 0 in case the id could not be retrieved. > * > - * long bpf_redirect_neigh(u32 ifindex, u64 flags) > + * long bpf_redirect_neigh(u32 ifindex, struct bpf_redir_neigh *params, int plen, u64 flags) why not fold ifindex into params? with params and plen this should be extensible later if needed. A couple of nits below that caught me eye. > * Description > * Redirect the packet to another net device of index *ifindex* > * and fill in L2 addresses from neighboring subsystem. This helper > * is somewhat similar to **bpf_redirect**\ (), except that it > * populates L2 addresses as well, meaning, internally, the helper > - * performs a FIB lookup based on the skb's networking header to > - * get the address of the next hop and then relies on the neighbor > - * lookup for the L2 address of the nexthop. > + * relies on the neighbor lookup for the L2 address of the nexthop. > + * > + * The helper will perform a FIB lookup based on the skb's > + * networking header to get the address of the next hop, unless > + * this is supplied by the caller in the *params* argument. The > + * *plen* argument indicates the len of *params* and should be set > + * to 0 if *params* is NULL. > * > * The *flags* argument is reserved and must be 0. The helper is > * currently only supported for tc BPF program types, and enabled > @@ -4906,6 +4910,17 @@ struct bpf_fib_lookup { > __u8 dmac[6]; /* ETH_ALEN */ > }; > > +struct bpf_redir_neigh { > + /* network family for lookup (AF_INET, AF_INET6) > + */ second line for the comment is not needed. > + __u8 nh_family; > + /* network address of nexthop; skips fib lookup to find gateway */ > + union { > + __be32 ipv4_nh; > + __u32 ipv6_nh[4]; /* in6_addr; network order */ > + }; > +}; > + > enum bpf_task_fd_type { > BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */ > BPF_FD_TYPE_TRACEPOINT, /* tp name */ > diff --git a/net/core/filter.c b/net/core/filter.c > index c5e2a1c5fd8d..d073031a3a61 100644 > --- a/net/core/filter.c > +++ b/net/core/filter.c > @@ -2165,12 +2165,11 @@ static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev, > } > > #if IS_ENABLED(CONFIG_IPV6) > -static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb) > +static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb, > + struct net_device *dev, const struct in6_addr *nexthop) > { > - struct dst_entry *dst = skb_dst(skb); > - struct net_device *dev = dst->dev; > u32 hh_len = LL_RESERVED_SPACE(dev); > - const struct in6_addr *nexthop; > + struct dst_entry *dst = NULL; > struct neighbour *neigh; > > if (dev_xmit_recursion()) { > @@ -2196,8 +2195,11 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb) > } > > rcu_read_lock_bh(); > - nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst), > - &ipv6_hdr(skb)->daddr); > + if (!nexthop) { > + dst = skb_dst(skb); > + nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst), > + &ipv6_hdr(skb)->daddr); > + } > neigh = ip_neigh_gw6(dev, nexthop); > if (likely(!IS_ERR(neigh))) { > int ret; > @@ -2210,36 +2212,46 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb) > return ret; > } > rcu_read_unlock_bh(); > - IP6_INC_STATS(dev_net(dst->dev), > - ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); > + if (dst) > + IP6_INC_STATS(dev_net(dst->dev), > + ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); > out_drop: > kfree_skb(skb); > return -ENETDOWN; > } > > -static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev) > +static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev, > + struct bpf_nh_params *nh) > { > const struct ipv6hdr *ip6h = ipv6_hdr(skb); > + struct in6_addr *nexthop = NULL; > struct net *net = dev_net(dev); > int err, ret = NET_XMIT_DROP; > - struct dst_entry *dst; > - struct flowi6 fl6 = { > - .flowi6_flags = FLOWI_FLAG_ANYSRC, > - .flowi6_mark = skb->mark, > - .flowlabel = ip6_flowinfo(ip6h), > - .flowi6_oif = dev->ifindex, > - .flowi6_proto = ip6h->nexthdr, > - .daddr = ip6h->daddr, > - .saddr = ip6h->saddr, > - }; > > - dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL); > - if (IS_ERR(dst)) > - goto out_drop; > + if (!nh->nh_family) { > + struct dst_entry *dst; reverse xmas tree ordering > + struct flowi6 fl6 = { > + .flowi6_flags = FLOWI_FLAG_ANYSRC, > + .flowi6_mark = skb->mark, > + .flowlabel = ip6_flowinfo(ip6h), > + .flowi6_oif = dev->ifindex, > + .flowi6_proto = ip6h->nexthdr, > + .daddr = ip6h->daddr, > + .saddr = ip6h->saddr, > + }; > + > + dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL); > + if (IS_ERR(dst)) > + goto out_drop; > > - skb_dst_set(skb, dst); > + skb_dst_set(skb, dst); > + } else if (nh->nh_family == AF_INET6) { > + nexthop = &nh->ipv6_nh; > + } else { > + goto out_drop; > + } > > - err = bpf_out_neigh_v6(net, skb); > + err = bpf_out_neigh_v6(net, skb, dev, nexthop); > if (unlikely(net_xmit_eval(err))) > dev->stats.tx_errors++; > else