Daniel Borkmann <daniel@xxxxxxxxxxxxx> writes:

> On 10/15/20 5:46 PM, Toke Høiland-Jørgensen wrote:
>> From: Toke Høiland-Jørgensen <toke@xxxxxxxxxx>
>>
>> Based on the discussion in [0], update the bpf_redirect_neigh() helper to
>> accept an optional parameter specifying the nexthop information. This makes
>> it possible to combine bpf_fib_lookup() and bpf_redirect_neigh() without
>> incurring a duplicate FIB lookup - since the FIB lookup helper will return
>> the nexthop information even if no neighbour is present, this can simply be
>> passed on to bpf_redirect_neigh() if bpf_fib_lookup() returns
>> BPF_FIB_LKUP_RET_NO_NEIGH.
>>
>> [0] https://lore.kernel.org/bpf/393e17fc-d187-3a8d-2f0d-a627c7c63fca@xxxxxxxxxxxxx/
>>
>> Signed-off-by: Toke Høiland-Jørgensen <toke@xxxxxxxxxx>
>
> Overall looks good from what I can tell, just small nits below on top of
> David's feedback:
>
> [...]
>> -static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev)
>> +static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev,
>> + struct bpf_nh_params *nh)
>> {
>> const struct iphdr *ip4h = ip_hdr(skb);
>> struct net *net = dev_net(dev);
>> int err, ret = NET_XMIT_DROP;
>> - struct rtable *rt;
>> - struct flowi4 fl4 = {
>> - .flowi4_flags = FLOWI_FLAG_ANYSRC,
>> - .flowi4_mark = skb->mark,
>> - .flowi4_tos = RT_TOS(ip4h->tos),
>> - .flowi4_oif = dev->ifindex,
>> - .flowi4_proto = ip4h->protocol,
>> - .daddr = ip4h->daddr,
>> - .saddr = ip4h->saddr,
>> - };
>>
>> - rt = ip_route_output_flow(net, &fl4, NULL);
>> - if (IS_ERR(rt))
>> - goto out_drop;
>> - if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
>> - ip_rt_put(rt);
>> - goto out_drop;
>> - }
>> + if (!nh->nh_family) {
>> + struct rtable *rt;
>> + struct flowi4 fl4 = {
>> + .flowi4_flags = FLOWI_FLAG_ANYSRC,
>> + .flowi4_mark = skb->mark,
>> + .flowi4_tos = RT_TOS(ip4h->tos),
>> + .flowi4_oif = dev->ifindex,
>> + .flowi4_proto = ip4h->protocol,
>> + .daddr = ip4h->daddr,
>> + .saddr = ip4h->saddr,
>> + };
>> +
>> + rt = ip_route_output_flow(net, &fl4, NULL);
>> + if (IS_ERR(rt))
>> + goto out_drop;
>> + if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
>> + ip_rt_put(rt);
>> + goto out_drop;
>> + }
>>
>> - skb_dst_set(skb, &rt->dst);
>> + skb_dst_set(skb, &rt->dst);
>> + nh = NULL;
>> + }
>>
>> - err = bpf_out_neigh_v4(net, skb);
>> + err = bpf_out_neigh_v4(net, skb, dev, nh);
>> if (unlikely(net_xmit_eval(err)))
>> dev->stats.tx_errors++;
>> else
>> @@ -2355,7 +2383,8 @@ static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev)
>> }
>> #endif /* CONFIG_INET */
>>
>> -static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev)
>> +static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev,
>> + struct bpf_nh_params *nh)
>> {
>> struct ethhdr *ethh = eth_hdr(skb);
>>
>> @@ -2370,9 +2399,9 @@ static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev)
>> skb_reset_network_header(skb);
>>
>> if (skb->protocol == htons(ETH_P_IP))
>> - return __bpf_redirect_neigh_v4(skb, dev);
>> + return __bpf_redirect_neigh_v4(skb, dev, nh);
>> else if (skb->protocol == htons(ETH_P_IPV6))
>> - return __bpf_redirect_neigh_v6(skb, dev);
>> + return __bpf_redirect_neigh_v6(skb, dev, nh);
>> out:
>> kfree_skb(skb);
>> return -ENOTSUPP;
>> @@ -2455,8 +2484,8 @@ int skb_do_redirect(struct sk_buff *skb)
>> return -EAGAIN;
>> }
>> return flags & BPF_F_NEIGH ?
>> - __bpf_redirect_neigh(skb, dev) :
>> - __bpf_redirect(skb, dev, flags);
>> + __bpf_redirect_neigh(skb, dev, &ri->nh) :
>> + __bpf_redirect(skb, dev, flags);
>> out_drop:
>> kfree_skb(skb);
>> return -EINVAL;
>> @@ -2504,16 +2533,23 @@ static const struct bpf_func_proto bpf_redirect_peer_proto = {
>> .arg2_type = ARG_ANYTHING,
>> };
>>
>> -BPF_CALL_2(bpf_redirect_neigh, u32, ifindex, u64, flags)
>> +BPF_CALL_4(bpf_redirect_neigh, u32, ifindex, struct bpf_redir_neigh *, params,
>> + int, plen, u64, flags)
>> {
>> struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
>>
>> - if (unlikely(flags))
>> + if (unlikely((plen && plen < sizeof(*params)) || flags))
>> return TC_ACT_SHOT;
>>
>> ri->flags = BPF_F_NEIGH;
>> ri->tgt_index = ifindex;
>>
>> + BUILD_BUG_ON(sizeof(struct bpf_redir_neigh) != sizeof(struct bpf_nh_params));
>> + if (plen)
>> + memcpy(&ri->nh, params, sizeof(ri->nh));
>> + else
>> + ri->nh.nh_family = 0; /* clear previous value */
>
> I'd probably just add an internal flag and do ...
>
> ri->flags = BPF_F_NEIGH | (plen ? BPF_F_NEXTHOP : 0);
>
> ... instead of above clearing, and skb_do_redirect() then becomes:
>
> __bpf_redirect_neigh(skb, dev, flags & BPF_F_NEXTHOP ? &ri->nh : NULL)
>
> ... which would then also avoid this !nh->nh_family check where you later on
> set nh = NULL to pass it onwards.

Ah yes, excellent idea! Will fix :)

-Toke