From: Toke Høiland-Jørgensen <toke@xxxxxxxxxx> The bpf_fib_lookup() helper performs a neighbour lookup for the destination IP and returns BPF_FIB_LKUP_RET_NO_NEIGH if this fails, with the expectation that the BPF program will deal with this condition, either by passing the packet up the stack, or by using bpf_redirect_neigh(). The neighbour lookup is done via a hash table (through ___neigh_lookup_noref()), which incurs some overhead. If the caller knows this is likely to fail anyway, it may want to skip that and go unconditionally to bpf_redirect_neigh(). For this use case, add a flag to bpf_fib_lookup() that will make it skip the neighbour lookup and instead always return BPF_FIB_LKUP_RET_NO_NEIGH (but still populate the gateway and target ifindex). Signed-off-by: Toke Høiland-Jørgensen <toke@xxxxxxxxxx> --- include/uapi/linux/bpf.h | 10 ++++++---- net/core/filter.c | 16 ++++++++++++++-- tools/include/uapi/linux/bpf.h | 10 ++++++---- 3 files changed, 26 insertions(+), 10 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 9668cde9d684..4bfd3c72dae6 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4841,12 +4841,14 @@ struct bpf_raw_tracepoint_args { __u64 args[0]; }; -/* DIRECT: Skip the FIB rules and go to FIB table associated with device - * OUTPUT: Do lookup from egress perspective; default is ingress +/* DIRECT: Skip the FIB rules and go to FIB table associated with device + * OUTPUT: Do lookup from egress perspective; default is ingress + * SKIP_NEIGH: Skip neighbour lookup and return BPF_FIB_LKUP_RET_NO_NEIGH on success */ enum { - BPF_FIB_LOOKUP_DIRECT = (1U << 0), - BPF_FIB_LOOKUP_OUTPUT = (1U << 1), + BPF_FIB_LOOKUP_DIRECT = (1U << 0), + BPF_FIB_LOOKUP_OUTPUT = (1U << 1), + BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), }; enum { diff --git a/net/core/filter.c b/net/core/filter.c index fa09b4f141ae..9791e6311afa 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5382,6 +5382,9 @@ static int 
bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, if (nhc->nhc_gw_family) params->ipv4_dst = nhc->nhc_gw.ipv4; + if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH) + return BPF_FIB_LKUP_RET_NO_NEIGH; + neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)params->ipv4_dst); } else { @@ -5389,6 +5392,10 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, params->family = AF_INET6; *dst = nhc->nhc_gw.ipv6; + + if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH) + return BPF_FIB_LKUP_RET_NO_NEIGH; + neigh = __ipv6_neigh_lookup_noref_stub(dev, dst); } @@ -5501,6 +5508,9 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, params->rt_metric = res.f6i->fib6_metric; params->ifindex = dev->ifindex; + if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH) + return BPF_FIB_LKUP_RET_NO_NEIGH; + /* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is * not needed here. */ @@ -5518,7 +5528,8 @@ BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx, if (plen < sizeof(*params)) return -EINVAL; - if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT)) + if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | + BPF_FIB_LOOKUP_SKIP_NEIGH)) return -EINVAL; switch (params->family) { @@ -5555,7 +5566,8 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb, if (plen < sizeof(*params)) return -EINVAL; - if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT)) + if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | + BPF_FIB_LOOKUP_SKIP_NEIGH)) return -EINVAL; switch (params->family) { diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 9668cde9d684..4bfd3c72dae6 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -4841,12 +4841,14 @@ struct bpf_raw_tracepoint_args { __u64 args[0]; }; -/* DIRECT: Skip the FIB rules and go to FIB table associated with device - * OUTPUT: Do lookup from egress perspective; default is ingress +/* DIRECT: Skip the FIB rules 
and go to FIB table associated with device + * OUTPUT: Do lookup from egress perspective; default is ingress + * SKIP_NEIGH: Skip neighbour lookup and return BPF_FIB_LKUP_RET_NO_NEIGH on success */ enum { - BPF_FIB_LOOKUP_DIRECT = (1U << 0), - BPF_FIB_LOOKUP_OUTPUT = (1U << 1), + BPF_FIB_LOOKUP_DIRECT = (1U << 0), + BPF_FIB_LOOKUP_OUTPUT = (1U << 1), + BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), }; enum {