[PATCH bpf v2 2/3] bpf_fib_lookup: optionally skip neighbour lookup

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Toke Høiland-Jørgensen <toke@xxxxxxxxxx>

The bpf_fib_lookup() helper performs a neighbour lookup for the destination
IP and returns BPF_FIB_LKUP_NO_NEIGH if this fails, with the expectation
that the BPF program will deal with this condition, either by passing the
packet up the stack, or by using bpf_redirect_neigh().

The neighbour lookup is done via a hash table (through ___neigh_lookup_noref()),
which incurs some overhead. If the caller knows this is likely to fail
anyway, it may want to skip that and go unconditionally to
bpf_redirect_neigh(). For this use case, add a flag to bpf_fib_lookup()
that will make it skip the neighbour lookup and instead always return
BPF_FIB_LKUP_RET_NO_NEIGH (but still populate the gateway and target
ifindex).

Signed-off-by: Toke Høiland-Jørgensen <toke@xxxxxxxxxx>
---
 include/uapi/linux/bpf.h       |   10 ++++++----
 net/core/filter.c              |   16 ++++++++++++++--
 tools/include/uapi/linux/bpf.h |   10 ++++++----
 3 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 9668cde9d684..4bfd3c72dae6 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4841,12 +4841,14 @@ struct bpf_raw_tracepoint_args {
 	__u64 args[0];
 };
 
-/* DIRECT:  Skip the FIB rules and go to FIB table associated with device
- * OUTPUT:  Do lookup from egress perspective; default is ingress
+/* DIRECT:      Skip the FIB rules and go to FIB table associated with device
+ * OUTPUT:      Do lookup from egress perspective; default is ingress
+ * SKIP_NEIGH:  Skip neighbour lookup and return BPF_FIB_LKUP_RET_NO_NEIGH on success
  */
 enum {
-	BPF_FIB_LOOKUP_DIRECT  = (1U << 0),
-	BPF_FIB_LOOKUP_OUTPUT  = (1U << 1),
+	BPF_FIB_LOOKUP_DIRECT	  = (1U << 0),
+	BPF_FIB_LOOKUP_OUTPUT	  = (1U << 1),
+	BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
 };
 
 enum {
diff --git a/net/core/filter.c b/net/core/filter.c
index fa09b4f141ae..9791e6311afa 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5382,6 +5382,9 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 		if (nhc->nhc_gw_family)
 			params->ipv4_dst = nhc->nhc_gw.ipv4;
 
+		if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH)
+			return BPF_FIB_LKUP_RET_NO_NEIGH;
+
 		neigh = __ipv4_neigh_lookup_noref(dev,
 						 (__force u32)params->ipv4_dst);
 	} else {
@@ -5389,6 +5392,10 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 
 		params->family = AF_INET6;
 		*dst = nhc->nhc_gw.ipv6;
+
+		if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH)
+			return BPF_FIB_LKUP_RET_NO_NEIGH;
+
 		neigh = __ipv6_neigh_lookup_noref_stub(dev, dst);
 	}
 
@@ -5501,6 +5508,9 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 	params->rt_metric = res.f6i->fib6_metric;
 	params->ifindex = dev->ifindex;
 
+	if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH)
+		return BPF_FIB_LKUP_RET_NO_NEIGH;
+
 	/* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is
 	 * not needed here.
 	 */
@@ -5518,7 +5528,8 @@ BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
 	if (plen < sizeof(*params))
 		return -EINVAL;
 
-	if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT))
+	if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT |
+		      BPF_FIB_LOOKUP_SKIP_NEIGH))
 		return -EINVAL;
 
 	switch (params->family) {
@@ -5555,7 +5566,8 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
 	if (plen < sizeof(*params))
 		return -EINVAL;
 
-	if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT))
+	if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT |
+		      BPF_FIB_LOOKUP_SKIP_NEIGH))
 		return -EINVAL;
 
 	switch (params->family) {
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 9668cde9d684..4bfd3c72dae6 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -4841,12 +4841,14 @@ struct bpf_raw_tracepoint_args {
 	__u64 args[0];
 };
 
-/* DIRECT:  Skip the FIB rules and go to FIB table associated with device
- * OUTPUT:  Do lookup from egress perspective; default is ingress
+/* DIRECT:      Skip the FIB rules and go to FIB table associated with device
+ * OUTPUT:      Do lookup from egress perspective; default is ingress
+ * SKIP_NEIGH:  Skip neighbour lookup and return BPF_FIB_LKUP_RET_NO_NEIGH on success
  */
 enum {
-	BPF_FIB_LOOKUP_DIRECT  = (1U << 0),
-	BPF_FIB_LOOKUP_OUTPUT  = (1U << 1),
+	BPF_FIB_LOOKUP_DIRECT	  = (1U << 0),
+	BPF_FIB_LOOKUP_OUTPUT	  = (1U << 1),
+	BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
 };
 
 enum {




[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux