This patch adds a new bpf helper to access FDB in the kernel tables from XDP programs. The helper enables us to find the destination port of master bridge in XDP layer with high speed. If an entry in the tables is successfully found, egress device index will be returned. In cases of failure, packets will be dropped or forwarded to upper networking stack in the kernel by XDP programs. Multicast and broadcast packets are currently not supported. Thus, these will need to be passed to upper layer on the basis of XDP_PASS action. The API uses destination MAC and VLAN ID as keys, so XDP programs need to extract these from forwarded packets. Signed-off-by: Yoshiki Komachi <komachi.yoshiki@xxxxxxxxx> --- include/uapi/linux/bpf.h | 28 +++++++++++++++++++++ net/core/filter.c | 45 ++++++++++++++++++++++++++++++++++ scripts/bpf_helpers_doc.py | 1 + tools/include/uapi/linux/bpf.h | 28 +++++++++++++++++++++ 4 files changed, 102 insertions(+) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 54d0c886e3ba..f2e729dd1721 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2149,6 +2149,22 @@ union bpf_attr { * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the * packet is not forwarded or needs assist from full stack * + * long bpf_fdb_lookup(void *ctx, struct bpf_fdb_lookup *params, int plen, u32 flags) + * Description + * Do FDB lookup in kernel tables using parameters in *params*. + * If lookup is successful (ie., FDB lookup finds a destination entry), + * ifindex is set to the egress device index from the FDB lookup. + * Both multicast and broadcast packets are currently unsupported + * in XDP layer. + * + * *plen* argument is the size of the passed **struct bpf_fdb_lookup**. + * *ctx* is only **struct xdp_md** for XDP programs. + * + * Return + * * < 0 if any input argument is invalid + * * 0 on success (destination port is found) + * * > 0 on failure (there is no entry) + * * long bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) * Description * Add an entry to, or update a sockhash *map* referencing sockets. @@ -3449,6 +3465,7 @@ union bpf_attr { FN(get_stack), \ FN(skb_load_bytes_relative), \ FN(fib_lookup), \ + FN(fdb_lookup), \ FN(sock_hash_update), \ FN(msg_redirect_hash), \ FN(sk_redirect_hash), \ @@ -4328,6 +4345,17 @@ struct bpf_fib_lookup { __u8 dmac[6]; /* ETH_ALEN */ }; +enum { + BPF_FDB_LKUP_RET_SUCCESS, /* lookup successful */ + BPF_FDB_LKUP_RET_NOENT, /* entry is not found */ +}; + +struct bpf_fdb_lookup { + unsigned char addr[6]; /* ETH_ALEN */ + __u16 vlan_id; + __u32 ifindex; +}; + enum bpf_task_fd_type { BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */ BPF_FD_TYPE_TRACEPOINT, /* tp name */ diff --git a/net/core/filter.c b/net/core/filter.c index 654c346b7d91..68800d1b8cd5 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -45,6 +45,7 @@ #include <linux/filter.h> #include <linux/ratelimit.h> #include <linux/seccomp.h> +#include <linux/if_bridge.h> #include <linux/if_vlan.h> #include <linux/bpf.h> #include <linux/btf.h> @@ -5084,6 +5085,46 @@ static const struct bpf_func_proto bpf_skb_fib_lookup_proto = { .arg4_type = ARG_ANYTHING, }; +#if IS_ENABLED(CONFIG_BRIDGE) +BPF_CALL_4(bpf_xdp_fdb_lookup, struct xdp_buff *, ctx, + struct bpf_fdb_lookup *, params, int, plen, u32, flags) +{ + struct net_device *src, *dst; + struct net *net; + + if (plen < sizeof(*params)) + return -EINVAL; + + net = dev_net(ctx->rxq->dev); + + if (is_multicast_ether_addr(params->addr) || + is_broadcast_ether_addr(params->addr)) + return BPF_FDB_LKUP_RET_NOENT; + + src = dev_get_by_index_rcu(net, params->ifindex); + if (unlikely(!src)) + return -ENODEV; + + dst = br_fdb_find_port_xdp(src, params->addr, params->vlan_id); + if (dst) { + params->ifindex = dst->ifindex; + return BPF_FDB_LKUP_RET_SUCCESS; + } + + return BPF_FDB_LKUP_RET_NOENT; +} + +static const struct bpf_func_proto bpf_xdp_fdb_lookup_proto = { + .func = bpf_xdp_fdb_lookup, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_ANYTHING, +}; +#endif + #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF) static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len) { @@ -6477,6 +6518,10 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_xdp_adjust_tail_proto; case BPF_FUNC_fib_lookup: return &bpf_xdp_fib_lookup_proto; +#if IS_ENABLED(CONFIG_BRIDGE) + case BPF_FUNC_fdb_lookup: + return &bpf_xdp_fdb_lookup_proto; +#endif #ifdef CONFIG_INET case BPF_FUNC_sk_lookup_udp: return &bpf_xdp_sk_lookup_udp_proto; diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py index 5bfa448b4704..49ebd2273614 100755 --- a/scripts/bpf_helpers_doc.py +++ b/scripts/bpf_helpers_doc.py @@ -448,6 +448,7 @@ class PrinterHelpers(Printer): '__wsum', 'struct bpf_fib_lookup', + 'struct bpf_fdb_lookup', 'struct bpf_perf_event_data', 'struct bpf_perf_event_value', 'struct bpf_pidns_info', diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 54d0c886e3ba..f2e729dd1721 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2149,6 +2149,22 @@ union bpf_attr { * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the * packet is not forwarded or needs assist from full stack * + * long bpf_fdb_lookup(void *ctx, struct bpf_fdb_lookup *params, int plen, u32 flags) + * Description + * Do FDB lookup in kernel tables using parameters in *params*. + * If lookup is successful (ie., FDB lookup finds a destination entry), + * ifindex is set to the egress device index from the FDB lookup. + * Both multicast and broadcast packets are currently unsupported + * in XDP layer. + * + * *plen* argument is the size of the passed **struct bpf_fdb_lookup**. + * *ctx* is only **struct xdp_md** for XDP programs. + * + * Return + * * < 0 if any input argument is invalid + * * 0 on success (destination port is found) + * * > 0 on failure (there is no entry) + * * long bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) * Description * Add an entry to, or update a sockhash *map* referencing sockets. @@ -3449,6 +3465,7 @@ union bpf_attr { FN(get_stack), \ FN(skb_load_bytes_relative), \ FN(fib_lookup), \ + FN(fdb_lookup), \ FN(sock_hash_update), \ FN(msg_redirect_hash), \ FN(sk_redirect_hash), \ @@ -4328,6 +4345,17 @@ struct bpf_fib_lookup { __u8 dmac[6]; /* ETH_ALEN */ }; +enum { + BPF_FDB_LKUP_RET_SUCCESS, /* lookup successful */ + BPF_FDB_LKUP_RET_NOENT, /* entry is not found */ +}; + +struct bpf_fdb_lookup { + unsigned char addr[6]; /* ETH_ALEN */ + __u16 vlan_id; + __u32 ifindex; +}; + enum bpf_task_fd_type { BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */ BPF_FD_TYPE_TRACEPOINT, /* tp name */ -- 2.20.1 (Apple Git-117)