FIXME: add description.

FIXME: IMHO we can create a better BPF-helper named bpf_mtu_check()
instead of bpf_mtu_lookup(), because a flag can be used for requesting
GRO segment size checking. The return value of bpf_mtu_check() would say
whether the MTU was violated, but it would also return the MTU via a
pointer argument, to allow BPF-progs to do their own logic.

Signed-off-by: Jesper Dangaard Brouer <brouer@xxxxxxxxxx>
---
 include/uapi/linux/bpf.h |   13 +++++++++++
 net/core/filter.c        |   56 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 69 insertions(+)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 50ce65e37b16..29b335cb96ef 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3718,6 +3718,18 @@ union bpf_attr {
  *		never return NULL.
  *	Return
  *		A pointer pointing to the kernel percpu variable on this cpu.
+ *
+ * int bpf_mtu_lookup(void *ctx, u32 ifindex, u64 flags)
+ *	Description
+ *		Lookup MTU of net device based on ifindex.  The Linux kernel
+ *		route table can configure MTUs on a more specific per-route
+ *		level, which is not provided by this helper.  For route-level
+ *		MTU checks, use the **bpf_fib_lookup**\ () helper.
+ *
+ *		*ctx* is either **struct xdp_md** for XDP programs or
+ *		**struct sk_buff** for tc cls_act programs.
+ *	Return
+ *		On success, MTU size is returned. On error, a negative value.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3875,6 +3887,7 @@ union bpf_attr {
 	FN(redirect_neigh),		\
 	FN(bpf_per_cpu_ptr),		\
 	FN(bpf_this_cpu_ptr),		\
+	FN(mtu_lookup),			\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/net/core/filter.c b/net/core/filter.c
index d84723f347c0..49ae3b80027b 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5512,6 +5512,58 @@ static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {
 	.arg4_type	= ARG_ANYTHING,
 };
 
+static int bpf_mtu_lookup(struct net *netns, u32 ifindex, u64 flags)
+{
+	struct net_device *dev;
+
+	// XXX: Do we even need flags?
+	// Flag idea: get ctx dev->mtu for XDP_TX or redir out-same-dev
+	if (flags)
+		return -EINVAL;
+
+	dev = dev_get_by_index_rcu(netns, ifindex);
+	if (!dev)
+		return -ENODEV;
+
+	return dev->mtu;
+}
+
+BPF_CALL_3(bpf_skb_mtu_lookup, struct sk_buff *, skb,
+	   u32, ifindex, u64, flags)
+{
+	struct net *netns = dev_net(skb->dev);
+
+	return bpf_mtu_lookup(netns, ifindex, flags);
+}
+
+BPF_CALL_3(bpf_xdp_mtu_lookup, struct xdp_buff *, xdp,
+	   u32, ifindex, u64, flags)
+{
+	struct net *netns = dev_net(xdp->rxq->dev);
+	// XXX: Handle if this runs in devmap prog (then is rxq invalid?)
+
+	return bpf_mtu_lookup(netns, ifindex, flags);
+}
+
+static const struct bpf_func_proto bpf_skb_mtu_lookup_proto = {
+	.func		= bpf_skb_mtu_lookup,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_ANYTHING,
+};
+
+static const struct bpf_func_proto bpf_xdp_mtu_lookup_proto = {
+	.func		= bpf_xdp_mtu_lookup,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_ANYTHING,
+};
+
+
 #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
 static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
 {
@@ -7075,6 +7127,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_get_socket_uid_proto;
 	case BPF_FUNC_fib_lookup:
 		return &bpf_skb_fib_lookup_proto;
+	case BPF_FUNC_mtu_lookup:
+		return &bpf_skb_mtu_lookup_proto;
 	case BPF_FUNC_sk_fullsock:
 		return &bpf_sk_fullsock_proto;
 	case BPF_FUNC_sk_storage_get:
@@ -7144,6 +7198,8 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_xdp_adjust_tail_proto;
 	case BPF_FUNC_fib_lookup:
 		return &bpf_xdp_fib_lookup_proto;
+	case BPF_FUNC_mtu_lookup:
+		return &bpf_xdp_mtu_lookup_proto;
 #ifdef CONFIG_INET
 	case BPF_FUNC_sk_lookup_udp:
 		return &bpf_xdp_sk_lookup_udp_proto;
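
For illustration only (not part of the patch): a minimal sketch of how an XDP
program could consume the proposed helper to drop frames larger than the MTU
of an intended egress device. The manual helper declaration via
BPF_FUNC_mtu_lookup, the IFINDEX_EGRESS placeholder and the Ethernet-header
adjustment are assumptions made for this example, not something this patch
defines:

/* Usage sketch only -- assumes a kernel/uapi built with this patch. */
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <bpf/bpf_helpers.h>

/* The helper is not yet in bpf_helper_defs.h, so declare it by its
 * proposed helper id (BPF_FUNC_mtu_lookup from the FN() mapper above).
 */
static int (*bpf_mtu_lookup)(void *ctx, __u32 ifindex, __u64 flags) =
	(void *) BPF_FUNC_mtu_lookup;

#define IFINDEX_EGRESS 4	/* placeholder: ifindex of intended egress dev */

SEC("xdp")
int xdp_check_mtu(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	__u64 pkt_len  = data_end - data;
	int mtu;

	mtu = bpf_mtu_lookup(ctx, IFINDEX_EGRESS, 0);
	if (mtu < 0)		/* e.g. -ENODEV if ifindex is unknown */
		return XDP_ABORTED;

	/* dev->mtu is an L3 MTU, while pkt_len includes the Ethernet
	 * header; VLAN tags are ignored in this sketch.
	 */
	if (pkt_len > (__u64)mtu + sizeof(struct ethhdr))
		return XDP_DROP;

	return XDP_PASS;
}

/* the helper proto is gpl_only */
char _license[] SEC("license") = "GPL";

Note the example only checks against the plain device MTU; the GRO/BIG-TCP
segment-size angle mentioned in the bpf_mtu_check() idea above is not covered
by this sketch.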