On Tue, 06 Oct 2020 18:03:01 +0200 Jesper Dangaard Brouer <brouer@xxxxxxxxxx> wrote: > FIXME: add description. Ups, I will obviously send a V2. I still want feedback on whether I should implement another BPF-helper as sketched below: > FIXME: IMHO we can create a better BPF-helper named bpf_mtu_check() > instead of bpf_mtu_lookup(), because a flag can be used for requesting > GRO segment size checking. The ret value of bpf_mtu_check() says > if MTU was violoated, but also return MTU via pointer arg to allow > BPF-progs to do own logic. > > Signed-off-by: Jesper Dangaard Brouer <brouer@xxxxxxxxxx> > --- > include/uapi/linux/bpf.h | 13 +++++++++++ > net/core/filter.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 69 insertions(+) > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index 50ce65e37b16..29b335cb96ef 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -3718,6 +3718,18 @@ union bpf_attr { > * never return NULL. > * Return > * A pointer pointing to the kernel percpu variable on this cpu. > + * > + * int bpf_mtu_lookup(void *ctx, u32 ifindex, u64 flags) > + * Description > + * Lookup MTU of net device based on ifindex. The Linux kernel > + * route table can configure MTUs on a more specific per route > + * level, which is not provided by this helper. For route level > + * MTU checks use the **bpf_fib_lookup**\ () helper. > + * > + * *ctx* is either **struct xdp_md** for XDP programs or > + * **struct sk_buff** tc cls_act programs. > + * Return > + * On success, MTU size is returned. On error, a negative value. > */ > #define __BPF_FUNC_MAPPER(FN) \ > FN(unspec), \ > @@ -3875,6 +3887,7 @@ union bpf_attr { > FN(redirect_neigh), \ > FN(bpf_per_cpu_ptr), \ > FN(bpf_this_cpu_ptr), \ > + FN(mtu_lookup), \ > /* */ > > /* integer value in 'imm' field of BPF_CALL instruction selects which helper > diff --git a/net/core/filter.c b/net/core/filter.c > index d84723f347c0..49ae3b80027b 100644 > --- a/net/core/filter.c > +++ b/net/core/filter.c > @@ -5512,6 +5512,58 @@ static const struct bpf_func_proto bpf_skb_fib_lookup_proto = { > .arg4_type = ARG_ANYTHING, > }; > > +static int bpf_mtu_lookup(struct net *netns, u32 ifindex, u64 flags) > +{ > + struct net_device *dev; > + > + // XXX: Do we even need flags? > + // Flag idea: get ctx dev->mtu for XDP_TX or redir out-same-dev > + if (flags) > + return -EINVAL; > + > + dev = dev_get_by_index_rcu(netns, ifindex); > + if (!dev) > + return -ENODEV; > + > + return dev->mtu; > +} > + > +BPF_CALL_3(bpf_skb_mtu_lookup, struct sk_buff *, skb, > + u32, ifindex, u64, flags) > +{ > + struct net *netns = dev_net(skb->dev); > + > + return bpf_mtu_lookup(netns, ifindex, flags); > +} > + > +BPF_CALL_3(bpf_xdp_mtu_lookup, struct xdp_buff *, xdp, > + u32, ifindex, u64, flags) > +{ > + struct net *netns = dev_net(xdp->rxq->dev); > + // XXX: Handle if this runs in devmap prog (then is rxq invalid?) > + > + return bpf_mtu_lookup(netns, ifindex, flags); > +} > + > +static const struct bpf_func_proto bpf_skb_mtu_lookup_proto = { > + .func = bpf_skb_mtu_lookup, > + .gpl_only = true, > + .ret_type = RET_INTEGER, > + .arg1_type = ARG_PTR_TO_CTX, > + .arg2_type = ARG_ANYTHING, > + .arg3_type = ARG_ANYTHING, > +}; > + > +static const struct bpf_func_proto bpf_xdp_mtu_lookup_proto = { > + .func = bpf_xdp_mtu_lookup, > + .gpl_only = true, > + .ret_type = RET_INTEGER, > + .arg1_type = ARG_PTR_TO_CTX, > + .arg2_type = ARG_ANYTHING, > + .arg3_type = ARG_ANYTHING, > +}; > + > + > #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF) > static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len) > { > @@ -7075,6 +7127,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) > return &bpf_get_socket_uid_proto; > case BPF_FUNC_fib_lookup: > return &bpf_skb_fib_lookup_proto; > + case BPF_FUNC_mtu_lookup: > + return &bpf_skb_mtu_lookup_proto; > case BPF_FUNC_sk_fullsock: > return &bpf_sk_fullsock_proto; > case BPF_FUNC_sk_storage_get: > @@ -7144,6 +7198,8 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) > return &bpf_xdp_adjust_tail_proto; > case BPF_FUNC_fib_lookup: > return &bpf_xdp_fib_lookup_proto; > + case BPF_FUNC_mtu_lookup: > + return &bpf_xdp_mtu_lookup_proto; > #ifdef CONFIG_INET > case BPF_FUNC_sk_lookup_udp: > return &bpf_xdp_sk_lookup_udp_proto; > > -- Best regards, Jesper Dangaard Brouer MSc.CS, Principal Kernel Engineer at Red Hat LinkedIn: http://www.linkedin.com/in/brouer