This commit adds support for enabling IP defrag using pre-existing netfilter defrag support. Basically all the flag does is bump a refcnt while the link the active. Checks are also added to ensure the prog requesting defrag support is run _after_ netfilter defrag hooks. Reviewed-by: Florian Westphal <fw@xxxxxxxxx> Signed-off-by: Daniel Xu <dxu@xxxxxxxxx> --- include/uapi/linux/bpf.h | 5 ++ net/netfilter/nf_bpf_link.c | 129 ++++++++++++++++++++++++++++++--- tools/include/uapi/linux/bpf.h | 5 ++ 3 files changed, 128 insertions(+), 11 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 600d0caebbd8..c820076c38db 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1180,6 +1180,11 @@ enum bpf_perf_event_type { */ #define BPF_F_KPROBE_MULTI_RETURN (1U << 0) +/* link_create.netfilter.flags used in LINK_CREATE command for + * BPF_PROG_TYPE_NETFILTER to enable IP packet defragmentation. + */ +#define BPF_F_NETFILTER_IP_DEFRAG (1U << 0) + /* When BPF ldimm64's insn[0].src_reg != 0 then this can have * the following extensions: * diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c index c36da56d756f..5b72aa246577 100644 --- a/net/netfilter/nf_bpf_link.c +++ b/net/netfilter/nf_bpf_link.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/bpf.h> #include <linux/filter.h> +#include <linux/kmod.h> #include <linux/netfilter.h> #include <net/netfilter/nf_bpf_link.h> @@ -23,8 +24,98 @@ struct bpf_nf_link { struct nf_hook_ops hook_ops; struct net *net; u32 dead; + bool defrag; }; +static int bpf_nf_enable_defrag(struct bpf_nf_link *link) +{ + const struct nf_defrag_v4_hook __maybe_unused *v4_hook; + const struct nf_defrag_v6_hook __maybe_unused *v6_hook; + int err; + + switch (link->hook_ops.pf) { +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) + case NFPROTO_IPV4: + rcu_read_lock(); + v4_hook = rcu_dereference(nf_defrag_v4_hook); + if (!v4_hook) { + rcu_read_unlock(); + err = request_module("nf_defrag_ipv4"); + if (err) + return err < 0 ? err : -EINVAL; + + rcu_read_lock(); + v4_hook = rcu_dereference(nf_defrag_v4_hook); + if (!v4_hook) { + WARN_ONCE(1, "nf_defrag_ipv4 bad registration"); + err = -ENOENT; + goto out_v4; + } + } + + err = v4_hook->enable(link->net); +out_v4: + rcu_read_unlock(); + return err; +#endif +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) + case NFPROTO_IPV6: + rcu_read_lock(); + v6_hook = rcu_dereference(nf_defrag_v6_hook); + if (!v6_hook) { + rcu_read_unlock(); + err = request_module("nf_defrag_ipv6"); + if (err) + return err < 0 ? err : -EINVAL; + + rcu_read_lock(); + v6_hook = rcu_dereference(nf_defrag_v6_hook); + if (!v6_hook) { + WARN_ONCE(1, "nf_defrag_ipv6_hooks bad registration"); + err = -ENOENT; + goto out_v6; + } + } + + err = v6_hook->enable(link->net); +out_v6: + rcu_read_unlock(); + return err; +#endif + default: + return -EAFNOSUPPORT; + } +} + +static void bpf_nf_disable_defrag(struct bpf_nf_link *link) +{ + const struct nf_defrag_v4_hook __maybe_unused *v4_hook; + const struct nf_defrag_v6_hook __maybe_unused *v6_hook; + + switch (link->hook_ops.pf) { +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) + case NFPROTO_IPV4: + rcu_read_lock(); + v4_hook = rcu_dereference(nf_defrag_v4_hook); + if (v4_hook) + v4_hook->disable(link->net); + rcu_read_unlock(); + + break; +#endif +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) + case NFPROTO_IPV6: + rcu_read_lock(); + v6_hook = rcu_dereference(nf_defrag_v6_hook); + if (v6_hook) + v6_hook->disable(link->net); + rcu_read_unlock(); + + break; + } +#endif +} + static void bpf_nf_link_release(struct bpf_link *link) { struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link); @@ -37,6 +128,9 @@ static void bpf_nf_link_release(struct bpf_link *link) */ if (!cmpxchg(&nf_link->dead, 0, 1)) nf_unregister_net_hook(nf_link->net, &nf_link->hook_ops); + + if (nf_link->defrag) + bpf_nf_disable_defrag(nf_link); } static void bpf_nf_link_dealloc(struct bpf_link *link) @@ -92,6 +186,8 @@ static const struct bpf_link_ops bpf_nf_link_lops = { static int bpf_nf_check_pf_and_hooks(const union bpf_attr *attr) { + int prio; + switch (attr->link_create.netfilter.pf) { case NFPROTO_IPV4: case NFPROTO_IPV6: @@ -102,19 +198,18 @@ static int bpf_nf_check_pf_and_hooks(const union bpf_attr *attr) return -EAFNOSUPPORT; } - if (attr->link_create.netfilter.flags) + if (attr->link_create.netfilter.flags & ~BPF_F_NETFILTER_IP_DEFRAG) return -EOPNOTSUPP; - /* make sure conntrack confirm is always last. - * - * In the future, if userspace can e.g. request defrag, then - * "defrag_requested && prio before NF_IP_PRI_CONNTRACK_DEFRAG" - * should fail. - */ - switch (attr->link_create.netfilter.priority) { - case NF_IP_PRI_FIRST: return -ERANGE; /* sabotage_in and other warts */ - case NF_IP_PRI_LAST: return -ERANGE; /* e.g. conntrack confirm */ - } + /* make sure conntrack confirm is always last */ + prio = attr->link_create.netfilter.priority; + if (prio == NF_IP_PRI_FIRST) + return -ERANGE; /* sabotage_in and other warts */ + else if (prio == NF_IP_PRI_LAST) + return -ERANGE; /* e.g. conntrack confirm */ + else if ((attr->link_create.netfilter.flags & BPF_F_NETFILTER_IP_DEFRAG) && + prio <= NF_IP_PRI_CONNTRACK_DEFRAG) + return -ERANGE; /* cannot use defrag if prog runs before nf_defrag */ return 0; } @@ -156,6 +251,18 @@ int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) return err; } + if (attr->link_create.netfilter.flags & BPF_F_NETFILTER_IP_DEFRAG) { + err = bpf_nf_enable_defrag(link); + if (err) { + bpf_link_cleanup(&link_primer); + return err; + } + /* only mark defrag enabled if enabling succeeds so cleanup path + * doesn't disable without a corresponding enable + */ + link->defrag = true; + } + err = nf_register_net_hook(net, &link->hook_ops); if (err) { bpf_link_cleanup(&link_primer); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 600d0caebbd8..c820076c38db 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1180,6 +1180,11 @@ enum bpf_perf_event_type { */ #define BPF_F_KPROBE_MULTI_RETURN (1U << 0) +/* link_create.netfilter.flags used in LINK_CREATE command for + * BPF_PROG_TYPE_NETFILTER to enable IP packet defragmentation. + */ +#define BPF_F_NETFILTER_IP_DEFRAG (1U << 0) + /* When BPF ldimm64's insn[0].src_reg != 0 then this can have * the following extensions: * -- 2.41.0