On Wed, 17 Aug 2022 at 20:43, Daniel Xu <dxu@xxxxxxxxx> wrote: > > Support direct writes to nf_conn:mark from TC and XDP prog types. This > is useful when applications want to store per-connection metadata. This > is also particularly useful for applications that run both bpf and > iptables/nftables because the latter can trivially access this metadata. > > One example use case would be if a bpf prog is responsible for advanced > packet classification and iptables/nftables is later used for routing > due to pre-existing/legacy code. > > Signed-off-by: Daniel Xu <dxu@xxxxxxxxx> > --- > include/net/netfilter/nf_conntrack_bpf.h | 18 +++++++++ > net/core/filter.c | 34 ++++++++++++++++ > net/netfilter/nf_conntrack_bpf.c | 50 ++++++++++++++++++++++++ > 3 files changed, 102 insertions(+) > > diff --git a/include/net/netfilter/nf_conntrack_bpf.h b/include/net/netfilter/nf_conntrack_bpf.h > index a473b56842c5..0f584c2bd475 100644 > --- a/include/net/netfilter/nf_conntrack_bpf.h > +++ b/include/net/netfilter/nf_conntrack_bpf.h > @@ -3,6 +3,7 @@ > #ifndef _NF_CONNTRACK_BPF_H > #define _NF_CONNTRACK_BPF_H > > +#include <linux/bpf.h> > #include <linux/btf.h> > #include <linux/kconfig.h> > > @@ -10,6 +11,12 @@ > (IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) > > extern int register_nf_conntrack_bpf(void); > +extern int nf_conntrack_btf_struct_access(struct bpf_verifier_log *log, > + const struct btf *btf, > + const struct btf_type *t, int off, > + int size, enum bpf_access_type atype, > + u32 *next_btf_id, > + enum bpf_type_flag *flag); > > #else > > @@ -18,6 +25,17 @@ static inline int register_nf_conntrack_bpf(void) > return 0; > } > > +static inline int > +nf_conntrack_btf_struct_access(struct bpf_verifier_log *log, > + const struct btf *btf, > + const struct btf_type *t, int off, > + int size, enum bpf_access_type atype, > + u32 *next_btf_id, > + enum bpf_type_flag *flag) > +{ > + return -EACCES; > +} > + We should make it work when nf_conntrack is a kernel module as well, not just when it is compiled in. The rest of the stuff already works when it is a module. For that, you can have a global function pointer for this callback, protected by a mutex. register/unregister sets it/unsets it. Each time you call it requires mutex to be held during the call. Later when we have more modules that supply btf_struct_access callback for their module types we can generalize it, for now it should be ok to hardcode it for nf_conn. > #endif > > #endif /* _NF_CONNTRACK_BPF_H */ > diff --git a/net/core/filter.c b/net/core/filter.c > index 5669248aff25..d7b768fe9de7 100644 > --- a/net/core/filter.c > +++ b/net/core/filter.c > @@ -18,6 +18,7 @@ > */ > > #include <linux/atomic.h> > +#include <linux/bpf_verifier.h> > #include <linux/module.h> > #include <linux/types.h> > #include <linux/mm.h> > @@ -55,6 +56,7 @@ > #include <net/sock_reuseport.h> > #include <net/busy_poll.h> > #include <net/tcp.h> > +#include <net/netfilter/nf_conntrack_bpf.h> > #include <net/xfrm.h> > #include <net/udp.h> > #include <linux/bpf_trace.h> > @@ -8710,6 +8712,21 @@ static bool tc_cls_act_is_valid_access(int off, int size, > return bpf_skb_is_valid_access(off, size, type, prog, info); > } > > +static int tc_cls_act_btf_struct_access(struct bpf_verifier_log *log, > + const struct btf *btf, > + const struct btf_type *t, int off, > + int size, enum bpf_access_type atype, > + u32 *next_btf_id, > + enum bpf_type_flag *flag) > +{ > + if (atype == BPF_READ) > + return btf_struct_access(log, btf, t, off, size, atype, next_btf_id, > + flag); > + > + return nf_conntrack_btf_struct_access(log, btf, t, off, size, atype, > + next_btf_id, flag); > +} > + > static bool __is_valid_xdp_access(int off, int size) > { > if (off < 0 || off >= sizeof(struct xdp_md)) > @@ -8769,6 +8786,21 @@ void bpf_warn_invalid_xdp_action(struct net_device *dev, struct bpf_prog *prog, > } > EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); > > +static int xdp_btf_struct_access(struct bpf_verifier_log *log, > + const struct btf *btf, > + const struct btf_type *t, int off, > + int size, enum bpf_access_type atype, > + u32 *next_btf_id, > + enum bpf_type_flag *flag) > +{ > + if (atype == BPF_READ) > + return btf_struct_access(log, btf, t, off, size, atype, next_btf_id, > + flag); > + > + return nf_conntrack_btf_struct_access(log, btf, t, off, size, atype, > + next_btf_id, flag); > +} > + > static bool sock_addr_is_valid_access(int off, int size, > enum bpf_access_type type, > const struct bpf_prog *prog, > @@ -10663,6 +10695,7 @@ const struct bpf_verifier_ops tc_cls_act_verifier_ops = { > .convert_ctx_access = tc_cls_act_convert_ctx_access, > .gen_prologue = tc_cls_act_prologue, > .gen_ld_abs = bpf_gen_ld_abs, > + .btf_struct_access = tc_cls_act_btf_struct_access, > }; > > const struct bpf_prog_ops tc_cls_act_prog_ops = { > @@ -10674,6 +10707,7 @@ const struct bpf_verifier_ops xdp_verifier_ops = { > .is_valid_access = xdp_is_valid_access, > .convert_ctx_access = xdp_convert_ctx_access, > .gen_prologue = bpf_noop_prologue, > + .btf_struct_access = xdp_btf_struct_access, > }; > > const struct bpf_prog_ops xdp_prog_ops = { > diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c > index 1cd87b28c9b0..8010cc542d17 100644 > --- a/net/netfilter/nf_conntrack_bpf.c > +++ b/net/netfilter/nf_conntrack_bpf.c > @@ -6,6 +6,7 @@ > * are exposed through to BPF programs is explicitly unstable. > */ > > +#include <linux/bpf_verifier.h> > #include <linux/bpf.h> > #include <linux/btf.h> > #include <linux/types.h> > @@ -15,6 +16,8 @@ > #include <net/netfilter/nf_conntrack_bpf.h> > #include <net/netfilter/nf_conntrack_core.h> > > +static const struct btf_type *nf_conn_type; > + > /* bpf_ct_opts - Options for CT lookup helpers > * > * Members: > @@ -184,6 +187,53 @@ static struct nf_conn *__bpf_nf_ct_lookup(struct net *net, > return ct; > } > > +/* Check writes into `struct nf_conn` */ > +int nf_conntrack_btf_struct_access(struct bpf_verifier_log *log, > + const struct btf *btf, > + const struct btf_type *t, int off, > + int size, enum bpf_access_type atype, > + u32 *next_btf_id, > + enum bpf_type_flag *flag) > +{ > + const struct btf_type *nct = READ_ONCE(nf_conn_type); > + s32 type_id; > + size_t end; > + > + if (!nct) { > + type_id = btf_find_by_name_kind(btf, "nf_conn", BTF_KIND_STRUCT); > + if (type_id < 0) > + return -EINVAL; > + > + nct = btf_type_by_id(btf, type_id); > + WRITE_ONCE(nf_conn_type, nct); Instead of this, why not just use BTF_ID_LIST_SINGLE to get the type_id and then match 't' to the result of btf_type_by_id? btf_type_by_id is not expensive. > + } > + > + if (t != nct) { > + bpf_log(log, "only read is supported\n"); > + return -EACCES; > + } > + > + switch (off) { > +#if defined(CONFIG_NF_CONNTRACK_MARK) > + case offsetof(struct nf_conn, mark): > + end = offsetofend(struct nf_conn, mark); > + break; > +#endif > + default: > + bpf_log(log, "no write support to nf_conn at off %d\n", off); > + return -EACCES; > + } > + > + if (off + size > end) { > + bpf_log(log, > + "write access at off %d with size %d beyond the member of nf_conn ended at %zu\n", > + off, size, end); > + return -EACCES; > + } > + > + return NOT_INIT; > +} > + > __diag_push(); > __diag_ignore_all("-Wmissing-prototypes", > "Global functions as their definitions will be in nf_conntrack BTF"); > -- > 2.37.1 >