On Sat, 20 Aug 2022 at 01:23, Daniel Xu <dxu@xxxxxxxxx> wrote: > > Support direct writes to nf_conn:mark from TC and XDP prog types. This > is useful when applications want to store per-connection metadata. This > is also particularly useful for applications that run both bpf and > iptables/nftables because the latter can trivially access this metadata. > > One example use case would be if a bpf prog is responsible for advanced > packet classification and iptables/nftables is later used for routing > due to pre-existing/legacy code. > > Signed-off-by: Daniel Xu <dxu@xxxxxxxxx> > --- > include/net/netfilter/nf_conntrack_bpf.h | 13 +++++ > net/core/filter.c | 50 ++++++++++++++++++ > net/netfilter/nf_conntrack_bpf.c | 64 +++++++++++++++++++++++- > net/netfilter/nf_conntrack_core.c | 1 + > 4 files changed, 127 insertions(+), 1 deletion(-) > > diff --git a/include/net/netfilter/nf_conntrack_bpf.h b/include/net/netfilter/nf_conntrack_bpf.h > index a473b56842c5..4ef89ee5b5a9 100644 > --- a/include/net/netfilter/nf_conntrack_bpf.h > +++ b/include/net/netfilter/nf_conntrack_bpf.h > @@ -3,13 +3,22 @@ > #ifndef _NF_CONNTRACK_BPF_H > #define _NF_CONNTRACK_BPF_H > > +#include <linux/bpf.h> > #include <linux/btf.h> > #include <linux/kconfig.h> > > +extern int (*nf_conntrack_btf_struct_access)(struct bpf_verifier_log *log, > + const struct btf *btf, > + const struct btf_type *t, int off, > + int size, enum bpf_access_type atype, > + u32 *next_btf_id, > + enum bpf_type_flag *flag); > + > #if (IS_BUILTIN(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \ > (IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) > > extern int register_nf_conntrack_bpf(void); > +extern void cleanup_nf_conntrack_bpf(void); > > #else > > @@ -18,6 +27,10 @@ static inline int register_nf_conntrack_bpf(void) > return 0; > } > > +static inline void cleanup_nf_conntrack_bpf(void) > +{ > +} > + > #endif > > #endif /* _NF_CONNTRACK_BPF_H */ > diff --git a/net/core/filter.c b/net/core/filter.c > index 1acfaffeaf32..e5f48e6030b7 100644 > --- a/net/core/filter.c > +++ b/net/core/filter.c > @@ -18,6 +18,7 @@ > */ > > #include <linux/atomic.h> > +#include <linux/bpf_verifier.h> > #include <linux/module.h> > #include <linux/types.h> > #include <linux/mm.h> > @@ -55,6 +56,7 @@ > #include <net/sock_reuseport.h> > #include <net/busy_poll.h> > #include <net/tcp.h> > +#include <net/netfilter/nf_conntrack_bpf.h> > #include <net/xfrm.h> > #include <net/udp.h> > #include <linux/bpf_trace.h> > @@ -8628,6 +8630,32 @@ static bool tc_cls_act_is_valid_access(int off, int size, > return bpf_skb_is_valid_access(off, size, type, prog, info); > } > > +typedef int (*btf_struct_access_t)(struct bpf_verifier_log *log, > + const struct btf *btf, > + const struct btf_type *t, int off, int size, > + enum bpf_access_type atype, > + u32 *next_btf_id, enum bpf_type_flag *flag); > + > +static int tc_cls_act_btf_struct_access(struct bpf_verifier_log *log, > + const struct btf *btf, > + const struct btf_type *t, int off, > + int size, enum bpf_access_type atype, > + u32 *next_btf_id, > + enum bpf_type_flag *flag) > +{ > + btf_struct_access_t sa; > + > + if (atype == BPF_READ) > + return btf_struct_access(log, btf, t, off, size, atype, next_btf_id, > + flag); > + > + sa = READ_ONCE(nf_conntrack_btf_struct_access); This looks unsafe. How do you prevent this race? CPU 0 CPU 1 sa = READ_ONCE(nf_ct_bsa); delete_module("nf_conntrack", ..); WRITE_ONCE(nf_ct_bsa, NULL); // finishes successfully if (sa) return sa(...); // oops i.e. what keeps the module alive while we execute its callback? Using a mutex is one way (as I suggested previously), either you acquire it before unload, or after. If after, you see cb as NULL, otherwise if unload is triggered concurrently it waits to acquire the mutex held by us. Unsetting the cb would be the first thing the module would do. You can also hold a module reference, but then you must verify it is nf_conntrack's BTF before using btf_try_get_module. But _something_ needs to be done to prevent the module from going away while we execute its code. > + if (sa) > + return sa(log, btf, t, off, size, atype, next_btf_id, flag); > + > + return -EACCES; > +} > + > [...]