Re: [PATCH bpf-next v2 3/4] bpf: Add support for writing to nf_conn:mark

Kumar Kartikeya Dwivedi <memxor@xxxxxxxxx> · Wed, 17 Aug 2022 21:48:31 +0200

On Wed, 17 Aug 2022 at 20:43, Daniel Xu <dxu@xxxxxxxxx> wrote:
>
> Support direct writes to nf_conn:mark from TC and XDP prog types. This
> is useful when applications want to store per-connection metadata. This
> is also particularly useful for applications that run both bpf and
> iptables/nftables because the latter can trivially access this metadata.
>
> One example use case would be if a bpf prog is responsible for advanced
> packet classification and iptables/nftables is later used for routing
> due to pre-existing/legacy code.
>
> Signed-off-by: Daniel Xu <dxu@xxxxxxxxx>
> ---
>  include/net/netfilter/nf_conntrack_bpf.h | 18 +++++++++
>  net/core/filter.c                        | 34 ++++++++++++++++
>  net/netfilter/nf_conntrack_bpf.c         | 50 ++++++++++++++++++++++++
>  3 files changed, 102 insertions(+)
>
> diff --git a/include/net/netfilter/nf_conntrack_bpf.h b/include/net/netfilter/nf_conntrack_bpf.h
> index a473b56842c5..0f584c2bd475 100644
> --- a/include/net/netfilter/nf_conntrack_bpf.h
> +++ b/include/net/netfilter/nf_conntrack_bpf.h
> @@ -3,6 +3,7 @@
>  #ifndef _NF_CONNTRACK_BPF_H
>  #define _NF_CONNTRACK_BPF_H
>
> +#include <linux/bpf.h>
>  #include <linux/btf.h>
>  #include <linux/kconfig.h>
>
> @@ -10,6 +11,12 @@
>      (IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES))
>
>  extern int register_nf_conntrack_bpf(void);
> +extern int nf_conntrack_btf_struct_access(struct bpf_verifier_log *log,
> +                                         const struct btf *btf,
> +                                         const struct btf_type *t, int off,
> +                                         int size, enum bpf_access_type atype,
> +                                         u32 *next_btf_id,
> +                                         enum bpf_type_flag *flag);
>
>  #else
>
> @@ -18,6 +25,17 @@ static inline int register_nf_conntrack_bpf(void)
>         return 0;
>  }
>
> +static inline int
> +nf_conntrack_btf_struct_access(struct bpf_verifier_log *log,
> +                              const struct btf *btf,
> +                              const struct btf_type *t, int off,
> +                              int size, enum bpf_access_type atype,
> +                              u32 *next_btf_id,
> +                              enum bpf_type_flag *flag)
> +{
> +       return -EACCES;
> +}
> +

We should make it work when nf_conntrack is a kernel module as well,
not just when it is compiled in. The rest of the stuff already works
when it is a module. For that, you can have a global function pointer
for this callback, protected by a mutex. register/unregister sets
it/unsets it. Each time you call it requires mutex to be held during
the call.

Later when we have more modules that supply btf_struct_access callback
for their module types we can generalize it, for now it should be ok
to hardcode it for nf_conn.

>  #endif
>
>  #endif /* _NF_CONNTRACK_BPF_H */
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 5669248aff25..d7b768fe9de7 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -18,6 +18,7 @@
>   */
>
>  #include <linux/atomic.h>
> +#include <linux/bpf_verifier.h>
>  #include <linux/module.h>
>  #include <linux/types.h>
>  #include <linux/mm.h>
> @@ -55,6 +56,7 @@
>  #include <net/sock_reuseport.h>
>  #include <net/busy_poll.h>
>  #include <net/tcp.h>
> +#include <net/netfilter/nf_conntrack_bpf.h>
>  #include <net/xfrm.h>
>  #include <net/udp.h>
>  #include <linux/bpf_trace.h>
> @@ -8710,6 +8712,21 @@ static bool tc_cls_act_is_valid_access(int off, int size,
>         return bpf_skb_is_valid_access(off, size, type, prog, info);
>  }
>
> +static int tc_cls_act_btf_struct_access(struct bpf_verifier_log *log,
> +                                       const struct btf *btf,
> +                                       const struct btf_type *t, int off,
> +                                       int size, enum bpf_access_type atype,
> +                                       u32 *next_btf_id,
> +                                       enum bpf_type_flag *flag)
> +{
> +       if (atype == BPF_READ)
> +               return btf_struct_access(log, btf, t, off, size, atype, next_btf_id,
> +                                        flag);
> +
> +       return nf_conntrack_btf_struct_access(log, btf, t, off, size, atype,
> +                                             next_btf_id, flag);
> +}
> +
>  static bool __is_valid_xdp_access(int off, int size)
>  {
>         if (off < 0 || off >= sizeof(struct xdp_md))
> @@ -8769,6 +8786,21 @@ void bpf_warn_invalid_xdp_action(struct net_device *dev, struct bpf_prog *prog,
>  }
>  EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
>
> +static int xdp_btf_struct_access(struct bpf_verifier_log *log,
> +                                const struct btf *btf,
> +                                const struct btf_type *t, int off,
> +                                int size, enum bpf_access_type atype,
> +                                u32 *next_btf_id,
> +                                enum bpf_type_flag *flag)
> +{
> +       if (atype == BPF_READ)
> +               return btf_struct_access(log, btf, t, off, size, atype, next_btf_id,
> +                                        flag);
> +
> +       return nf_conntrack_btf_struct_access(log, btf, t, off, size, atype,
> +                                             next_btf_id, flag);
> +}
> +
>  static bool sock_addr_is_valid_access(int off, int size,
>                                       enum bpf_access_type type,
>                                       const struct bpf_prog *prog,
> @@ -10663,6 +10695,7 @@ const struct bpf_verifier_ops tc_cls_act_verifier_ops = {
>         .convert_ctx_access     = tc_cls_act_convert_ctx_access,
>         .gen_prologue           = tc_cls_act_prologue,
>         .gen_ld_abs             = bpf_gen_ld_abs,
> +       .btf_struct_access      = tc_cls_act_btf_struct_access,
>  };
>
>  const struct bpf_prog_ops tc_cls_act_prog_ops = {
> @@ -10674,6 +10707,7 @@ const struct bpf_verifier_ops xdp_verifier_ops = {
>         .is_valid_access        = xdp_is_valid_access,
>         .convert_ctx_access     = xdp_convert_ctx_access,
>         .gen_prologue           = bpf_noop_prologue,
> +       .btf_struct_access      = xdp_btf_struct_access,
>  };
>
>  const struct bpf_prog_ops xdp_prog_ops = {
> diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c
> index 1cd87b28c9b0..8010cc542d17 100644
> --- a/net/netfilter/nf_conntrack_bpf.c
> +++ b/net/netfilter/nf_conntrack_bpf.c
> @@ -6,6 +6,7 @@
>   * are exposed through to BPF programs is explicitly unstable.
>   */
>
> +#include <linux/bpf_verifier.h>
>  #include <linux/bpf.h>
>  #include <linux/btf.h>
>  #include <linux/types.h>
> @@ -15,6 +16,8 @@
>  #include <net/netfilter/nf_conntrack_bpf.h>
>  #include <net/netfilter/nf_conntrack_core.h>
>
> +static const struct btf_type *nf_conn_type;
> +
>  /* bpf_ct_opts - Options for CT lookup helpers
>   *
>   * Members:
> @@ -184,6 +187,53 @@ static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
>         return ct;
>  }
>
> +/* Check writes into `struct nf_conn` */
> +int nf_conntrack_btf_struct_access(struct bpf_verifier_log *log,
> +                                  const struct btf *btf,
> +                                  const struct btf_type *t, int off,
> +                                  int size, enum bpf_access_type atype,
> +                                  u32 *next_btf_id,
> +                                  enum bpf_type_flag *flag)
> +{
> +       const struct btf_type *nct = READ_ONCE(nf_conn_type);
> +       s32 type_id;
> +       size_t end;
> +
> +       if (!nct) {
> +               type_id = btf_find_by_name_kind(btf, "nf_conn", BTF_KIND_STRUCT);
> +               if (type_id < 0)
> +                       return -EINVAL;
> +
> +               nct = btf_type_by_id(btf, type_id);
> +               WRITE_ONCE(nf_conn_type, nct);

Instead of this, why not just use BTF_ID_LIST_SINGLE to get the
type_id and then match 't' to the result of btf_type_by_id?
btf_type_by_id is not expensive.

> +       }
> +
> +       if (t != nct) {
> +               bpf_log(log, "only read is supported\n");
> +               return -EACCES;
> +       }
> +
> +       switch (off) {
> +#if defined(CONFIG_NF_CONNTRACK_MARK)
> +       case offsetof(struct nf_conn, mark):
> +               end = offsetofend(struct nf_conn, mark);
> +               break;
> +#endif
> +       default:
> +               bpf_log(log, "no write support to nf_conn at off %d\n", off);
> +               return -EACCES;
> +       }
> +
> +       if (off + size > end) {
> +               bpf_log(log,
> +                       "write access at off %d with size %d beyond the member of nf_conn ended at %zu\n",
> +                       off, size, end);
> +               return -EACCES;
> +       }
> +
> +       return NOT_INIT;
> +}
> +
>  __diag_push();
>  __diag_ignore_all("-Wmissing-prototypes",
>                   "Global functions as their definitions will be in nf_conntrack BTF");
> --
> 2.37.1
>