Re: [PATCH bpf-next v3 1/5] bpf: Add bpf_link support for sk_msg and sk_skb progs

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Mar 25, 2024 at 7:22 PM Yonghong Song <yonghong.song@xxxxxxxxx> wrote:
>
> Add bpf_link support for sk_msg and sk_skb programs. We have an
> internal request to support bpf_link for sk_msg programs so user
> space can have a uniform handling with bpf_link based libbpf
> APIs. Using bpf_link based libbpf API also has a benefit which
> makes system robust by decoupling prog life cycle and
> attachment life cycle.
>
> Signed-off-by: Yonghong Song <yonghong.song@xxxxxxxxx>
> ---
>  include/linux/bpf.h            |   6 +
>  include/linux/skmsg.h          |   4 +
>  include/uapi/linux/bpf.h       |   5 +
>  kernel/bpf/syscall.c           |   4 +
>  net/core/sock_map.c            | 263 ++++++++++++++++++++++++++++++++-
>  tools/include/uapi/linux/bpf.h |   5 +
>  6 files changed, 279 insertions(+), 8 deletions(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 62762390c93d..5034c1b4ded7 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -2996,6 +2996,7 @@ int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype);
>  int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags);
>  int sock_map_bpf_prog_query(const union bpf_attr *attr,
>                             union bpf_attr __user *uattr);
> +int sock_map_link_create(const union bpf_attr *attr, struct bpf_prog *prog);
>
>  void sock_map_unhash(struct sock *sk);
>  void sock_map_destroy(struct sock *sk);
> @@ -3094,6 +3095,11 @@ static inline int sock_map_bpf_prog_query(const union bpf_attr *attr,
>  {
>         return -EINVAL;
>  }
> +
> +static inline int sock_map_link_create(const union bpf_attr *attr, struct bpf_prog *prog)
> +{
> +       return -EOPNOTSUPP;
> +}
>  #endif /* CONFIG_BPF_SYSCALL */
>  #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
>
> diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
> index e65ec3fd2799..9c8dd4c01412 100644
> --- a/include/linux/skmsg.h
> +++ b/include/linux/skmsg.h
> @@ -58,6 +58,10 @@ struct sk_psock_progs {
>         struct bpf_prog                 *stream_parser;
>         struct bpf_prog                 *stream_verdict;
>         struct bpf_prog                 *skb_verdict;
> +       struct bpf_link                 *msg_parser_link;
> +       struct bpf_link                 *stream_parser_link;
> +       struct bpf_link                 *stream_verdict_link;
> +       struct bpf_link                 *skb_verdict_link;
>  };
>
>  enum sk_psock_state_bits {
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 9585f5345353..31660c3ffc01 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -1135,6 +1135,7 @@ enum bpf_link_type {
>         BPF_LINK_TYPE_TCX = 11,
>         BPF_LINK_TYPE_UPROBE_MULTI = 12,
>         BPF_LINK_TYPE_NETKIT = 13,
> +       BPF_LINK_TYPE_SOCKMAP = 14,
>         __MAX_BPF_LINK_TYPE,
>  };
>
> @@ -6720,6 +6721,10 @@ struct bpf_link_info {
>                         __u32 ifindex;
>                         __u32 attach_type;
>                 } netkit;
> +               struct {
> +                       __u32 map_id;
> +                       __u32 attach_type;
> +               } sockmap;
>         };
>  } __attribute__((aligned(8)));
>
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index e44c276e8617..7d392ec83655 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -5213,6 +5213,10 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
>         case BPF_PROG_TYPE_SK_LOOKUP:
>                 ret = netns_bpf_link_create(attr, prog);
>                 break;
> +       case BPF_PROG_TYPE_SK_MSG:
> +       case BPF_PROG_TYPE_SK_SKB:
> +               ret = sock_map_link_create(attr, prog);
> +               break;
>  #ifdef CONFIG_NET
>         case BPF_PROG_TYPE_XDP:
>                 ret = bpf_xdp_link_attach(attr, prog);
> diff --git a/net/core/sock_map.c b/net/core/sock_map.c
> index 27d733c0f65e..dafc9aa6e192 100644
> --- a/net/core/sock_map.c
> +++ b/net/core/sock_map.c
> @@ -24,8 +24,12 @@ struct bpf_stab {
>  #define SOCK_CREATE_FLAG_MASK                          \
>         (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
>
> +static DEFINE_MUTEX(sockmap_prog_update_mutex);
> +static DEFINE_MUTEX(sockmap_link_mutex);
> +
>  static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
> -                               struct bpf_prog *old, u32 which);
> +                               struct bpf_prog *old, struct bpf_link *link,
> +                               u32 which);
>  static struct sk_psock_progs *sock_map_progs(struct bpf_map *map);
>
>  static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
> @@ -71,7 +75,7 @@ int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog)
>         map = __bpf_map_get(f);
>         if (IS_ERR(map))
>                 return PTR_ERR(map);
> -       ret = sock_map_prog_update(map, prog, NULL, attr->attach_type);
> +       ret = sock_map_prog_update(map, prog, NULL, NULL, attr->attach_type);
>         fdput(f);
>         return ret;
>  }
> @@ -103,7 +107,7 @@ int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
>                 goto put_prog;
>         }
>
> -       ret = sock_map_prog_update(map, NULL, prog, attr->attach_type);
> +       ret = sock_map_prog_update(map, NULL, prog, NULL, attr->attach_type);
>  put_prog:
>         bpf_prog_put(prog);
>  put_map:
> @@ -1488,21 +1492,90 @@ static int sock_map_prog_lookup(struct bpf_map *map, struct bpf_prog ***pprog,
>         return 0;
>  }
>
> +static int sock_map_link_lookup(struct bpf_map *map, struct bpf_link ***plink,
> +                               struct bpf_link *link, bool skip_check, u32 which)
> +{
> +       struct sk_psock_progs *progs = sock_map_progs(map);
> +
> +       switch (which) {
> +       case BPF_SK_MSG_VERDICT:
> +               if (!skip_check &&
> +                   ((!link && progs->msg_parser_link) ||
> +                    (link && link != progs->msg_parser_link)))
> +                       return -EBUSY;
> +               *plink = &progs->msg_parser_link;
> +               break;
> +#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
> +       case BPF_SK_SKB_STREAM_PARSER:
> +               if (!skip_check &&
> +                   ((!link && progs->stream_parser_link) ||
> +                    (link && link != progs->stream_parser_link)))
> +                       return -EBUSY;
> +               *plink = &progs->stream_parser_link;
> +               break;
> +#endif
> +       case BPF_SK_SKB_STREAM_VERDICT:
> +               if (!skip_check &&
> +                   ((!link && progs->stream_verdict_link) ||
> +                    (link && link != progs->stream_verdict_link)))
> +                       return -EBUSY;
> +               *plink = &progs->stream_verdict_link;
> +               break;
> +       case BPF_SK_SKB_VERDICT:
> +               if (!skip_check &&
> +                   ((!link && progs->skb_verdict_link) ||
> +                    (link && link != progs->skb_verdict_link)))
> +                       return -EBUSY;
> +               *plink = &progs->skb_verdict_link;
> +               break;
> +       default:
> +               return -EOPNOTSUPP;
> +       }
> +

you can simplify this by

struct bpf_link *cur_link;

switch (which) {
case BPF_SK_MSG_VERDICT:
    cur_link = progs->msg_parser_link;
    break;
case ...

}

and then perform that condition validating link and cur_link just once.


> +       return 0;
> +}
> +
> +/* Handle the following four cases:
> + * prog_attach: prog != NULL, old == NULL, link == NULL
> + * prog_detach: prog == NULL, old != NULL, link == NULL
> + * link_attach: prog != NULL, old == NULL, link != NULL
> + * link_detach: prog == NULL, old != NULL, link != NULL
> + */
>  static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
> -                               struct bpf_prog *old, u32 which)
> +                               struct bpf_prog *old, struct bpf_link *link,
> +                               u32 which)
>  {
>         struct bpf_prog **pprog;
> +       struct bpf_link **plink;
>         int ret;
>
> +       mutex_lock(&sockmap_prog_update_mutex);
> +
>         ret = sock_map_prog_lookup(map, &pprog, which);
>         if (ret)
> -               return ret;
> +               goto out;
>
> -       if (old)
> -               return psock_replace_prog(pprog, prog, old);
> +       if (!link || prog)
> +               ret = sock_map_link_lookup(map, &plink, NULL, false, which);
> +       else
> +               ret = sock_map_link_lookup(map, &plink, NULL, true, which);

it feels like it would be cleaner if sock_map_link_lookup() would just
return already set link (based on which), and then you'd check update
logic with prog vs link right here

e.g., it's quite obfuscated that we validate that there is no link set
currently (this code doesn't do anything with plink).

anyways, I find it a bit hard to follow as it's written, but I'll
defer to John to make a final call on logic

> +       if (ret)
> +               goto out;
> +
> +       if (old) {
> +               ret = psock_replace_prog(pprog, prog, old);
> +               if (!ret)
> +                       *plink = NULL;
> +               goto out;
> +       }
>
>         psock_set_prog(pprog, prog);
> -       return 0;
> +       if (link)
> +               *plink = link;
> +
> +out:
> +       mutex_unlock(&sockmap_prog_update_mutex);

why this mutex is not per-sockmap?

> +       return ret;
>  }
>
>  int sock_map_bpf_prog_query(const union bpf_attr *attr,
> @@ -1657,6 +1730,180 @@ void sock_map_close(struct sock *sk, long timeout)
>  }
>  EXPORT_SYMBOL_GPL(sock_map_close);
>
> +struct sockmap_link {
> +       struct bpf_link link;
> +       struct bpf_map *map;
> +       enum bpf_attach_type attach_type;
> +};
> +
> +static struct sockmap_link *get_sockmap_link(const struct bpf_link *link)
> +{
> +       return container_of(link, struct sockmap_link, link);
> +}

nit: do you really need this helper? container_of() is a pretty
straightforward by itself, imo

> +
> +static void sock_map_link_release(struct bpf_link *link)
> +{
> +       struct sockmap_link *sockmap_link = get_sockmap_link(link);
> +
> +       mutex_lock(&sockmap_link_mutex);

similar to the above, why is this mutex not sockmap-specific? And I'd
just combine sockmap_link_mutex and sockmap_prog_update_mutex in this
case to keep it simple.

> +       if (sockmap_link->map) {
> +               (void)sock_map_prog_update(sockmap_link->map, NULL, link->prog, link,
> +                                          sockmap_link->attach_type);

WARN() if it's not meant to ever fail?

> +               bpf_map_put_with_uref(sockmap_link->map);
> +               sockmap_link->map = NULL;
> +       }
> +       mutex_unlock(&sockmap_link_mutex);
> +}
> +
> +static int sock_map_link_detach(struct bpf_link *link)
> +{
> +       sock_map_link_release(link);
> +       return 0;
> +}
> +
> +static void sock_map_link_dealloc(struct bpf_link *link)
> +{
> +       kfree(get_sockmap_link(link));
> +}
> +
> +/* Handle the following two cases:
> + * case 1: link != NULL, prog != NULL, old != NULL
> + * case 2: link != NULL, prog != NULL, old == NULL
> + */
> +static int sock_map_link_update_prog(struct bpf_link *link,
> +                                    struct bpf_prog *prog,
> +                                    struct bpf_prog *old)
> +{
> +       const struct sockmap_link *sockmap_link = get_sockmap_link(link);
> +       struct bpf_prog **pprog;
> +       struct bpf_link **plink;
> +       int ret = 0;
> +
> +       mutex_lock(&sockmap_prog_update_mutex);
> +
> +       /* If old prog not NULL, ensure old prog the same as link->prog. */

typo: "is not NULL", "is the same"

> +       if (old && link->prog != old) {

hm.. even if old matches link->prog, we should unset old and set new
link (link overrides prog attachment, basically), it shouldn't matter
if old == link->prog, unless I'm missing something?

> +               ret = -EINVAL;
> +               goto out;
> +       }
> +       /* Ensure link->prog has the same type/attach_type as the new prog. */
> +       if (link->prog->type != prog->type ||
> +           link->prog->expected_attach_type != prog->expected_attach_type) {
> +               ret = -EINVAL;
> +               goto out;
> +       }
> +
> +       ret = sock_map_prog_lookup(sockmap_link->map, &pprog,
> +                                  sockmap_link->attach_type);
> +       if (ret)
> +               goto out;
> +
> +       /* Ensure the same link between the one in map and the passed-in. */
> +       ret = sock_map_link_lookup(sockmap_link->map, &plink, link, false,
> +                                  sockmap_link->attach_type);
> +       if (ret)
> +               goto out;
> +
> +       if (old)
> +               return psock_replace_prog(pprog, prog, old);
> +
> +       psock_set_prog(pprog, prog);
> +
> +out:
> +       if (!ret)
> +               bpf_prog_inc(prog);
> +       mutex_unlock(&sockmap_prog_update_mutex);
> +       return ret;
> +}
> +
> +static u32 sock_map_link_get_map_id(const struct sockmap_link *sockmap_link)
> +{
> +       u32 map_id = 0;
> +
> +       mutex_lock(&sockmap_link_mutex);
> +       if (sockmap_link->map)
> +               map_id = sockmap_link->map->id;
> +       mutex_unlock(&sockmap_link_mutex);
> +       return map_id;
> +}
> +
> +static int sock_map_link_fill_info(const struct bpf_link *link,
> +                                  struct bpf_link_info *info)
> +{
> +       const struct sockmap_link *sockmap_link = get_sockmap_link(link);
> +       u32 map_id = sock_map_link_get_map_id(sockmap_link);
> +
> +       info->sockmap.map_id = map_id;
> +       info->sockmap.attach_type = sockmap_link->attach_type;
> +       return 0;
> +}
> +
> +static void sock_map_link_show_fdinfo(const struct bpf_link *link,
> +                                     struct seq_file *seq)
> +{
> +       const struct sockmap_link *sockmap_link = get_sockmap_link(link);
> +       u32 map_id = sock_map_link_get_map_id(sockmap_link);
> +
> +       seq_printf(seq, "map_id:\t%u\n", map_id);
> +       seq_printf(seq, "attach_type:\t%u\n", sockmap_link->attach_type);
> +}
> +
> +static const struct bpf_link_ops sock_map_link_ops = {
> +       .release = sock_map_link_release,
> +       .dealloc = sock_map_link_dealloc,
> +       .detach = sock_map_link_detach,
> +       .update_prog = sock_map_link_update_prog,
> +       .fill_link_info = sock_map_link_fill_info,
> +       .show_fdinfo = sock_map_link_show_fdinfo,
> +};
> +
> +int sock_map_link_create(const union bpf_attr *attr, struct bpf_prog *prog)
> +{
> +       struct bpf_link_primer link_primer;
> +       struct sockmap_link *sockmap_link;
> +       enum bpf_attach_type attach_type;
> +       struct bpf_map *map;
> +       int ret;
> +
> +       if (attr->link_create.flags)
> +               return -EINVAL;
> +
> +       map = bpf_map_get_with_uref(attr->link_create.target_fd);
> +       if (IS_ERR(map))
> +               return PTR_ERR(map);

check that map is SOCKMAP?

> +
> +       sockmap_link = kzalloc(sizeof(*sockmap_link), GFP_USER);
> +       if (!sockmap_link) {
> +               ret = -ENOMEM;
> +               goto out;
> +       }
> +
> +       attach_type = attr->link_create.attach_type;
> +       bpf_link_init(&sockmap_link->link, BPF_LINK_TYPE_SOCKMAP, &sock_map_link_ops, prog);
> +       sockmap_link->map = map;
> +       sockmap_link->attach_type = attach_type;
> +
> +       ret = bpf_link_prime(&sockmap_link->link, &link_primer);
> +       if (ret) {
> +               kfree(sockmap_link);
> +               goto out;
> +       }
> +
> +       ret = sock_map_prog_update(map, prog, NULL, &sockmap_link->link, attach_type);
> +       if (ret) {
> +               bpf_link_cleanup(&link_primer);
> +               goto out;
> +       }
> +
> +       bpf_prog_inc(prog);

if link was created successfully, it "inherits" prog's refcnt, so you
shouldn't do another bpf_prog_inc()? generic link_create() logic puts
prog only if this function returns error

> +
> +       return bpf_link_settle(&link_primer);
> +
> +out:
> +       bpf_map_put_with_uref(map);
> +       return ret;
> +}
> +
>  static int sock_map_iter_attach_target(struct bpf_prog *prog,
>                                        union bpf_iter_link_info *linfo,
>                                        struct bpf_iter_aux_info *aux)
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index 9585f5345353..31660c3ffc01 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -1135,6 +1135,7 @@ enum bpf_link_type {
>         BPF_LINK_TYPE_TCX = 11,
>         BPF_LINK_TYPE_UPROBE_MULTI = 12,
>         BPF_LINK_TYPE_NETKIT = 13,
> +       BPF_LINK_TYPE_SOCKMAP = 14,
>         __MAX_BPF_LINK_TYPE,
>  };
>
> @@ -6720,6 +6721,10 @@ struct bpf_link_info {
>                         __u32 ifindex;
>                         __u32 attach_type;
>                 } netkit;
> +               struct {
> +                       __u32 map_id;
> +                       __u32 attach_type;
> +               } sockmap;
>         };
>  } __attribute__((aligned(8)));
>
> --
> 2.43.0
>





[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux