On Sun, Aug 2, 2020 at 7:23 PM Yonghong Song <yhs@xxxxxx> wrote:
>
>
>
> On 8/2/20 6:25 PM, Andrii Nakryiko wrote:
> > On Sat, Aug 1, 2020 at 9:22 PM Yonghong Song <yhs@xxxxxx> wrote:
> >>
> >> Commit a5cbe05a6673 ("bpf: Implement bpf iterator for
> >> map elements") added bpf iterator support for
> >> map elements. The map element bpf iterator requires
> >> info to identify a particular map. In the above
> >> commit, the attr->link_create.target_fd is used
> >> to carry map_fd and an enum bpf_iter_link_info
> >> is added to uapi to specify the target_fd actually
> >> representing a map_fd:
> >>     enum bpf_iter_link_info {
> >>             BPF_ITER_LINK_UNSPEC = 0,
> >>             BPF_ITER_LINK_MAP_FD = 1,
> >>
> >>             MAX_BPF_ITER_LINK_INFO,
> >>     };
> >>
> >> This is an extensible approach as we can grow
> >> enumerator for pid, cgroup_id, etc. and we can
> >> unionize target_fd for pid, cgroup_id, etc.
> >> But in the future, there are chances that
> >> more complex customization may happen, e.g.,
> >> for tasks, it could be filtered based on
> >> both cgroup_id and user_id.
> >>
> >> This patch changed the uapi to have fields
> >>     __aligned_u64   iter_info;
> >>     __u32           iter_info_len;
> >> for additional iter_info for link_create.
> >> The iter_info is defined as
> >>     union bpf_iter_link_info {
> >>             struct {
> >>                     __u32   map_fd;
> >>             } map;
> >>     };
> >>
> >> So future extension for additional customization
> >> will be easier. The bpf_iter_link_info will be
> >> passed to target callback to validate and generic
> >> bpf_iter framework does not need to deal it any
> >> more.
> >>
> >> Signed-off-by: Yonghong Song <yhs@xxxxxx>
> >> ---
> >>   include/linux/bpf.h            | 10 ++++---
> >>   include/uapi/linux/bpf.h       | 15 +++++-----
> >>   kernel/bpf/bpf_iter.c          | 52 +++++++++++++++-------------------
> >>   kernel/bpf/map_iter.c          | 37 ++++++++++++++++++------
> >>   kernel/bpf/syscall.c           |  2 +-
> >>   net/core/bpf_sk_storage.c      | 37 ++++++++++++++++++------
> >>   tools/include/uapi/linux/bpf.h | 15 +++++-----
> >>   7 files changed, 104 insertions(+), 64 deletions(-)
> >>
> >
> > [...]
> >
> >>  int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
> >>  {
> >> +       union bpf_iter_link_info __user *ulinfo;
> >>         struct bpf_link_primer link_primer;
> >>         struct bpf_iter_target_info *tinfo;
> >> -       struct bpf_iter_aux_info aux = {};
> >> +       union bpf_iter_link_info linfo;
> >>         struct bpf_iter_link *link;
> >> -       u32 prog_btf_id, target_fd;
> >> +       u32 prog_btf_id, linfo_len;
> >>         bool existed = false;
> >> -       struct bpf_map *map;
> >>         int err;
> >>
> >> +       memset(&linfo, 0, sizeof(union bpf_iter_link_info));
> >> +
> >> +       ulinfo = u64_to_user_ptr(attr->link_create.iter_info);
> >> +       linfo_len = attr->link_create.iter_info_len;
> >> +       if (ulinfo && linfo_len) {
> >
> > We probably want to be more strict here: if either pointer or len is
> > non-zero, both should be present and valid. Otherwise we can have
> > garbage in iter_info, as long as iter_info_len is zero.
>
> yes, it is possible iter_info_len = 0 and iter_info is not null and
> if this happens, iter_info will not be examined.
>
> in kernel, we have places this is handled similarly. For example,
> for cgroup bpf_prog query.
>
> kernel/bpf/cgroup.c, function __cgroup_bpf_query
>
>     __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
>     ...
>     if (attr->query.prog_cnt == 0 || !prog_ids || !cnt)
>             return 0;
>
> In the above case, it is possible prog_cnt = 0 and prog_ids != NULL,
> or prog_ids == NULL and prog_cnt != 0, and we won't return error
> to user space.
>
> Not 100% sure whether we have convention here or not.

I don't know either, but I'd assume that we didn't think about 100%
strictness when originally implementing this. So I'd go with a very
strict check for this new functionality.

>
> >
> >> +               err = bpf_check_uarg_tail_zero(ulinfo, sizeof(linfo),
> >> +                                              linfo_len);
> >> +               if (err)
> >> +                       return err;
> >> +               linfo_len = min_t(u32, linfo_len, sizeof(linfo));
> >> +               if (copy_from_user(&linfo, ulinfo, linfo_len))
> >> +                       return -EFAULT;
> >> +       }
> >> +
> >>         prog_btf_id = prog->aux->attach_btf_id;
> >>         mutex_lock(&targets_mutex);
> >>         list_for_each_entry(tinfo, &targets, list) {
> >> @@ -411,13 +425,6 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
> >>         if (!existed)
> >>                 return -ENOENT;
> >>
> >> -       /* Make sure user supplied flags are target expected. */
> >> -       target_fd = attr->link_create.target_fd;
> >> -       if (attr->link_create.flags != tinfo->reg_info->req_linfo)
> >> -               return -EINVAL;
> >> -       if (!attr->link_create.flags && target_fd)
> >> -               return -EINVAL;
> >> -
> >
> > Please still ensure that no flags are specified.
>
> Make sense. I also need to ensure target_fd is 0 since it is not used
> any more.
>

yep, good catch

> >
> >
> >>         link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN);
> >>         if (!link)
> >>                 return -ENOMEM;
> >> @@ -431,28 +438,15 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
> >>                 return err;
> >>         }
> >>
> >
> > [...]
> >
> >> -static int bpf_iter_check_map(struct bpf_prog *prog,
> >> -                             struct bpf_iter_aux_info *aux)
> >> +static int bpf_iter_attach_map(struct bpf_prog *prog,
> >> +                              union bpf_iter_link_info *linfo,
> >> +                              struct bpf_iter_aux_info *aux)
> >>  {
> >> -       struct bpf_map *map = aux->map;
> >> +       struct bpf_map *map;
> >> +       int err = -EINVAL;
> >>
> >> -       if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
> >> +       if (!linfo->map.map_fd)
> >>                 return -EINVAL;
> >
> > This could be -EBADF?
>
> Good suggestion. Will do.
> > > > >> > >> - if (prog->aux->max_rdonly_access > map->value_size) > >> - return -EACCES; > >> + map = bpf_map_get_with_uref(linfo->map.map_fd); > >> + if (IS_ERR(map)) > >> + return PTR_ERR(map); > >> + > >> + if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) > >> + goto put_map; > >> + > >> + if (prog->aux->max_rdonly_access > map->value_size) { > >> + err = -EACCES; > >> + goto put_map; > >> + } > > > > [...] > >