Re: [PATCH v1 bpf-next 2/4] bpf: Refactor btf_find_field with btf_field_info_search

Andrii Nakryiko <andrii.nakryiko@xxxxxxxxx> · Wed, 1 Nov 2023 14:56:48 -0700

On Mon, Oct 23, 2023 at 3:00 PM Dave Marchevsky <davemarchevsky@xxxxxx> wrote:
>
> btf_find_field takes (btf_type, special_field_types) and returns info
> about the specific special fields in btf_type, in the form of an array
> of struct btf_field info. The meat of this 'search for special fields'
> process happens in btf_find_datasec_var and btf_find_struct_field
> helpers: each member is examined and, if it's special, a struct
> btf_field_info describing it is added to the return array. Indeed, any
> function that might add to the output probably also looks at struct
> members or datasec vars.
>
> Most of the parameters passed around between helpers doing the search
> can be grouped into two categories: "info about the output array" and
> "info about which fields to search for". This patch joins those together
> in struct btf_field_info_search, simplifying the signatures of most
> helpers involved in the search, including array flattening helper that
> later patches in the series will add.
>
> The aforementioned array flattening logic will greatly increase the
> number of btf_field_info's needed to describe some structs, so this
> patch also turns the statically-sized struct btf_field_info
> info_arr[BTF_FIELDS_MAX] into a growable array with a larger max size.
>
> Implementation notes:
>   * BTF_FIELDS_MAX is now max size of growable btf_field_info *infos
>     instead of initial (and max) size of static result array
>     * Static array before had 10 elems (+1 tmp btf_field_info)
>     * growable array starts with 16 and doubles every time it needs to
>       grow, up to BTF_FIELDS_MAX of 256
>
>   * __next_field_infos is used with next_cnt > 1 later in the series
>
>   * btf_find_{datasec_var, struct_field} have special logic for an edge
>     case where the result array is full but the field being examined
>     gets BTF_FIELD_IGNORE return from btf_find_{struct, kptr,graph_root}
>     * If result wasn't BTF_FIELD_IGNORE, a btf_field_info would have to
>       be added to the array. Since it is we can look at next field.
>     * Before this patch the logic handling this edge case was hard to
>       follow and used a tmp btf_struct_info. This patch moves the
>       add-if-not-ignore logic down into btf_find_{struct, kptr,
>       graph_root}, removing the need to opportunistically grab a
>       btf_field_info to populate before knowing if it's actually
>       necessary. Now a new one is grabbed only if the field shouldn't
>       be ignored.
>
>   * Within btf_find_{datasec_var, struct_field}, each member is
>     currently examined in two phases: first btf_get_field_type checks
>     the member type name, then btf_find_{struct,graph_root,kptr} do
>     additional sanity checking and populate struct btf_field_info. Kptr
>     fields don't have a specific type name, though, so
>     btf_get_field_type assumes that - if we're looking for kptrs - any
>     member that fails type name check could be a kptr field.
>     * As a result btf_find_kptr effectively does all the pointer
>       hopping, sanity checking, and info population. Instead of trying
>       to fit kptr handling into this two-phase model, where it's
>       unwieldy, handle it in a separate codepath when name matching
>       fails.
>
> Signed-off-by: Dave Marchevsky <davemarchevsky@xxxxxx>
> ---
>  include/linux/bpf.h |   4 +-
>  kernel/bpf/btf.c    | 331 +++++++++++++++++++++++++++++---------------
>  2 files changed, 219 insertions(+), 116 deletions(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index b4825d3cdb29..e07cac5cc3cf 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -171,8 +171,8 @@ struct bpf_map_ops {
>  };
>
>  enum {
> -       /* Support at most 10 fields in a BTF type */
> -       BTF_FIELDS_MAX     = 10,
> +       /* Support at most 256 fields in a BTF type */
> +       BTF_FIELDS_MAX     = 256,
>  };
>
>  enum btf_field_type {
> diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
> index 975ef8e73393..e999ba85c363 100644
> --- a/kernel/bpf/btf.c
> +++ b/kernel/bpf/btf.c
> @@ -3257,25 +3257,94 @@ struct btf_field_info {
>         };
>  };
>
> +struct btf_field_info_search {
> +       /* growable array. allocated in __next_field_infos
> +        * free'd in btf_parse_fields
> +        */
> +       struct btf_field_info *infos;
> +       /* size of infos */
> +       int info_cnt;
> +       /* index of next btf_field_info to populate */
> +       int idx;

this seems pretty unconventional naming, typically info_cnt would be
called "cap" (for capacity) and idx would be "cnt" (number of actually
filled elements)

> +
> +       /* btf_field_types to search for */
> +       u32 field_mask;
> +       /* btf_field_types found earlier */
> +       u32 seen_mask;
> +};
> +
> +/* Reserve next_cnt contiguous btf_field_info's for caller to populate
> + * Returns ptr to first reserved btf_field_info
> + */
> +static struct btf_field_info *__next_field_infos(struct btf_field_info_search *srch,
> +                                                u32 next_cnt)

both next_cnt and __next_field_infos that do allocation are quite
surprising, this is a function to add/append more elements, so why not
add_field_infos() and new_cnt or add_cnt? The terminology around
"next" is confusing, IMO, because you'd expect this to be about
iteration, not memory allocation.

> +{
> +       struct btf_field_info *new_infos, *ret;
> +
> +       if (!next_cnt)
> +               return ERR_PTR(-EINVAL);
> +
> +       if (srch->idx + next_cnt < srch->info_cnt)
> +               goto nogrow_out;

why goto? just update count and return a pointer? Goto makes sense if
there is at least two code paths with the same exit logic.

> +
> +       /* Need to grow */
> +       if (srch->idx + next_cnt > BTF_FIELDS_MAX)
> +               return ERR_PTR(-E2BIG);
> +
> +       while (srch->idx + next_cnt >= srch->info_cnt)
> +               srch->info_cnt = srch->infos ? srch->info_cnt * 2 : 16;

I think krealloc() is smart enough to not allocate exact number of
bytes, and instead is rounding it up to closes bucket size. So I think
you can just keep it simple and ask for `srch->idx + next_cnt`
elements, and leave smartness to krealloc(). Not sure why 16 is the
starting size, though? How many elements do we typically have? 1, 2,
3, 5?

> +
> +       new_infos = krealloc(srch->infos,
> +                            srch->info_cnt * sizeof(struct btf_field_info),
> +                            GFP_KERNEL | __GFP_NOWARN);
> +       if (!new_infos)
> +               return ERR_PTR(-ENOMEM);
> +       srch->infos = new_infos;
> +
> +nogrow_out:
> +       ret = &srch->infos[srch->idx];
> +       srch->idx += next_cnt;
> +       return ret;
> +}
> +
> +/* Request srch's next free btf_field_info to populate, possibly growing
> + * srch->infos
> + */
> +static struct btf_field_info *__next_field_info(struct btf_field_info_search *srch)
> +{
> +       return __next_field_infos(srch, 1);
> +}
> +
>  static int btf_find_struct(const struct btf *btf, const struct btf_type *t,
>                            u32 off, int sz, enum btf_field_type field_type,
> -                          struct btf_field_info *info)
> +                          struct btf_field_info_search *srch)
>  {
> +       struct btf_field_info *info;
> +
>         if (!__btf_type_is_struct(t))
>                 return BTF_FIELD_IGNORE;
>         if (t->size != sz)
>                 return BTF_FIELD_IGNORE;
> +
> +       info = __next_field_info(srch);
> +       if (IS_ERR_OR_NULL(info))

Can it return NULL? If not, let's not check for NULL at all, it's misleading.

> +               return PTR_ERR(info);
> +
>         info->type = field_type;
>         info->off = off;
>         return BTF_FIELD_FOUND;
>  }
>
> -static int btf_find_kptr(const struct btf *btf, const struct btf_type *t,
> -                        u32 off, int sz, struct btf_field_info *info)
> +static int btf_maybe_find_kptr(const struct btf *btf, const struct btf_type *t,
> +                              u32 off, struct btf_field_info_search *srch)
>  {
> +       struct btf_field_info *info;
>         enum btf_field_type type;
>         u32 res_id;
>
> +       if (!(srch->field_mask & BPF_KPTR))
> +               return BTF_FIELD_IGNORE;
> +
>         /* Permit modifiers on the pointer itself */
>         if (btf_type_is_volatile(t))
>                 t = btf_type_by_id(btf, t->type);
> @@ -3304,6 +3373,10 @@ static int btf_find_kptr(const struct btf *btf, const struct btf_type *t,
>         if (!__btf_type_is_struct(t))
>                 return -EINVAL;
>
> +       info = __next_field_info(srch);
> +       if (IS_ERR_OR_NULL(info))
> +               return PTR_ERR(info);
> +

ditto

>         info->type = type;
>         info->off = off;
>         info->kptr.type_id = res_id;

[...]

>  #undef field_mask_test_name_check_seen
>  #undef field_mask_test_name
>
> +static int __struct_member_check_align(u32 off, enum btf_field_type field_type)
> +{
> +       u32 align = btf_field_type_align(field_type);
> +
> +       if (off % align)

maybe guard against zero division? WARN_ON_ONCE() in
btf_field_type_align() shouldn't cause crash here

> +               return -EINVAL;
> +       return 0;
> +}
> +
>  static int btf_find_struct_field(const struct btf *btf,
> -                                const struct btf_type *t, u32 field_mask,
> -                                struct btf_field_info *info, int info_cnt)
> +                                const struct btf_type *t,
> +                                struct btf_field_info_search *srch)
>  {
> -       int ret, idx = 0, align, sz, field_type;
>         const struct btf_member *member;
> -       struct btf_field_info tmp;
> -       u32 i, off, seen_mask = 0;
> +       int ret, field_type;
> +       u32 i, off, sz;
>

[...]

> +
> +static int __datasec_vsi_check_align_sz(const struct btf_var_secinfo *vsi,
> +                                       enum btf_field_type field_type,
> +                                       u32 expected_sz)
> +{
> +       u32 off, align;
> +
> +       off = vsi->offset;
> +       align = btf_field_type_align(field_type);
> +
> +       if (vsi->size != expected_sz)
> +               return -EINVAL;
> +       if (off % align)
> +               return -EINVAL;

same about possible align == 0

> +
> +       return 0;
>  }
>

[...]