Re: [PATCH v4 bpf-next 07/11] libbpf: split BTF relocation

Andrii Nakryiko <andrii.nakryiko@xxxxxxxxx> · Tue, 21 May 2024 15:34:30 -0700

On Fri, May 17, 2024 at 3:23 AM Alan Maguire <alan.maguire@xxxxxxxxxx> wrote:
>
> Map distilled base BTF type ids referenced in split BTF and their
> references to the base BTF passed in, and if the mapping succeeds,
> reparent the split BTF to the base BTF.
>
> Relocation is done by first verifying that distilled base BTF
> only consists of named INT, FLOAT, ENUM, FWD, STRUCT and
> UNION kinds; then we sort these to speed lookups.  Once sorted,
> the base BTF is iterated, and for each relevant kind we check
> for an equivalent in distilled base BTF.  When found, the
> mapping from distilled -> base BTF id and string offset is recorded.
>
> Once all mappings are established, we can update type ids
> and string offsets in split BTF and reparent it to the new base.
>
> Signed-off-by: Alan Maguire <alan.maguire@xxxxxxxxxx>
> ---
>  tools/lib/bpf/Build             |   2 +-
>  tools/lib/bpf/btf.c             |  17 ++
>  tools/lib/bpf/btf.h             |   8 +
>  tools/lib/bpf/btf_relocate.c    | 318 ++++++++++++++++++++++++++++++++
>  tools/lib/bpf/libbpf.map        |   1 +
>  tools/lib/bpf/libbpf_internal.h |   3 +
>  6 files changed, 348 insertions(+), 1 deletion(-)
>  create mode 100644 tools/lib/bpf/btf_relocate.c

[...]

>  LIBBPF_API int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts);
>
> +/**
> + * @brief **btf__relocate()** will check the split BTF *btf* for references
> + * to base BTF kinds, and verify those references are compatible with
> + * *base_btf*; if they are, *btf* is adjusted such that is re-parented to
> + * *base_btf* and type ids and strings are adjusted to accommodate this.
> + */

add boilerplate regarding return results?..

> +LIBBPF_API int btf__relocate(struct btf *btf, const struct btf *base_btf);
> +
>  struct btf_dump;
>
>  struct btf_dump_opts {
> diff --git a/tools/lib/bpf/btf_relocate.c b/tools/lib/bpf/btf_relocate.c
> new file mode 100644
> index 000000000000..c06851f05472
> --- /dev/null
> +++ b/tools/lib/bpf/btf_relocate.c
> @@ -0,0 +1,318 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (c) 2024, Oracle and/or its affiliates. */
> +
> +#ifndef _GNU_SOURCE
> +#define _GNU_SOURCE
> +#endif
> +
> +#include "btf.h"
> +#include "bpf.h"
> +#include "libbpf.h"
> +#include "libbpf_internal.h"
> +
> +struct btf;
> +
> +struct btf_relocate {
> +       __u32 search_id;                                /* must be first field; see search below */

just put that comment before the field, why this horizontal placement?

[...]

> +
> +/* Comparison between base BTF type (search type) and distilled base types (target).
> + * Because there is no bsearch_r() we need to use the search key - which also is
> + * the first element of struct btf_relocate * - as a means to retrieve the
> + * struct btf_relocate *.
> + */
> +static int cmp_base_and_distilled_btf_types(const void *idbase, const void *iddist)
> +{
> +       struct btf_relocate *r = (struct btf_relocate *)idbase;
> +       const struct btf_type *tbase = btf_type_by_id(r->base_btf, *(__u32 *)idbase);
> +       const struct btf_type *tdist = btf_type_by_id(r->dist_base_btf, *(__u32 *)iddist);

boo, id_base or base_id, id_dist or dist_id, we went through such
naming already, I believe :)

I'd also use base_t and dist_t, like you do below with dist_t already

> +
> +       return strcmp(btf__name_by_offset(r->base_btf, tbase->name_off),
> +                     btf__name_by_offset(r->dist_base_btf, tdist->name_off));
> +}
> +
> +/* Build a map from distilled base BTF ids to base BTF ids. To do so, iterate
> + * through base BTF looking up distilled type (using binary search) equivalents.
> + */
> +static int btf_relocate_map_distilled_base(struct btf_relocate *r)
> +{
> +       struct btf_type *t;
> +       const char *name;
> +       __u32 id;
> +
> +       /* generate a sort index array of type ids sorted by name for distilled
> +        * base BTF to speed lookups.
> +        */
> +       for (id = 1; id < r->nr_dist_base_types; id++)
> +               r->dist_base_index[id] = id;
> +       qsort_r(r->dist_base_index, r->nr_dist_base_types, sizeof(__u32), cmp_btf_types,
> +               (struct btf *)r->dist_base_btf);

Is qsort_r() supported in musl and in Android'd libc implementation?
I'd rather not have to scramble to fix the build for them after
release.

> +

[...]

> +               r->search_id = id;
> +               dist_id = bsearch(&r->search_id, r->dist_base_index, r->nr_dist_base_types,
> +                                 sizeof(__u32), cmp_base_and_distilled_btf_types);
> +               if (!dist_id)
> +                       continue;
> +               if (!*dist_id || *dist_id > r->nr_dist_base_types) {

>=

> +                       pr_warn("base BTF id [%d] maps to invalid distilled base BTF id [%d]\n",
> +                               id, *dist_id);
> +                       return -EINVAL;
> +               }
> +               /* validate that kinds are compatible */
> +               dist_t = btf_type_by_id(r->dist_base_btf, *dist_id);
> +               dist_kind = btf_kind(dist_t);
> +               name = btf__name_by_offset(r->dist_base_btf, dist_t->name_off);
> +               compat_kind = dist_kind == kind;
> +               if (!compat_kind) {
> +                       switch (dist_kind) {
> +                       case BTF_KIND_FWD:
> +                               compat_kind = kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION;

well, not quite. If we have FWD with kflag, then we should match it to
BTF_KIND_UNION, and otherwise to STRUCT. We shouldn't fix them.

also do we match FWD in *base BTF* with FWD in *distilled base BTF*?
That seems a bit wrong, no?

> +                               break;
> +                       case BTF_KIND_ENUM:
> +                               compat_kind = kind == BTF_KIND_ENUM64;
> +                               break;
> +                       default:
> +                               break;
> +                       }
> +                       if (!compat_kind) {
> +                               pr_warn("kind incompatibility (%d != %d) between distilled base type '%s'[%d] and base type [%d]\n",
> +                                       dist_kind, kind, name, *dist_id, id);
> +                               return -EINVAL;
> +                       }
> +               }

umm, what if we are !compat_kind here? go to next or error out, but
there has to be a check

> +               /* validate that int, float struct, union sizes are compatible;
> +                * distilled base BTF encodes an empty STRUCT/UNION with
> +                * specific size for cases where a type is embedded in a split
> +                * type (so has to preserve size info).  Do not error out
> +                * on mismatch as another size match may occur for an
> +                * identically-named type.
> +                */
> +               switch (btf_kind(dist_t)) {
> +               case BTF_KIND_INT:
> +                       if (*(__u32 *)(t + 1) != *(__u32 *)(dist_t + 1))
> +                               continue;
> +                       if (t->size != dist_t->size)
> +                               continue;
> +                       break;
> +               case BTF_KIND_FLOAT:
> +               case BTF_KIND_STRUCT:
> +               case BTF_KIND_UNION:
> +                       if (t->size != dist_t->size)
> +                               continue;
> +                       break;
> +               default:
> +                       break;
> +               }

I don't know, I feel like all these compatibility checks would be
cleaner to handle as part of single switch based on btf_kind(dist_t).
This split between that big if and switch is error-prone and hard to
follow

> +               /* map id and name */
> +               r->map[*dist_id] = id;
> +               r->str_map[dist_t->name_off] = t->name_off;
> +       }
> +       /* ensure all distilled BTF ids have a mapping... */
> +       for (id = 1; id < r->nr_dist_base_types; id++) {
> +               if (r->map[id])
> +                       continue;
> +               t = btf_type_by_id(r->dist_base_btf, id);
> +               name = btf__name_by_offset(r->dist_base_btf, t->name_off);
> +               pr_warn("distilled base BTF type '%s' [%d] is not mapped to base BTF id\n",
> +                       name, id);
> +               return -EINVAL;
> +       }
> +       return 0;
> +}
> +

[...]

> +static int btf_rewrite_strs(__u32 *str_off, void *ctx)
> +{
> +       struct btf_relocate *r = ctx;
> +       int off;
> +
> +       if (!*str_off)
> +               return 0;
> +       if (*str_off >= r->str_start) {
> +               *str_off += r->str_diff;
> +       } else {
> +               off = r->str_map[*str_off];
> +               if (!off) {
> +                       pr_warn("string '%s' [offset %d] is not mapped to base BTF",
> +                               btf__str_by_offset(r->btf, off), *str_off);

str_off is __u32, but you are using %d

> +                       return -ENOENT;
> +               }
> +               *str_off = off;
> +       }
> +       return 0;
> +}
> +
> +static int btf_relocate_finalize(struct btf_relocate *r)
> +{
> +       const struct btf_header *dist_base_hdr;
> +       const struct btf_header *base_hdr;
> +       struct btf_type *t;
> +       int i, err;
> +
> +       dist_base_hdr = btf_header(r->dist_base_btf);
> +       base_hdr = btf_header(r->base_btf);
> +       r->str_start = dist_base_hdr->str_len;
> +       r->str_diff = base_hdr->str_len - dist_base_hdr->str_len;

it's subjective, but I find str_diff a bit harder to follow compared
to just storing str_old_start and str_new_start, and then doing
obvious translation

str_off = str_off - str_old_start + str_new_start;

This is obvious and will work for any condition, whether old_start is
smaller or bigger than new_start. Same idea for ID translation.

Not a big deal, but I thought I'd call this out.

> +       for (i = 0; i < r->nr_split_types; i++) {
> +               t = btf_type_by_id(r->btf, i + r->nr_dist_base_types);
> +               err = btf_type_visit_str_offs(t, btf_rewrite_strs, r);
> +               if (err)
> +                       break;

return err? Why do we want to set btf_set_base_btf() in case of an error?

> +       }
> +       btf_set_base_btf(r->btf, r->base_btf);
> +
> +       return err;
> +}
> +
> +/* If successful, output of relocation is updated BTF with base BTF pointing
> + * at base_btf, and type ids, strings adjusted accordingly
> + */
> +int btf_relocate(struct btf *btf, const struct btf *base_btf, __u32 **map_ids)
> +{
> +       unsigned int nr_types = btf__type_cnt(btf);
> +       struct btf_relocate r = {};
> +       struct btf_type *t;
> +       int diff_id, err = 0;
> +       __u32 id, i;
> +
> +       r.dist_base_btf = btf__base_btf(btf);
> +       if (!base_btf || r.dist_base_btf == base_btf)
> +               return 0;

Why is this not an error condition? Users shouldn't be calling
relocate on something that shouldn't be relocated.

> +
> +       r.nr_dist_base_types = btf__type_cnt(r.dist_base_btf);
> +       r.nr_base_types = btf__type_cnt(base_btf);
> +       r.nr_split_types = nr_types - r.nr_dist_base_types;
> +       r.btf = btf;
> +       r.base_btf = base_btf;
> +
> +       r.map = calloc(nr_types, sizeof(*r.map));

Is this an ID map? Then maybe call it id_map to be symmetrical to str_map?

> +       r.str_map = calloc(btf_header(r.dist_base_btf)->str_len, sizeof(*r.str_map));
> +       r.dist_base_index = calloc(r.nr_dist_base_types, sizeof(*r.dist_base_index));
> +       if (!r.map || !r.str_map || !r.dist_base_index) {
> +               err = -ENOMEM;
> +               goto err_out;
> +       }
> +
> +       err = btf_relocate_validate_distilled_base(&r);
> +       if (err)
> +               goto err_out;
> +
> +       diff_id = r.nr_base_types - r.nr_dist_base_types;
> +       /* Split BTF ids will start from after last base BTF id. */
> +       for (id = r.nr_dist_base_types; id < nr_types; id++)
> +               r.map[id] = id + diff_id;
> +
> +       /* Build a map from distilled base ids to actual base BTF ids; it is used
> +        * to update split BTF id references.
> +        */
> +       err = btf_relocate_map_distilled_base(&r);
> +       if (err)
> +               goto err_out;
> +
> +       /* Next, rewrite type ids in split BTF, replacing split ids with updated
> +        * ids based on number of types in base BTF, and base ids with
> +        * relocated ids from base_btf.
> +        */
> +       for (i = 0, id = r.nr_dist_base_types; i < r.nr_split_types; i++, id++) {
> +               t = btf_type_by_id(btf, id);
> +               err = btf_type_visit_type_ids(t, btf_relocate_rewrite_type_id, &r);
> +               if (err)
> +                       goto err_out;
> +       }
> +       /* Finally reset base BTF to base_btf; as part of this operation, string
> +        * offsets are also updated, and we are done.
> +        */
> +       err = btf_relocate_finalize(&r);
> +err_out:
> +       if (!err && map_ids)
> +               *map_ids = r.map;
> +       else
> +               free(r.map);

this is a bit convoluted. maybe something like

    err = btf_relocate_finalize(&r);
    if (err)
        goto err_out;

    if (map_ids) {
        *map_ids = r.map;
        r.map = NULL;
    }

err_out:
    ... all the free()s unconditionally ...

(even just doing only error case for err_out and duplicating a few
free()'s in success path seems nicer)

> +       free(r.str_map);
> +       free(r.dist_base_index);
> +       return err;
> +}

[...]