> On Jul 24, 2019, at 12:27 PM, Andrii Nakryiko <andriin@xxxxxx> wrote:
> 
> This patch implements the core logic for BPF CO-RE offsets relocations.
> All the details are described in code comments.
> 
> Signed-off-by: Andrii Nakryiko <andriin@xxxxxx>
> ---
> tools/lib/bpf/libbpf.c | 866 ++++++++++++++++++++++++++++++++++++++++-
> tools/lib/bpf/libbpf.h |   1 +
> 2 files changed, 861 insertions(+), 6 deletions(-)
> 
> diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
> index 8741c39adb1c..86d87bf10d46 100644
> --- a/tools/lib/bpf/libbpf.c
> +++ b/tools/lib/bpf/libbpf.c
> @@ -38,6 +38,7 @@
>  #include <sys/stat.h>
>  #include <sys/types.h>
>  #include <sys/vfs.h>
> +#include <sys/utsname.h>
>  #include <tools/libc_compat.h>
>  #include <libelf.h>
>  #include <gelf.h>
> @@ -47,6 +48,7 @@
>  #include "btf.h"
>  #include "str_error.h"
>  #include "libbpf_internal.h"
> +#include "hashmap.h"
> 
>  #ifndef EM_BPF
>  #define EM_BPF 247
> 
> @@ -1013,16 +1015,22 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
>  }
> 
>  static const struct btf_type *skip_mods_and_typedefs(const struct btf *btf,
> -                                                     __u32 id)
> +                                                     __u32 id,
> +                                                     __u32 *res_id)
>  {
>          const struct btf_type *t = btf__type_by_id(btf, id);
> 
> +        if (res_id)
> +                *res_id = id;
> +
>          while (true) {
>                  switch (BTF_INFO_KIND(t->info)) {
>                  case BTF_KIND_VOLATILE:
>                  case BTF_KIND_CONST:
>                  case BTF_KIND_RESTRICT:
>                  case BTF_KIND_TYPEDEF:
> +                        if (res_id)
> +                                *res_id = t->type;
>                          t = btf__type_by_id(btf, t->type);
>                          break;
>                  default:
> @@ -1041,7 +1049,7 @@ static const struct btf_type *skip_mods_and_typedefs(const struct btf *btf,
>  static bool get_map_field_int(const char *map_name, const struct btf *btf,
>                                const struct btf_type *def,
>                                const struct btf_member *m, __u32 *res) {
> -        const struct btf_type *t = skip_mods_and_typedefs(btf, m->type);
> +        const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL);
>          const char *name = btf__name_by_offset(btf, m->name_off);
>          const struct btf_array *arr_info;
>          const struct btf_type *arr_t;
> @@ -1107,7 +1115,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
>                  return -EOPNOTSUPP;
>          }
> 
> -        def = skip_mods_and_typedefs(obj->btf, var->type);
> +        def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
>          if (BTF_INFO_KIND(def->info) != BTF_KIND_STRUCT) {
>                  pr_warning("map '%s': unexpected def kind %u.\n",
>                             map_name, BTF_INFO_KIND(var->info));
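A quick aside on the new res_id out-parameter, mostly to confirm my reading:
it hands back the type ID of the fully resolved type, so callers can keep
walking the type graph from where the skipping stopped. A minimal sketch of
what I'd expect, with made-up type IDs:

    /* BTF: [1] int, [2] typedef my_int -> [1], [3] const -> [2] */
    __u32 id;
    const struct btf_type *t;

    t = skip_mods_and_typedefs(btf, 3 /* const my_int */, &id);
    /* t points at [1] (the underlying int), and id == 1, not 3 */

If that's the intent, passing NULL at the existing call sites above looks
right, since those callers only need the type itself.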
> @@ -2289,6 +2297,845 @@ bpf_program_reloc_btf_ext(struct bpf_program *prog, struct bpf_object *obj,
>          return 0;
>  }
> 
> +#define BPF_CORE_SPEC_MAX_LEN 64
> +
> +/* represents BPF CO-RE field or array element accessor */
> +struct bpf_core_accessor {
> +        __u32 type_id;          /* struct/union type or array element type */
> +        __u32 idx;              /* field index or array index */
> +        const char *name;       /* field name or NULL for array accessor */
> +};
> +
> +struct bpf_core_spec {
> +        const struct btf *btf;
> +        /* high-level spec: named fields and array indices only */
> +        struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN];
> +        /* high-level spec length */
> +        int len;
> +        /* raw, low-level spec: 1-to-1 with accessor spec string */
> +        int raw_spec[BPF_CORE_SPEC_MAX_LEN];
> +        /* raw spec length */
> +        int raw_len;
> +        /* field byte offset represented by spec */
> +        __u32 offset;
> +};
> +
> +static bool str_is_empty(const char *s)
> +{
> +        return !s || !s[0];
> +}
> +
> +static int btf_kind(const struct btf_type *t)
> +{
> +        return BTF_INFO_KIND(t->info);
> +}
> +
> +static bool btf_is_composite(const struct btf_type *t)
> +{
> +        int kind = btf_kind(t);
> +
> +        return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION;
> +}
> +
> +static bool btf_is_array(const struct btf_type *t)
> +{
> +        return btf_kind(t) == BTF_KIND_ARRAY;
> +}
> +
> +/*
> + * Turn bpf_offset_reloc into a low- and high-level spec representation,
> + * validating correctness along the way, as well as calculating resulting
> + * field offset (in bytes), specified by accessor string. Low-level spec
> + * captures every single level of nestedness, including traversing anonymous
> + * struct/union members. High-level one only captures semantically meaningful
> + * "turning points": named fields and array indices.
> + * E.g., for this case:
> + *
> + *   struct sample {
> + *       int __unimportant;
> + *       struct {
> + *           int __1;
> + *           int __2;
> + *           int a[7];
> + *       };
> + *   };
> + *
> + *   struct sample *s = ...;
> + *
> + *   int x = &s->a[3]; // access string = '0:1:2:3'
> + *
> + * Low-level spec has 1:1 mapping with each element of access string (it's
> + * just a parsed access string representation): [0, 1, 2, 3].
> + *
> + * High-level spec will capture only 3 points:
> + *   - initial zero-index access by pointer (&s->... is the same as &s[0]...);
> + *   - field 'a' access (corresponds to '2' in low-level spec);
> + *   - array element #3 access (corresponds to '3' in low-level spec).
> + *
> + */
> +static int bpf_core_spec_parse(const struct btf *btf,
> +                               __u32 type_id,
> +                               const char *spec_str,
> +                               struct bpf_core_spec *spec)
> +{
> +        int access_idx, parsed_len, i;
> +        const struct btf_type *t;
> +        __u32 id = type_id;
> +        const char *name;
> +        __s64 sz;
> +
> +        if (str_is_empty(spec_str) || *spec_str == ':')
> +                return -EINVAL;
> +
> +        memset(spec, 0, sizeof(*spec));
> +        spec->btf = btf;
> +
> +        /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */
> +        while (*spec_str) {
> +                if (*spec_str == ':')
> +                        ++spec_str;
> +                if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1)
> +                        return -EINVAL;
> +                if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
> +                        return -E2BIG;
> +                spec_str += parsed_len;
> +                spec->raw_spec[spec->raw_len++] = access_idx;
> +        }
> +
> +        if (spec->raw_len == 0)
> +                return -EINVAL;
> +
> +        for (i = 0; i < spec->raw_len; i++) {
> +                t = skip_mods_and_typedefs(btf, id, &id);
> +                if (!t)
> +                        return -EINVAL;
> +
> +                access_idx = spec->raw_spec[i];
> +
> +                if (i == 0) {
> +                        /* first spec value is always reloc type array index */
> +                        spec->spec[spec->len].type_id = id;
> +                        spec->spec[spec->len].idx = access_idx;
> +                        spec->len++;
> +
> +                        sz = btf__resolve_size(btf, id);
> +                        if (sz < 0)
> +                                return sz;
> +                        spec->offset += access_idx * sz;
> +                        continue;
> +                }
> +
> +                if (btf_is_composite(t)) {
> +                        const struct btf_member *m = (void *)(t + 1);
> +                        __u32 offset;
> +
> +                        if (access_idx >= BTF_INFO_VLEN(t->info))
> +                                return -EINVAL;
> +
> +                        m = &m[access_idx];
> +
> +                        if (BTF_INFO_KFLAG(t->info)) {
> +                                if (BTF_MEMBER_BITFIELD_SIZE(m->offset))
> +                                        return -EINVAL;
> +                                offset = BTF_MEMBER_BIT_OFFSET(m->offset);
> +                        } else {
> +                                offset = m->offset;
> +                        }
> +                        if (m->offset % 8)
> +                                return -EINVAL;
> +                        spec->offset += offset / 8;
> +
> +                        if (m->name_off) {
> +                                name = btf__name_by_offset(btf, m->name_off);
> +                                if (str_is_empty(name))
> +                                        return -EINVAL;
> +
> +                                spec->spec[spec->len].type_id = id;
> +                                spec->spec[spec->len].idx = access_idx;
> +                                spec->spec[spec->len].name = name;
> +                                spec->len++;
> +                        }
> +
> +                        id = m->type;
> +                } else if (btf_is_array(t)) {
> +                        const struct btf_array *a = (void *)(t + 1);
> +
> +                        t = skip_mods_and_typedefs(btf, a->type, &id);
> +                        if (!t || access_idx >= a->nelems)
> +                                return -EINVAL;
> +
> +                        spec->spec[spec->len].type_id = id;
> +                        spec->spec[spec->len].idx = access_idx;
> +                        spec->len++;
> +
> +                        sz = btf__resolve_size(btf, id);
> +                        if (sz < 0)
> +                                return sz;
> +                        spec->offset += access_idx * sz;
> +                } else {
> +                        pr_warning("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %d\n",
> +                                   type_id, spec_str, i, id, btf_kind(t));
> +                        return -EINVAL;
> +                }
> +        }
> +
> +        if (spec->len == 0)
> +                return -EINVAL;
> +
> +        return 0;
> +}
> +
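The comment's example is helpful. To sanity-check the offset math, here is
how I'd expect bpf_core_spec_parse() to walk the access string "0:1:2:3"
against struct sample from the comment above (assuming 4-byte int and no
padding):

    /* i=0: base index 0   -> offset += 0 * sizeof(struct sample) = 0
     * i=1: anon struct    -> offset += 4  (no high-level accessor added)
     * i=2: field 'a'      -> offset += 8
     * i=3: array index 3  -> offset += 3 * sizeof(int) = 12
     */

That gives spec->offset == 24, which matches offsetof(struct sample, a)
(12 bytes) plus 3 * 4 for the element, and spec->len == 3, matching the
three "turning points" the comment describes. So the math looks right to me.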
> +/* Given 'some_struct_name___with_flavor' return the length of a name prefix
> + * before last triple underscore. Struct name part after last triple
> + * underscore is ignored by BPF CO-RE relocation during relocation matching.
> + */
> +static size_t bpf_core_essential_name_len(const char *name)
> +{
> +        size_t n = strlen(name);
> +        int i = n - 3;
> +
> +        while (i > 0) {
> +                if (name[i] == '_' && name[i + 1] == '_' && name[i + 2] == '_')
> +                        return i;
> +                i--;
> +        }
> +        return n;
> +}
> +
> +/* dynamically sized list of type IDs */
> +struct ids_vec {
> +        __u32 *data;
> +        int len;
> +};
> +
> +static void bpf_core_free_cands(struct ids_vec *cand_ids)
> +{
> +        free(cand_ids->data);
> +        free(cand_ids);
> +}
> +
> +static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf,
> +                                           __u32 local_type_id,
> +                                           const struct btf *targ_btf)
> +{
> +        size_t local_essent_len, targ_essent_len;
> +        const char *local_name, *targ_name;
> +        const struct btf_type *t;
> +        struct ids_vec *cand_ids;
> +        __u32 *new_ids;
> +        int i, err, n;
> +
> +        t = btf__type_by_id(local_btf, local_type_id);
> +        if (!t)
> +                return ERR_PTR(-EINVAL);
> +
> +        local_name = btf__name_by_offset(local_btf, t->name_off);
> +        if (str_is_empty(local_name))
> +                return ERR_PTR(-EINVAL);
> +        local_essent_len = bpf_core_essential_name_len(local_name);
> +
> +        cand_ids = calloc(1, sizeof(*cand_ids));
> +        if (!cand_ids)
> +                return ERR_PTR(-ENOMEM);
> +
> +        n = btf__get_nr_types(targ_btf);
> +        for (i = 1; i <= n; i++) {
> +                t = btf__type_by_id(targ_btf, i);
> +                targ_name = btf__name_by_offset(targ_btf, t->name_off);
> +                if (str_is_empty(targ_name))
> +                        continue;
> +
> +                targ_essent_len = bpf_core_essential_name_len(targ_name);
> +                if (targ_essent_len != local_essent_len)
> +                        continue;
> +
> +                if (strncmp(local_name, targ_name, local_essent_len) == 0) {
> +                        pr_debug("[%d] (%s): found candidate [%d] (%s)\n",
> +                                 local_type_id, local_name, i, targ_name);
> +                        new_ids = realloc(cand_ids->data,
> +                                          (cand_ids->len + 1) * sizeof(*new_ids));
> +                        if (!new_ids) {
> +                                err = -ENOMEM;
> +                                goto err_out;
> +                        }
> +                        cand_ids->data = new_ids;
> +                        cand_ids->data[cand_ids->len++] = i;
> +                }
> +        }
> +        return cand_ids;
> +err_out:
> +        bpf_core_free_cands(cand_ids);
> +        return ERR_PTR(err);
> +}
> +
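It might be worth a sentence in the commit log about the ___flavor
convention, since it's load-bearing here. If I follow, only the name
prefix before the last triple underscore participates in matching, e.g.:

    /* bpf_core_essential_name_len("task_struct")       == 11
     * bpf_core_essential_name_len("task_struct___v46") == 11
     *
     * so a local 'struct task_struct___v46' ("___v46" being a made-up
     * flavor suffix) is a candidate match for the kernel's
     * 'struct task_struct'.
     */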
> +/* Check two types for compatibility, skipping const/volatile/restrict and
> + * typedefs, to ensure we are relocating offset to the compatible entities:
> + *   - any two STRUCTs/UNIONs are compatible and can be mixed;
> + *   - any two FWDs are compatible;
> + *   - any two PTRs are always compatible;
> + *   - for ENUMs, check sizes, names are ignored;
> + *   - for INT, size and bitness should match, signedness is ignored;
> + *   - for ARRAY, dimensionality is ignored, element types are checked for
> + *     compatibility recursively;
> + *   - everything else shouldn't be ever a target of relocation.
> + * These rules are not set in stone and probably will be adjusted as we get
> + * more experience with using BPF CO-RE relocations.
> + */
> +static int bpf_core_fields_are_compat(const struct btf *local_btf,
> +                                      __u32 local_id,
> +                                      const struct btf *targ_btf,
> +                                      __u32 targ_id)
> +{
> +        const struct btf_type *local_type, *targ_type;
> +        __u16 kind;
> +
> +recur:
> +        local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
> +        targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
> +        if (!local_type || !targ_type)
> +                return -EINVAL;
> +
> +        if (btf_is_composite(local_type) && btf_is_composite(targ_type))
> +                return 1;
> +        if (BTF_INFO_KIND(local_type->info) != BTF_INFO_KIND(targ_type->info))
> +                return 0;
> +
> +        kind = BTF_INFO_KIND(local_type->info);
> +        switch (kind) {
> +        case BTF_KIND_FWD:
> +        case BTF_KIND_PTR:
> +                return 1;
> +        case BTF_KIND_ENUM:
> +                return local_type->size == targ_type->size;
> +        case BTF_KIND_INT: {
> +                __u32 loc_int = *(__u32 *)(local_type + 1);
> +                __u32 targ_int = *(__u32 *)(targ_type + 1);
> +
> +                return BTF_INT_OFFSET(loc_int) == 0 &&
> +                       BTF_INT_OFFSET(targ_int) == 0 &&
> +                       local_type->size == targ_type->size &&
> +                       BTF_INT_BITS(loc_int) == BTF_INT_BITS(targ_int);
> +        }
> +        case BTF_KIND_ARRAY: {
> +                const struct btf_array *loc_a, *targ_a;
> +
> +                loc_a = (void *)(local_type + 1);
> +                targ_a = (void *)(targ_type + 1);
> +                local_id = loc_a->type;
> +                targ_id = targ_a->type;
> +                goto recur;
> +        }
> +        default:
> +                pr_warning("unexpected kind %d relocated, local [%d], target [%d]\n",
> +                           kind, local_id, targ_id);
> +                return 0;
> +        }
> +}
> +
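The compatibility rules read fine to me. For future readers, a few concrete
cases implied by the switch above, assuming an LP64 target:

    /* int        vs unsigned int -> compatible (signedness ignored)
     * int        vs long         -> not compatible (4 vs 8 bytes)
     * int[4]     vs int[8]       -> compatible (dimensions ignored,
     *                               element types checked recursively)
     * struct foo vs union bar    -> compatible (any two composites are)
     */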
> +/*
> + * Given single high-level accessor (either named field or array index) in
> + * local type, find corresponding high-level accessor for a target type. Along
> + * the way, maintain low-level spec for target as well. Also keep updating
> + * target offset.
> + */
> +static int bpf_core_match_member(const struct btf *local_btf,
> +                                 const struct bpf_core_accessor *local_acc,
> +                                 const struct btf *targ_btf,
> +                                 __u32 targ_id,
> +                                 struct bpf_core_spec *spec,
> +                                 __u32 *next_targ_id)
> +{
> +        const struct btf_type *local_type, *targ_type;
> +        const struct btf_member *local_member, *m;
> +        const char *local_name, *targ_name;
> +        __u32 local_id;
> +        int i, n, found;
> +
> +        targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
> +        if (!targ_type)
> +                return -EINVAL;
> +        if (!btf_is_composite(targ_type))
> +                return 0;
> +
> +        local_id = local_acc->type_id;
> +        local_type = btf__type_by_id(local_btf, local_id);
> +        local_member = (void *)(local_type + 1);
> +        local_member += local_acc->idx;
> +        local_name = btf__name_by_offset(local_btf, local_member->name_off);
> +
> +        n = BTF_INFO_VLEN(targ_type->info);
> +        m = (void *)(targ_type + 1);
> +        for (i = 0; i < n; i++, m++) {
> +                __u32 offset;
> +
> +                /* bitfield relocations not supported */
> +                if (BTF_INFO_KFLAG(targ_type->info)) {
> +                        if (BTF_MEMBER_BITFIELD_SIZE(m->offset))
> +                                continue;
> +                        offset = BTF_MEMBER_BIT_OFFSET(m->offset);
> +                } else {
> +                        offset = m->offset;
> +                }
> +                if (offset % 8)
> +                        continue;
> +
> +                /* too deep struct/union/array nesting */
> +                if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
> +                        return -E2BIG;
> +
> +                /* speculate this member will be the good one */
> +                spec->offset += offset / 8;
> +                spec->raw_spec[spec->raw_len++] = i;
> +
> +                targ_name = btf__name_by_offset(targ_btf, m->name_off);
> +                if (str_is_empty(targ_name)) {
> +                        /* embedded struct/union, we need to go deeper */
> +                        found = bpf_core_match_member(local_btf, local_acc,
> +                                                      targ_btf, m->type,
> +                                                      spec, next_targ_id);
> +                        if (found) /* either found or error */
> +                                return found;
> +                } else if (strcmp(local_name, targ_name) == 0) {
> +                        /* matching named field */
> +                        struct bpf_core_accessor *targ_acc;
> +
> +                        targ_acc = &spec->spec[spec->len++];
> +                        targ_acc->type_id = targ_id;
> +                        targ_acc->idx = i;
> +                        targ_acc->name = targ_name;
> +
> +                        *next_targ_id = m->type;
> +                        found = bpf_core_fields_are_compat(local_btf,
> +                                                           local_member->type,
> +                                                           targ_btf, m->type);
> +                        if (!found)
> +                                spec->len--; /* pop accessor */
> +                        return found;
> +                }
> +                /* member turned out to be not we looked for */

        /* member turned out not to be what we looked for */

Or something similar.

The rest of this patch looks good to me.

Thanks,
Song