On Wed, Jul 31, 2019 at 11:47:53PM -0700, Andrii Nakryiko wrote: > This patch implements the core logic for BPF CO-RE offsets relocations. > Every instruction that needs to be relocated has corresponding > bpf_offset_reloc as part of BTF.ext. Relocations are performed by trying > to match recorded "local" relocation spec against potentially many > compatible "target" types, creating corresponding spec. Details of the > algorithm are noted in corresponding comments in the code. > > Signed-off-by: Andrii Nakryiko <andriin@xxxxxx> > Acked-by: Song Liu <songliubraving@xxxxxx> ... > + if (btf_is_composite(t)) { > + const struct btf_member *m = (void *)(t + 1); > + __u32 offset; > + > + if (access_idx >= BTF_INFO_VLEN(t->info)) > + return -EINVAL; > + > + m = &m[access_idx]; > + > + if (BTF_INFO_KFLAG(t->info)) { > + if (BTF_MEMBER_BITFIELD_SIZE(m->offset)) > + return -EINVAL; > + offset = BTF_MEMBER_BIT_OFFSET(m->offset); > + } else { > + offset = m->offset; > + } very similar logic exists in btf_dump.c probably makes sense to make a common helper at some point. > +static size_t bpf_core_essential_name_len(const char *name) > +{ > + size_t n = strlen(name); > + int i = n - 3; > + > + while (i > 0) { > + if (name[i] == '_' && name[i + 1] == '_' && name[i + 2] == '_') > + return i; > + i--; > + } > + return n; > +} that's a bit of an eye irritant. How about? 
size_t n = strlen(name); int i, cnt = 0; for (i = n - 1; i >= 0; i--) { if (name[i] == '_') { cnt++; } else { if (cnt == 3) return i + 1; cnt = 0; } } return n; > + case BTF_KIND_ARRAY: { > + const struct btf_array *loc_a, *targ_a; > + > + loc_a = (void *)(local_type + 1); > + targ_a = (void *)(targ_type + 1); > + local_id = loc_a->type; > + targ_id = targ_a->type; can we add a helper like: const struct btf_array *btf_array(const struct btf_type *t) { return (const struct btf_array *)(t + 1); } then above will be: case BTF_KIND_ARRAY: { local_id = btf_array(local_type)->type; targ_id = btf_array(targ_type)->type; and a bunch of code in btf.c and btf_dump.c would be cleaner as well? > + goto recur; > + } > + default: > + pr_warning("unexpected kind %d relocated, local [%d], target [%d]\n", > + kind, local_id, targ_id); > + return 0; > + } > +} > + > +/* > + * Given single high-level named field accessor in local type, find > + * corresponding high-level accessor for a target type. Along the way, > + * maintain low-level spec for target as well. Also keep updating target > + * offset. > + * > + * Searching is performed through recursive exhaustive enumeration of all > + * fields of a struct/union. If there are any anonymous (embedded) > + * structs/unions, they are recursively searched as well. If field with > + * desired name is found, check compatibility between local and target types, > + * before returning result. > + * > + * 1 is returned, if field is found. > + * 0 is returned if no compatible field is found. > + * <0 is returned on error. 
> + */ > +static int bpf_core_match_member(const struct btf *local_btf, > + const struct bpf_core_accessor *local_acc, > + const struct btf *targ_btf, > + __u32 targ_id, > + struct bpf_core_spec *spec, > + __u32 *next_targ_id) > +{ > + const struct btf_type *local_type, *targ_type; > + const struct btf_member *local_member, *m; > + const char *local_name, *targ_name; > + __u32 local_id; > + int i, n, found; > + > + targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); > + if (!targ_type) > + return -EINVAL; > + if (!btf_is_composite(targ_type)) > + return 0; > + > + local_id = local_acc->type_id; > + local_type = btf__type_by_id(local_btf, local_id); > + local_member = (void *)(local_type + 1); > + local_member += local_acc->idx; > + local_name = btf__name_by_offset(local_btf, local_member->name_off); > + > + n = BTF_INFO_VLEN(targ_type->info); > + m = (void *)(targ_type + 1); new btf_member() helper? > + for (i = 0; i < n; i++, m++) { > + __u32 offset; > + > + /* bitfield relocations not supported */ > + if (BTF_INFO_KFLAG(targ_type->info)) { > + if (BTF_MEMBER_BITFIELD_SIZE(m->offset)) > + continue; > + offset = BTF_MEMBER_BIT_OFFSET(m->offset); > + } else { > + offset = m->offset; > + } > + if (offset % 8) > + continue; same bit of code again? definitely could use a helper. 
> + for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) { > + targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, > + &targ_id); > + if (!targ_type) > + return -EINVAL; > + > + if (local_acc->name) { > + matched = bpf_core_match_member(local_spec->btf, > + local_acc, > + targ_btf, targ_id, > + targ_spec, &targ_id); > + if (matched <= 0) > + return matched; > + } else { > + /* for i=0, targ_id is already treated as array element > + * type (because it's the original struct), for others > + * we should find array element type first > + */ > + if (i > 0) { > + const struct btf_array *a; > + > + if (!btf_is_array(targ_type)) > + return 0; > + > + a = (void *)(targ_type + 1); > + if (local_acc->idx >= a->nelems) > + return 0; am I reading it correctly that the local spec requested out-of-bounds index in the target array type? Why this is 'ignore' instead of -EINVAL? > +/* > + * Probe few well-known locations for vmlinux kernel image and try to load BTF > + * data out of it to use for target BTF. > + */ > +static struct btf *bpf_core_find_kernel_btf(void) > +{ > + const char *locations[] = { > + "/lib/modules/%1$s/vmlinux-%1$s", > + "/usr/lib/modules/%1$s/kernel/vmlinux", > + }; > + char path[PATH_MAX + 1]; > + struct utsname buf; > + struct btf *btf; > + int i, err; > + > + err = uname(&buf); > + if (err) { > + pr_warning("failed to uname(): %d\n", err); defensive programming ? I think uname() can fail only if &buf points to non-existing page like null. > + return ERR_PTR(err); > + } > + > + for (i = 0; i < ARRAY_SIZE(locations); i++) { > + snprintf(path, PATH_MAX, locations[i], buf.release); > + pr_debug("attempting to load kernel BTF from '%s'\n", path); I think this debug message would have been more useful after access(). 
> + > + if (access(path, R_OK)) > + continue; > + > + btf = btf__parse_elf(path, NULL); > + if (IS_ERR(btf)) > + continue; > + > + pr_debug("successfully loaded kernel BTF from '%s'\n", path); > + return btf; > + } > + > + pr_warning("failed to find valid kernel BTF\n"); > + return ERR_PTR(-ESRCH); > +} > + > +/* Output spec definition in the format: > + * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>, > + * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b > + */ > +static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec) > +{ > + const struct btf_type *t; > + const char *s; > + __u32 type_id; > + int i; > + > + type_id = spec->spec[0].type_id; > + t = btf__type_by_id(spec->btf, type_id); > + s = btf__name_by_offset(spec->btf, t->name_off); > + libbpf_print(level, "[%u] (%s) + ", type_id, s); imo extra []() don't improve readability of the dump. > + > + for (i = 0; i < spec->raw_len; i++) > + libbpf_print(level, "%d%s", spec->raw_spec[i], > + i == spec->raw_len - 1 ? " => " : ":"); > + > + libbpf_print(level, "%u @ &x", spec->offset); > + > + for (i = 0; i < spec->len; i++) { > + if (spec->spec[i].name) > + libbpf_print(level, ".%s", spec->spec[i].name); > + else > + libbpf_print(level, "[%u]", spec->spec[i].idx); > + } > + > +} > + > +static size_t bpf_core_hash_fn(const void *key, void *ctx) > +{ > + return (size_t)key; > +} > + > +static bool bpf_core_equal_fn(const void *k1, const void *k2, void *ctx) > +{ > + return k1 == k2; > +} > + > +static void *u32_to_ptr(__u32 x) > +{ > + return (void *)(uintptr_t)x; > +} u32 to pointer on 64-bit arch?! That surely needs a comment. > + > +/* > + * CO-RE relocate single instruction. > + * > + * The outline and important points of the algorithm: > + * 1. For given local type, find corresponding candidate target types. > + * Candidate type is a type with the same "essential" name, ignoring > + * everything after last triple underscore (___). 
E.g., `sample`, > + `sample___flavor_one`, `sample___flavor_another_one`, are all candidates > + for each other. Names with triple underscore are referred to as > + "flavors" and are useful, among other things, to allow to > + specify/support incompatible variations of the same kernel struct, which > + might differ between different kernel versions and/or build > + configurations. > + > + * N.B. Struct "flavors" could be generated by bpftool's BTF-to-C > + converter, when deduplicated BTF of a kernel still contains more than > + one different types with the same name. In that case, ___2, ___3, etc > + are appended starting from second name conflict. But struct flavors are > + also useful to be defined "locally", in BPF program, to extract same > + data from incompatible changes between different kernel > + versions/configurations. For instance, to handle field renames between > + kernel versions, one can use two flavors of the struct name with the > + same common name and use conditional relocations to extract that field, > + depending on target kernel version. there are actual kernel types that have ___ in the name. Ex: struct lmc___media We probably need to revisit this 'flavor' convention. 
> + for (i = 0, j = 0; i < cand_ids->len; i++) { > + cand_id = cand_ids->data[i]; > + cand_type = btf__type_by_id(targ_btf, cand_id); > + cand_name = btf__name_by_offset(targ_btf, cand_type->name_off); > + > + err = bpf_core_spec_match(&local_spec, targ_btf, > + cand_id, &cand_spec); > + if (err < 0) { > + pr_warning("prog '%s': relo #%d: failed to match spec ", > + prog_name, relo_idx); > + bpf_core_dump_spec(LIBBPF_WARN, &local_spec); > + libbpf_print(LIBBPF_WARN, > + " to candidate #%d [%d] (%s): %d\n", > + i, cand_id, cand_name, err); > + return err; > + } > + if (err == 0) { > + pr_debug("prog '%s': relo #%d: candidate #%d [%d] (%s) doesn't match spec ", > + prog_name, relo_idx, i, cand_id, cand_name); > + bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec); > + libbpf_print(LIBBPF_DEBUG, "\n"); > + continue; > + } > + > + pr_debug("prog '%s': relo #%d: candidate #%d matched as spec ", > + prog_name, relo_idx, i); did you mention that you're going to make a helper for these debug dumps?