On 24/08/2023 21:13, Andrii Nakryiko wrote: > Implement a simple and straightforward BTF sanity check when parsing BTF > data. Right now it's very basic and just validates that all the string > offsets and type IDs are within valid range. But even with such simple > checks it fixes a bunch of crashes found by OSS fuzzer ([0]-[5]) and > will allow fuzzer to make further progress. > > Some other invariants will be checked in follow up patches (like > ensuring there is no infinite type loops), but this seems like a good > start already. > > v1->v2: > - fix array index_type vs type copy/paste error (Eduard); > - add type ID check in FUNC_PROTO validation (Eduard); > - move sanity check to btf parsing time (Martin). > > [0] https://github.com/libbpf/libbpf/issues/482 > [1] https://github.com/libbpf/libbpf/issues/483 > [2] https://github.com/libbpf/libbpf/issues/485 > [3] https://github.com/libbpf/libbpf/issues/613 > [4] https://github.com/libbpf/libbpf/issues/618 > [5] https://github.com/libbpf/libbpf/issues/619 > > Closes: https://github.com/libbpf/libbpf/issues/617 > Signed-off-by: Andrii Nakryiko <andrii@xxxxxxxxxx> few small suggestions that could be in followups if needed, so Reviewed-by: Alan Maguire <alan.maguire@xxxxxxxxxx> > --- > tools/lib/bpf/btf.c | 148 ++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 148 insertions(+) > > diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c > index 8484b563b53d..28905539f045 100644 > --- a/tools/lib/bpf/btf.c > +++ b/tools/lib/bpf/btf.c > @@ -448,6 +448,153 @@ static int btf_parse_type_sec(struct btf *btf) > return 0; > } > > +static int btf_validate_str(const struct btf *btf, __u32 str_off, const char *what, __u32 type_id) > +{ > + const char *s; > + > + s = btf__str_by_offset(btf, str_off); > + if (!s) { > + pr_warn("btf: type [%u]: invalid %s (string offset %u)\n", type_id, what, str_off); > + return -EINVAL; > + } > + > + return 0; > +} > + > +static int btf_validate_id(const struct btf *btf, __u32 id, __u32 ctx_id) > +{ > + const struct btf_type *t; > + might be worth having a self-reference test here to ensure ctx_id != id. > + t = btf__type_by_id(btf, id); > + if (!t) { > + pr_warn("btf: type [%u]: invalid referenced type ID %u\n", ctx_id, id); > + return -EINVAL; > + } > + > + return 0; > +} > + > +static int btf_validate_type(const struct btf *btf, const struct btf_type *t, __u32 id) > +{ > + __u32 kind = btf_kind(t); > + int err, i, n; > + > + err = btf_validate_str(btf, t->name_off, "type name", id); > + if (err) > + return err; > + > + switch (kind) { > + case BTF_KIND_UNKN: > + case BTF_KIND_INT: > + case BTF_KIND_FWD: > + case BTF_KIND_FLOAT: > + break; > + case BTF_KIND_PTR: > + case BTF_KIND_TYPEDEF: > + case BTF_KIND_VOLATILE: > + case BTF_KIND_CONST: > + case BTF_KIND_RESTRICT: > + case BTF_KIND_FUNC: would it be worth doing an additional check here that t->type for BTF_KIND_FUNC is BTF_KIND_FUNC_PROTO? I think that's the only case where BTF mandates the kind of the target is a specific kind, so might be worth checking. I initially thought passing an expected kind to btf_validate_id() might make sense, but given that there's only one case we have a specific expectation that seemed unnecessary. > + case BTF_KIND_VAR: > + case BTF_KIND_DECL_TAG: > + case BTF_KIND_TYPE_TAG: > + err = btf_validate_id(btf, t->type, id); > + if (err) > + return err; > + break; > + case BTF_KIND_ARRAY: { > + const struct btf_array *a = btf_array(t); > + > + err = btf_validate_id(btf, a->type, id); > + err = err ?: btf_validate_id(btf, a->index_type, id); > + if (err) > + return err; > + break; > + } > + case BTF_KIND_STRUCT: > + case BTF_KIND_UNION: { > + const struct btf_member *m = btf_members(t); > + > + n = btf_vlen(t); > + for (i = 0; i < n; i++, m++) { > + err = btf_validate_str(btf, m->name_off, "field name", id); > + err = err ?: btf_validate_id(btf, m->type, id); > + if (err) > + return err; > + } > + break; > + } > + case BTF_KIND_ENUM: { > + const struct btf_enum *m = btf_enum(t); > + > + n = btf_vlen(t); > + for (i = 0; i < n; i++, m++) { > + err = btf_validate_str(btf, m->name_off, "enum name", id); > + if (err) > + return err; > + } > + break; > + } > + case BTF_KIND_ENUM64: { > + const struct btf_enum64 *m = btf_enum64(t); > + > + n = btf_vlen(t); > + for (i = 0; i < n; i++, m++) { > + err = btf_validate_str(btf, m->name_off, "enum name", id); > + if (err) > + return err; > + } > + break; > + } > + case BTF_KIND_FUNC_PROTO: { > + const struct btf_param *m = btf_params(t); > + > + n = btf_vlen(t); > + for (i = 0; i < n; i++, m++) { > + err = btf_validate_str(btf, m->name_off, "param name", id); > + err = err ?: btf_validate_id(btf, m->type, id); > + if (err) > + return err; > + } > + break; > + } > + case BTF_KIND_DATASEC: { > + const struct btf_var_secinfo *m = btf_var_secinfos(t); > + > + n = btf_vlen(t); > + for (i = 0; i < n; i++, m++) { > + err = btf_validate_id(btf, m->type, id); > + if (err) > + return err; > + } > + break; > + } > + default: > + pr_warn("btf: type [%u]: unrecognized kind %u\n", id, kind); > + return -EINVAL; > + } > + return 0; > +} > + > +/* Validate basic sanity of BTF. It's intentionally less thorough than > + * kernel's validation and validates only properties of BTF that libbpf relies > + * on to be correct (e.g., valid type IDs, valid string offsets, etc) > + */ > +static int btf_sanity_check(const struct btf *btf) > +{ > + const struct btf_type *t; > + __u32 i, n = btf__type_cnt(btf); > + int err; > + > + for (i = 1; i < n; i++) { > + t = btf_type_by_id(btf, i); > + err = btf_validate_type(btf, t, i); > + if (err) > + return err; > + } > + return 0; > +} > + > __u32 btf__type_cnt(const struct btf *btf) > { > return btf->start_id + btf->nr_types; > @@ -902,6 +1049,7 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf) > > err = btf_parse_str_sec(btf); > err = err ?: btf_parse_type_sec(btf); > + err = err ?: btf_sanity_check(btf); > if (err) > goto done; > While we usually load vmlinux BTF from /sys/kernel/btf, we fall back to a set of on-disk locations. Specifically in btf__load_vmlinux_btf(), for the case where the array index > 0, it might be worth sanity-checking BTF there too. Thanks! Alan