Ensure that there can be no ownership cycles among different types by
way of having owning objects that can hold some other type as their
element. For instance, a map value can only hold allocated objects, but
these are allowed to have another bpf_list_head. To prevent unbounded
recursion while freeing resources, elements of a bpf_list_head in local
kptrs that are themselves part of a list in a map value can never have
another bpf_list_head. Later patches will verify this with dedicated
BTF selftests.

Also, to make runtime destruction easier, once btf_struct_metas is
fully populated, we can stash the metadata of the value type directly
in the metadata of the list_head fields, as that allows easier access
to the value type's layout when destructing it at runtime from the
btf_field entry of the list head itself.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@xxxxxxxxx>
---
 include/linux/bpf.h  |  1 +
 include/linux/btf.h  |  1 +
 kernel/bpf/btf.c     | 71 ++++++++++++++++++++++++++++++++++++++++++++
 kernel/bpf/syscall.c |  4 +++
 4 files changed, 77 insertions(+)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index eb6ea53fa5a2..323985a39ece 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -191,6 +191,7 @@ struct btf_field_list_head {
 	struct btf *btf;
 	u32 value_btf_id;
 	u32 node_offset;
+	struct btf_record *value_rec;
 };
 
 struct btf_field {
diff --git a/include/linux/btf.h b/include/linux/btf.h
index a01a8da20021..42d8f3730a8d 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -178,6 +178,7 @@ int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t);
 int btf_find_timer(const struct btf *btf, const struct btf_type *t);
 struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type *t,
 				    u32 field_mask, u32 value_size);
+int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec);
 struct btf_field_offs *btf_parse_field_offs(struct btf_record *rec);
 bool btf_type_is_void(const struct btf_type *t);
 s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind);
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index a04e10477567..91aa9c96621f 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3723,6 +3723,67 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
 	return ERR_PTR(ret);
 }
 
+int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec)
+{
+	int i;
+
+	/* There are two owning types, kptr_ref and bpf_list_head. The former
+	 * only supports storing kernel types, which can never store references
+	 * to program allocated local types, at least not yet. Hence we only need
+	 * to ensure that bpf_list_head ownership does not form cycles.
+	 */
+	if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & BPF_LIST_HEAD))
+		return 0;
+	for (i = 0; i < rec->cnt; i++) {
+		struct btf_struct_meta *meta;
+		u32 btf_id;
+
+		if (!(rec->fields[i].type & BPF_LIST_HEAD))
+			continue;
+		btf_id = rec->fields[i].list_head.value_btf_id;
+		meta = btf_find_struct_meta(btf, btf_id);
+		if (!meta)
+			return -EFAULT;
+		rec->fields[i].list_head.value_rec = meta->record;
+
+		if (!(rec->field_mask & BPF_LIST_NODE))
+			continue;
+
+		/* We need to ensure ownership acyclicity among all types. The
+		 * proper way to do it would be to topologically sort all BTF
+		 * IDs based on the ownership edges, since there can be multiple
+		 * bpf_list_head in a type. Instead, we use the following
+		 * reasoning:
+		 *
+		 * - A type can only be owned by another type in user BTF if it
+		 *   has a bpf_list_node.
+		 * - A type can only _own_ another type in user BTF if it has a
+		 *   bpf_list_head.
+		 *
+		 * We ensure that if a type has both bpf_list_head and
+		 * bpf_list_node, its element types cannot be owning types.
+		 *
+		 * To ensure acyclicity:
+		 *
+		 * When A only has bpf_list_head, ownership chain can be:
+		 * A -> B -> C
+		 * Where:
+		 * - B has both bpf_list_head and bpf_list_node.
+		 * - C only has bpf_list_node.
+		 *
+		 * When A has both bpf_list_head and bpf_list_node, some other
+		 * type already owns it in the BTF domain, hence it can not own
+		 * another owning type through any of the bpf_list_head edges.
+		 * A -> B
+		 * Where:
+		 * - B only has bpf_list_node.
+		 */
+		if (meta->record->field_mask & BPF_LIST_HEAD)
+			return -ELOOP;
+	}
+	return 0;
+}
+
 static int btf_field_offs_cmp(const void *_a, const void *_b, const void *priv)
 {
 	const u32 a = *(const u32 *)_a;
@@ -5413,6 +5474,16 @@ static struct btf *btf_parse(bpfptr_t btf_data, u32 btf_data_size,
 	}
 	btf->struct_meta_tab = struct_meta_tab;
 
+	if (struct_meta_tab) {
+		int i;
+
+		for (i = 0; i < struct_meta_tab->cnt; i++) {
+			err = btf_check_and_fixup_fields(btf, struct_meta_tab->types[i].record);
+			if (err < 0)
+				goto errout_meta;
+		}
+	}
+
 	if (log->level && bpf_verifier_log_full(log)) {
 		err = -ENOSPC;
 		goto errout_meta;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 56ae97d490f4..6140cbc3ed8a 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1054,6 +1054,10 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
 		}
 	}
 
+	ret = btf_check_and_fixup_fields(btf, map->record);
+	if (ret < 0)
+		goto free_map_tab;
+
 	if (map->ops->map_check_btf) {
 		ret = map->ops->map_check_btf(map, btf, key_type, value_type);
 		if (ret < 0)
-- 
2.38.1
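
As an illustration, a minimal sketch of the ownership shapes this check
accepts, assuming the __contains() decl tag annotation used by the
selftests later in the series (type names are hypothetical):

	struct only_node {
		struct bpf_list_node node;	/* can be owned, never owns */
	};

	struct node_and_head {
		struct bpf_list_node node;	/* owned by map_value's list */
		struct bpf_list_head head __contains(only_node, node);
	};

	struct map_value {
		struct bpf_list_head head __contains(node_and_head, node);
	};

This is the permitted A -> B -> C chain from the comment above:
map_value -> node_and_head -> only_node. Had node_and_head's list been
declared to contain a type that itself has a bpf_list_head, that
element type's btf_record would carry BPF_LIST_HEAD and
btf_check_and_fixup_fields() would reject the BTF with -ELOOP, since
such an edge could close an ownership cycle and cause unbounded
recursion while freeing.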
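
The value_rec stashed in struct btf_field_list_head is what keeps that
freeing step cheap: the element type's layout is reachable directly
from the list head's btf_field. A rough sketch of the intended access
pattern, where example_release_element() is a hypothetical stand-in for
the destruction logic that later patches add:

	/* Illustrative only: the stashed btf_record describes the special
	 * fields of the element type, so each element popped off the list
	 * can be released without a fresh BTF lookup.
	 */
	static void example_drain_one(const struct btf_field *field, void *elem)
	{
		const struct btf_record *elem_rec = field->list_head.value_rec;

		example_release_element(elem_rec, elem);	/* hypothetical */
	}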