On 02/11/2022 11:09, Eduard Zingerman wrote: > Resolve forward declarations that don't take part in type graphs > comparisons if declaration name is unambiguous. Example: > > CU #1: > > struct foo; // standalone forward declaration > struct foo *some_global; > > CU #2: > > struct foo { int x; }; > struct foo *another_global; > > The `struct foo` from CU #1 is not a part of any definition that is > compared against another definition while `btf_dedup_struct_types` > processes structural types. After `btf_dedup_struct_types` > the BTF looks as follows: > > [1] STRUCT 'foo' size=4 vlen=1 ... > [2] INT 'int' size=4 ... > [3] PTR '(anon)' type_id=1 > [4] FWD 'foo' fwd_kind=struct > [5] PTR '(anon)' type_id=4 > > This commit adds a new pass `btf_dedup_resolve_fwds`, that maps such > forward declarations to structs or unions with identical name in case > if the name is not ambiguous. > > The pass is positioned before `btf_dedup_ref_types` so that types > [3] and [5] could be merged as a same type after [1] and [4] are merged. > The final result for the example above looks as follows: > > [1] STRUCT 'foo' size=4 vlen=1 > 'x' type_id=2 bits_offset=0 > [2] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED > [3] PTR '(anon)' type_id=1 > > For defconfig kernel with BTF enabled this removes 63 forward > declarations. Examples of removed declarations: `pt_regs`, `in6_addr`. > The running time of `btf__dedup` function is increased by about 3%. > > Signed-off-by: Eduard Zingerman <eddyz87@xxxxxxxxx> A few small things below, but looks great! 
Reviewed-by: Alan Maguire <alan.maguire@xxxxxxxxxx> > --- > tools/lib/bpf/btf.c | 140 ++++++++++++++++++++++++++++++++++++++++++-- > 1 file changed, 136 insertions(+), 4 deletions(-) > > diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c > index 04db202aac3d..d2f994d30af7 100644 > --- a/tools/lib/bpf/btf.c > +++ b/tools/lib/bpf/btf.c > @@ -2881,6 +2881,7 @@ static int btf_dedup_strings(struct btf_dedup *d); > static int btf_dedup_prim_types(struct btf_dedup *d); > static int btf_dedup_struct_types(struct btf_dedup *d); > static int btf_dedup_ref_types(struct btf_dedup *d); > +static int btf_dedup_resolve_fwds(struct btf_dedup *d); > static int btf_dedup_compact_types(struct btf_dedup *d); > static int btf_dedup_remap_types(struct btf_dedup *d); > > @@ -2988,15 +2989,16 @@ static int btf_dedup_remap_types(struct btf_dedup *d); > * Algorithm summary > * ================= > * > - * Algorithm completes its work in 6 separate passes: > + * Algorithm completes its work in 7 separate passes: > * > * 1. Strings deduplication. > * 2. Primitive types deduplication (int, enum, fwd). > * 3. Struct/union types deduplication. > - * 4. Reference types deduplication (pointers, typedefs, arrays, funcs, func > + * 4. Resolve unambiguous forward declarations. > + * 5. Reference types deduplication (pointers, typedefs, arrays, funcs, func > * protos, and const/volatile/restrict modifiers). > - * 5. Types compaction. > - * 6. Types remapping. > + * 6. Types compaction. > + * 7. Types remapping. > * > * Algorithm determines canonical type descriptor, which is a single > * representative type for each truly unique type. 
This canonical type is the > @@ -3060,6 +3062,11 @@ int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts) > pr_debug("btf_dedup_struct_types failed:%d\n", err); > goto done; > } > + err = btf_dedup_resolve_fwds(d); > + if (err < 0) { > + pr_debug("btf_dedup_resolve_fwds failed:%d\n", err); > + goto done; > + } > err = btf_dedup_ref_types(d); > if (err < 0) { > pr_debug("btf_dedup_ref_types failed:%d\n", err); > @@ -4526,6 +4533,131 @@ static int btf_dedup_ref_types(struct btf_dedup *d) > return 0; > } > > +/* > + * Collect a map from type names to type ids for all canonical structs > + * and unions. If the same name is shared by several canonical types > + * use a special value 0 to indicate this fact. > + */ > +static int btf_dedup_fill_unique_names_map(struct btf_dedup *d, struct hashmap *names_map) > +{ > + __u32 nr_types = btf__type_cnt(d->btf); > + struct btf_type *t; > + __u32 type_id; > + __u16 kind; > + int err; > + > + /* > + * Iterate over base and split module ids in order to get all > + * available structs in the map. 
> + */ > + for (type_id = 1; type_id < nr_types; ++type_id) { > + t = btf_type_by_id(d->btf, type_id); > + kind = btf_kind(t); > + > + if (kind != BTF_KIND_STRUCT && kind != BTF_KIND_UNION) > + continue; > + > + /* Skip non-canonical types */ > + if (type_id != d->map[type_id]) > + continue; > + > + err = hashmap__add(names_map, t->name_off, type_id); > + if (err == -EEXIST) > + err = hashmap__set(names_map, t->name_off, 0, NULL, NULL); > +> + if (err) > + return err; > + } > + > + return 0; > +} > + > +static int btf_dedup_resolve_fwd(struct btf_dedup *d, struct hashmap *names_map, __u32 type_id) > +{ > + struct btf_type *t = btf_type_by_id(d->btf, type_id); > + enum btf_fwd_kind fwd_kind = btf_kflag(t); > + __u16 cand_kind, kind = btf_kind(t); > + struct btf_type *cand_t; > + uintptr_t cand_id = 0; > + > + if (kind != BTF_KIND_FWD) > + return 0; > + > + /* Skip if this FWD already has a mapping */ > + if (type_id != d->map[type_id]) > + return 0; > + > + hashmap__find(names_map, t->name_off, &cand_id); Would it be safer to do if (!hashmap__find(names_map, t->name_off, &cand_id)) return 0; > + if (!cand_id) > + return 0; > + ...and might be no harm to reiterate the special meaning of 0 here (multiple name matches -> ambiguous) since it's a valid type id (void) in other cases. While strictly you probably don't need separate conditions for not found and found ambiguous name, it might read a bit easier and more consistently with other users of hashmap__find(). > + cand_t = btf_type_by_id(d->btf, cand_id); > + cand_kind = btf_kind(cand_t); > + if (!(cand_kind == BTF_KIND_STRUCT && fwd_kind == BTF_FWD_STRUCT) && > + !(cand_kind == BTF_KIND_UNION && fwd_kind == BTF_FWD_UNION)) > + return 0; > + I'd find if ((cand_kind == BTF_KIND_STRUCT && fwd_kind != BTF_FWD_STRUCT) || (cand_kind == BTF_KIND_UNION && fwd_kind != BTF_FWD_UNION)) ...a bit easier to parse, but again not a big deal. 
> + d->map[type_id] = cand_id; > + > + return 0; > +} > + > +/* > + * Resolve unambiguous forward declarations. > + * > + * The lion's share of all FWD declarations is resolved during > + * `btf_dedup_struct_types` phase when different type graphs are > + * compared against each other. However, if in some compilation unit a > + * FWD declaration is not a part of a type graph compared against > + * another type graph that declaration's canonical type would not be > + * changed. Example: > + * > + * CU #1: > + * > + * struct foo; > + * struct foo *some_global; > + * > + * CU #2: > + * > + * struct foo { int u; }; > + * struct foo *another_global; > + * > + * After `btf_dedup_struct_types` the BTF looks as follows: > + * > + * [1] STRUCT 'foo' size=4 vlen=1 ... > + * [2] INT 'int' size=4 ... > + * [3] PTR '(anon)' type_id=1 > + * [4] FWD 'foo' fwd_kind=struct > + * [5] PTR '(anon)' type_id=4 > + * > + * This pass assumes that such FWD declarations should be mapped to > + * structs or unions with identical name in case if the name is not > + * ambiguous. > + */ > +static int btf_dedup_resolve_fwds(struct btf_dedup *d) > +{ > + int i, err; > + struct hashmap *names_map = > + hashmap__new(btf_dedup_identity_hash_fn, btf_dedup_equal_fn, NULL); > + > + if (!names_map) > + return -ENOMEM; > + > + err = btf_dedup_fill_unique_names_map(d, names_map); > + if (err < 0) > + goto exit; > + > + for (i = 0; i < d->btf->nr_types; i++) { > + err = btf_dedup_resolve_fwd(d, names_map, d->btf->start_id + i); > + if (err < 0) > + goto exit; could just break; here I suppose > + } > + > +exit: > + hashmap__free(names_map); > + return err; > +} > + > /* > * Compact types. > * >