Re: [PATCH bpf-next 3/4] libbpf: Resolve unambiguous forward declarations

Alan Maguire <alan.maguire@xxxxxxxxxx> · Wed, 2 Nov 2022 16:36:14 +0000

On 02/11/2022 11:09, Eduard Zingerman wrote:
> Resolve forward declarations that don't take part in type graphs
> comparisons if declaration name is unambiguous. Example:
> 
> CU #1:
> 
> struct foo;              // standalone forward declaration
> struct foo *some_global;
> 
> CU #2:
> 
> struct foo { int x; };
> struct foo *another_global;
> 
> The `struct foo` from CU #1 is not a part of any definition that is
> compared against another definition while `btf_dedup_struct_types`
> processes structural types. After `btf_dedup_struct_types`
> the BTF looks as follows:
> 
> [1] STRUCT 'foo' size=4 vlen=1 ...
> [2] INT 'int' size=4 ...
> [3] PTR '(anon)' type_id=1
> [4] FWD 'foo' fwd_kind=struct
> [5] PTR '(anon)' type_id=4
> 
> This commit adds a new pass `btf_dedup_resolve_fwds`, that maps such
> forward declarations to structs or unions with identical name if
> the name is not ambiguous.
> 
> The pass is positioned before `btf_dedup_ref_types` so that types
> [3] and [5] could be merged as the same type after [1] and [4] are merged.
> The final result for the example above looks as follows:
> 
> [1] STRUCT 'foo' size=4 vlen=1
> 	'x' type_id=2 bits_offset=0
> [2] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED
> [3] PTR '(anon)' type_id=1
> 
> For defconfig kernel with BTF enabled this removes 63 forward
> declarations. Examples of removed declarations: `pt_regs`, `in6_addr`.
> The running time of `btf__dedup` function is increased by about 3%.
> 
> Signed-off-by: Eduard Zingerman <eddyz87@xxxxxxxxx>

A few small things below, but looks great!

Reviewed-by: Alan Maguire <alan.maguire@xxxxxxxxxx>

> ---
>  tools/lib/bpf/btf.c | 140 ++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 136 insertions(+), 4 deletions(-)
> 
> diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
> index 04db202aac3d..d2f994d30af7 100644
> --- a/tools/lib/bpf/btf.c
> +++ b/tools/lib/bpf/btf.c
> @@ -2881,6 +2881,7 @@ static int btf_dedup_strings(struct btf_dedup *d);
>  static int btf_dedup_prim_types(struct btf_dedup *d);
>  static int btf_dedup_struct_types(struct btf_dedup *d);
>  static int btf_dedup_ref_types(struct btf_dedup *d);
> +static int btf_dedup_resolve_fwds(struct btf_dedup *d);
>  static int btf_dedup_compact_types(struct btf_dedup *d);
>  static int btf_dedup_remap_types(struct btf_dedup *d);
>  
> @@ -2988,15 +2989,16 @@ static int btf_dedup_remap_types(struct btf_dedup *d);
>   * Algorithm summary
>   * =================
>   *
> - * Algorithm completes its work in 6 separate passes:
> + * Algorithm completes its work in 7 separate passes:
>   *
>   * 1. Strings deduplication.
>   * 2. Primitive types deduplication (int, enum, fwd).
>   * 3. Struct/union types deduplication.
> - * 4. Reference types deduplication (pointers, typedefs, arrays, funcs, func
> + * 4. Resolve unambiguous forward declarations.
> + * 5. Reference types deduplication (pointers, typedefs, arrays, funcs, func
>   *    protos, and const/volatile/restrict modifiers).
> - * 5. Types compaction.
> - * 6. Types remapping.
> + * 6. Types compaction.
> + * 7. Types remapping.
>   *
>   * Algorithm determines canonical type descriptor, which is a single
>   * representative type for each truly unique type. This canonical type is the
> @@ -3060,6 +3062,11 @@ int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts)
>  		pr_debug("btf_dedup_struct_types failed:%d\n", err);
>  		goto done;
>  	}
> +	err = btf_dedup_resolve_fwds(d);
> +	if (err < 0) {
> +		pr_debug("btf_dedup_resolve_fwds failed:%d\n", err);
> +		goto done;
> +	}
>  	err = btf_dedup_ref_types(d);
>  	if (err < 0) {
>  		pr_debug("btf_dedup_ref_types failed:%d\n", err);
> @@ -4526,6 +4533,131 @@ static int btf_dedup_ref_types(struct btf_dedup *d)
>  	return 0;
>  }
>  
> +/*
> + * Collect a map from type names to type ids for all canonical structs
> + * and unions. If the same name is shared by several canonical types
> + * use a special value 0 to indicate this fact.
> + */
> +static int btf_dedup_fill_unique_names_map(struct btf_dedup *d, struct hashmap *names_map)
> +{
> +	__u32 nr_types = btf__type_cnt(d->btf);
> +	struct btf_type *t;
> +	__u32 type_id;
> +	__u16 kind;
> +	int err;
> +
> +	/*
> +	 * Iterate over base and split module ids in order to get all
> +	 * available structs in the map.
> +	 */
> +	for (type_id = 1; type_id < nr_types; ++type_id) {
> +		t = btf_type_by_id(d->btf, type_id);
> +		kind = btf_kind(t);
> +
> +		if (kind != BTF_KIND_STRUCT && kind != BTF_KIND_UNION)
> +			continue;
> +
> +		/* Skip non-canonical types */
> +		if (type_id != d->map[type_id])
> +			continue;
> +
> +		err = hashmap__add(names_map, t->name_off, type_id);
> +		if (err == -EEXIST)
> +			err = hashmap__set(names_map, t->name_off, 0, NULL, NULL);
> +		if (err)
> +			return err;
> +	}
> +
> +	return 0;
> +}
> +
> +static int btf_dedup_resolve_fwd(struct btf_dedup *d, struct hashmap *names_map, __u32 type_id)
> +{
> +	struct btf_type *t = btf_type_by_id(d->btf, type_id);
> +	enum btf_fwd_kind fwd_kind = btf_kflag(t);
> +	__u16 cand_kind, kind = btf_kind(t);
> +	struct btf_type *cand_t;
> +	uintptr_t cand_id = 0;
> +
> +	if (kind != BTF_KIND_FWD)
> +		return 0;
> +
> +	/* Skip if this FWD already has a mapping */
> +	if (type_id != d->map[type_id])
> +		return 0;
> +
> +	hashmap__find(names_map, t->name_off, &cand_id);

would it be safer to do 

	if (!hashmap__find(names_map, t->name_off, &cand_id))
		return 0;

> +	if (!cand_id)
> +		return 0;
> +

...and might be no harm to reiterate the special meaning of 0 here (multiple
name matches -> ambiguous) since it's a valid type id (void) in other cases.
While strictly you probably don't need separate conditions for not found
and found ambiguous name, it might read a bit easier and more consistently
with other users of hashmap__find().

> +	cand_t = btf_type_by_id(d->btf, cand_id);
> +	cand_kind = btf_kind(cand_t);
> +	if (!(cand_kind == BTF_KIND_STRUCT && fwd_kind == BTF_FWD_STRUCT) &&
> +	    !(cand_kind == BTF_KIND_UNION && fwd_kind == BTF_FWD_UNION))
> +		return 0;
> +

I'd find

	if ((cand_kind == BTF_KIND_STRUCT && fwd_kind != BTF_FWD_STRUCT) ||
	    (cand_kind == BTF_KIND_UNION && fwd_kind != BTF_FWD_UNION))

...a bit easier to parse, but again not a big deal.

> +	d->map[type_id] = cand_id;
> +
> +	return 0;
> +}
> +
> +/*
> + * Resolve unambiguous forward declarations.
> + *
> + * The lion's share of all FWD declarations is resolved during
> + * `btf_dedup_struct_types` phase when different type graphs are
> + * compared against each other. However, if in some compilation unit a
> + * FWD declaration is not a part of a type graph compared against
> + * another type graph that declaration's canonical type would not be
> + * changed. Example:
> + *
> + * CU #1:
> + *
> + * struct foo;
> + * struct foo *some_global;
> + *
> + * CU #2:
> + *
> + * struct foo { int u; };
> + * struct foo *another_global;
> + *
> + * After `btf_dedup_struct_types` the BTF looks as follows:
> + *
> + * [1] STRUCT 'foo' size=4 vlen=1 ...
> + * [2] INT 'int' size=4 ...
> + * [3] PTR '(anon)' type_id=1
> + * [4] FWD 'foo' fwd_kind=struct
> + * [5] PTR '(anon)' type_id=4
> + *
> + * This pass assumes that such FWD declarations should be mapped to
> + * structs or unions with identical name in case if the name is not
> + * ambiguous.
> + */
> +static int btf_dedup_resolve_fwds(struct btf_dedup *d)
> +{
> +	int i, err;
> +	struct hashmap *names_map =
> +		hashmap__new(btf_dedup_identity_hash_fn, btf_dedup_equal_fn, NULL);
> +
> +	if (!names_map)
> +		return -ENOMEM;
> +
> +	err = btf_dedup_fill_unique_names_map(d, names_map);
> +	if (err < 0)
> +		goto exit;
> +
> +	for (i = 0; i < d->btf->nr_types; i++) {
> +		err = btf_dedup_resolve_fwd(d, names_map, d->btf->start_id + i);
> +		if (err < 0)
> +			goto exit;

could just break; here I suppose

> +	}
> +
> +exit:
> +	hashmap__free(names_map);
> +	return err;
> +}
> +
>  /*
>   * Compact types.
>   *
>