Re: [PATCH v2 dwarves 1/3] btf_encoder: record BTF-centric function state instead of DWARF-centric

Eduard Zingerman <eddyz87@xxxxxxxxx> · Fri, 20 Sep 2024 12:18:37 -0700

On Mon, 2024-09-16 at 14:49 +0100, Alan Maguire wrote:

[...]

> When this is done, memory consumption peaks at 1Gb rather than
> ~4Gb for vmlinux generation.  Time taken appears to be approximately
> the same for -j1, but slightly faster for multiple threads;
> for example:

Hi Alan,

Sorry for delayed reply.
I tried this patch with selftests vmlinux and see huge reduction indeed:

$ /usr/bin/time -v \
    pahole -j1 --btf_features=consistent_func --btf_encode_detached=old.btf vmlinux
  ...
  Maximum resident set size (kbytes): 3123660

$ /usr/bin/time -v \
    pahole -j1 --btf_features=consistent_func --btf_encode_detached=new.btf vmlinux
  ...
  Maximum resident set size (kbytes): 464400

[...]

> In terms of functions encoded, 360 fewer functions make it into
> BTF due to the different approach in consistency checking, but after
> examining these cases, they do appear to be legitimately inconsistent
> functions where the optimized versions have parameter mismatches
> with the non-optimized expectations.
> 
> Mileage may vary of course, and any testing folks could do would
> be greatly appreciated!

Interestingly, I don't see any difference in generated "consistent"
funcs (BPF selftests kernel config, compiled using mainline clang):

$ comm -3 <(bpftool btf dump file new.btf | grep ' FUNC ' | sed "s/.*'\(.*\)'.*/\1/" | sort) \
          <(bpftool btf dump file old.btf | grep ' FUNC ' | sed "s/.*'\(.*\)'.*/\1/" | sort) \
          | tee | wc -l
0

A few nitpicks below, but overall looks great.

Reviewed-by: Eduard Zingerman <eddyz87@xxxxxxxxx>

[...]

> @@ -669,18 +692,25 @@ static int32_t btf_encoder__tag_type(struct btf_encoder *encoder, uint32_t tag_t
>  	return encoder->type_id_off + tag_type;
>  }
>  
> -static int32_t btf_encoder__add_func_proto(struct btf_encoder *encoder, struct ftype *ftype)
> +static int32_t btf_encoder__add_func_proto(struct btf_encoder *encoder, struct ftype *ftype, struct elf_function *func)

It is a bit unfortunate that we now have this "doubled" representation
and both ftype and elf_function pointers have to be passed through
several layers.
Ihor (in CC) is now working on having single btf_encoder for each
processed CU (in order to be able to sort btf's by CU names before
merging to a single BTF for dedup, thus making reproducible builds
cheaper). I wonder if this could be further extended to have a
separate BTF for functions and merge only consistent functions from
this BTF to a final BTF.

[...]

> @@ -694,20 +724,34 @@ static int32_t btf_encoder__add_func_proto(struct btf_encoder *encoder, struct f
>  
>  	/* add parameters */
>  	param_idx = 0;
> -	ftype__for_each_parameter(ftype, param) {
> -		const char *name = parameter__name(param);
> +	if (ftype) {
> +		ftype__for_each_parameter(ftype, param) {
> +			const char *name = parameter__name(param);
> +
> +			type_id = param->tag.type == 0 ? 0 : encoder->type_id_off + param->tag.type;
> +			++param_idx;
> +			if (btf_encoder__add_func_param(encoder, name, type_id, param_idx == nr_params))
> +				return -1;
> +		}
>  
> -		type_id = param->tag.type == 0 ? 0 : encoder->type_id_off + param->tag.type;
>  		++param_idx;
> -		if (btf_encoder__add_func_param(encoder, name, type_id, param_idx == nr_params))
> -			return -1;
> -	}
> +		if (ftype->unspec_parms)
> +			if (btf_encoder__add_func_param(encoder, NULL, 0, param_idx == nr_params))
> +				return -1;
> +	} else {
> +		for (param_idx = 0; param_idx < nr_params; param_idx++) {
> +			struct btf_encoder_func_parm *p = &state->parms[param_idx];
> +			name = btf__name_by_offset(btf, p->name_off);
>  
> -	++param_idx;
> -	if (ftype->unspec_parms)
> -		if (btf_encoder__add_func_param(encoder, NULL, 0, param_idx == nr_params))
> -			return -1;
> +			/* adding BTF data may result in a move of the
> +			 * name string memory, so make a temporary copy.
> +			 */
> +			strncpy(tmp_name, name, sizeof(tmp_name));

When compiling with fresh gcc (14.2.1) there is a warning reported at this line:

btf_encoder.c:749:25: warning: ‘strncpy’ specified bound 128 equals destination size [-Wstringop-truncation]
  749 |                         strncpy(tmp_name, name, sizeof(tmp_name));
      |                         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Which seems to be a legit warning, given section CAVEATS from strncpy man page.
There is also a second warning like this (see below).
(when compiled with BUILD_TYPE=Release).

>  
> +			if (btf_encoder__add_func_param(encoder, tmp_name, p->type_id, param_idx == nr_params))
> +				return -1;
> +		}
> +	}
>  	return id;
>  }
>  
> @@ -831,53 +875,170 @@ static int32_t btf_encoder__add_decl_tag(struct btf_encoder *encoder, const char

[...]

> -static bool funcs__match(struct btf_encoder *encoder, struct elf_function *func, struct function *f2)
> +static bool names__match(struct btf *btf1, const struct btf_type *t1,
> +			struct btf *btf2, const struct btf_type *t2)
>  {

[...]

> +static bool types__match(struct btf_encoder *encoder,
> +			 struct btf *btf1, int type_id1,
> +			 struct btf *btf2, int type_id2)
> +{

[...]

> +		switch (btf_kind(t1)) {
> +		case BTF_KIND_INT:

Nit: ints have a meaningful .size field and are followed by a u32 word,
     both should be compared.

> +		case BTF_KIND_FLOAT:
> +		case BTF_KIND_FWD:

Nit: BTF_KIND_FWD should go to default, fwd__kind steps over it.

> +		case BTF_KIND_TYPEDEF:
> +		case BTF_KIND_STRUCT:
> +		case BTF_KIND_UNION:
> +		case BTF_KIND_ENUM:
> +		case BTF_KIND_ENUM64:
> +			return names__match(btf1, t1, btf2, t2);
> +		case BTF_KIND_PTR:
> +		case BTF_KIND_VOLATILE:
> +		case BTF_KIND_CONST:
> +		case BTF_KIND_RESTRICT:
> +		case BTF_KIND_TYPE_TAG:
> +			id1 = t1->type;
> +			id2 = t2->type;
> +			break;
> +		case BTF_KIND_ARRAY: {
> +			const struct btf_array *a1 = btf_array(t1);
> +			const struct btf_array *a2 = btf_array(t2);
> +
> +			if (a1->nelems != a2->nelems)
> +				return false;
> +			id1 = a1->type;
> +			id2 = a2->type;
> +			break;
> +		}
> +		case BTF_KIND_FUNC_PROTO: {
> +			const struct btf_param *p1 = btf_params(t1);
> +			const struct btf_param *p2 = btf_params(t2);
> +			int i, vlen = btf_vlen(t1);
> +
> +			if (vlen != btf_vlen(t2))
> +				return false;
> +			if (!types__match(encoder, btf1, t1->type,
> +					  btf2, t2->type))
> +				return false;
> +			for (i = 0; i < vlen; i++, p1++, p2++) {
> +				if (!types__match(encoder, btf1, t1->type,
> +						  btf2, t2->type))
> +					return false;
> +			}
> +			return true;
> +		}
> +		default:
> +			return false;
> +		}
> +	} while (1);
> +
> +	return false;
> +}
> +
> +static bool funcs__match(struct btf_encoder *encoder, struct elf_function *func,
> +			 struct btf *btf1, struct btf_encoder_func_state *s1,
> +			 struct btf *btf2, struct btf_encoder_func_state *s2)
> +{
> +	uint8_t i;
> +
> +	if (!s1->initialized || !s2->initialized)
>  		return true;

Nit: is it even possible to get here when !s1->initialized || !s2->initialized?
     Maybe log something about inconsistent state?

>  
> -	if (!func->state.got_proto)
> -		func->state.got_proto = proto__get(f1, func->state.proto, sizeof(func->state.proto));
> +	if (s1->nr_parms != s2->nr_parms) {
> +		btf_encoder__log_func_skip(encoder, func,
> +					   "param count mismatch; %d params != %d params\n",
> +					   s1->nr_parms, s2->nr_parms);
> +		return false;
> +	}
> +	if (!types__match(encoder, btf1, s1->ret_type_id, btf2, s2->ret_type_id)) {
> +		btf_encoder__log_func_skip(encoder, func, "return type mismatch\n");
> +		return false;
> +	}
> +	if (s1->nr_parms == 0)
> +		return true;
>  
> -	if (proto__get(f2, proto, sizeof(proto))) {
> -		if (strcmp(func->state.proto, proto) != 0) {
> -			if (encoder->verbose)
> -				printf("function mismatch for '%s'('%s'): '%s' != '%s'\n",
> -				       name, f1->alias ?: name,
> -				       func->state.proto, proto);
> +	for (i = 0; i < s1->nr_parms; i++) {
> +		if (!types__match(encoder, btf1, s1->parms[i].type_id,
> +				  btf2, s2->parms[i].type_id)) {
> +			if (encoder->verbose) {
> +				const char *p1 = btf__name_by_offset(btf1, s1->parms[i].name_off);
> +				const char *p2 = btf__name_by_offset(btf2, s2->parms[i].name_off);
> +
> +				btf_encoder__log_func_skip(encoder, func,
> +							   "param type mismatch for param#%d %s %s %s\n",
> +							   i + 1,
> +							   p1 ?: "",
> +							   p1 && p2 ? "!=" : "",
> +							   p2 ?: "");
> +			}
>  			return false;
>  		}
>  	}
> @@ -886,119 +1047,212 @@ static bool funcs__match(struct btf_encoder *encoder, struct elf_function *func,

[...]

> -static int32_t btf_encoder__add_func(struct btf_encoder *encoder, struct function *fn)
> +static int32_t btf_encoder__add_func(struct btf_encoder *encoder, struct function *fn, struct elf_function *func)
>  {

[...]

> +	if (!fn) {
> +		struct btf_encoder_func_state *state = &func->state;
> +		uint16_t idx;
> +
> +		if (!state || state->nr_annots == 0)
> +			return 0;
> +
> +		for (idx = 0; idx < state->nr_annots; idx++) {
> +			struct btf_encoder_func_annot *a = &state->annots[idx];
> +
> +			value = btf__str_by_offset(encoder->btf, a->value);
> +			/* adding BTF data may result in a mode of the
> +			 * value string memory, so make a temporary copy.
> +			 */
> +			strncpy(tmp_value, value, sizeof(tmp_value));

(Second warning from gcc 14.2.1 is reported here)

> +			component_idx = a->component_idx;
> +
> +			tag_type_id = btf_encoder__add_decl_tag(encoder, tmp_value, btf_fn_id, component_idx);
> +			if (tag_type_id < 0)
> +				break;
> +		}

[...]