Re: [PATCH v2 dwarves 5/5] btf_encoder: switch to shared elf_functions table

Eduard Zingerman <eddyz87@xxxxxxxxx> · Thu, 10 Oct 2024 01:57:43 -0700

On Wed, 2024-10-09 at 23:35 +0000, Ihor Solodrai wrote:

[...]

I think the logic makes sense, please find a few comments below.

[...]

> @@ -1340,44 +1355,51 @@ static inline struct elf_function *find_elf_function_in_list(struct elf_function
>  	return NULL;
>  }
>  
> -static int btf_encoder__add_saved_funcs(struct btf_encoder *encoder)
> +
> +// Each element of elf_functions->entries is a list of a variable
> +// number of elf_function structs, because not all encoders saved a
> +// particular function.  However each function saved by at least one
> +// encoder needs to be added to BTF _somewhere_. Here we traverse the
> +// entire elf_functions table, and for each list of elf_function
> +// structs a function is added to the BTF of the encoder of the first
> +// elf_function in the list.
> +int btf_encoder__add_saved_funcs(struct btf_encoder *base_encoder)
>  {
> -	int i;
> +	struct elf_functions *functions = base_encoder->functions;
>  
> -	for (i = 0; i < encoder->functions.cnt; i++) {
> -		struct elf_function *func = find_elf_function_in_list(&encoder->functions.entries[i], encoder);
> +	for (int i = 0; i < functions->cnt; i++) {
> +		struct elf_function *list_head = &functions->entries[i];
> +		struct btf_encoder_func_state *state;
> +		struct elf_function *func;
>  
> -		if (!func)
> +		// list_head without an owner means no encoder
> +		// has saved a function with this name

Nit: // and /**/ style comments are now mixed in this function,
     I think that kernel style prefers /**/.

> +		if (!list_head->owner)
>  			continue;
>  
> -		struct btf_encoder_func_state *state = &func->state;
> -		struct btf_encoder *other_encoder = NULL;
> +		func = list_head;
> +		state = &func->state;
>  
>  		if (!state->initialized || state->processed)
>  			continue;

I don't think that `!state->initialized` check is needed after your
changes. The `state->processed` flag also seems unnecessary, as
algorithm traverses all records collected for a name, so each one of
them would be `processed` one by one.

> -		/* merge optimized-out status across encoders; since each
> -		 * encoder has the same elf symbol table we can use the
> -		 * same index to access the same elf symbol.
> -		 */
> -		btf_encoders__for_each_encoder(other_encoder) {
> -			struct elf_function *other_func;
> -			struct btf_encoder_func_state *other_state;
> -			uint8_t optimized, unexpected, inconsistent;
> -			other_func = find_elf_function_in_list(&other_encoder->functions.entries[i], other_encoder);
>  
> -			if (other_encoder == encoder || !other_func)
> +		for_each_elf_function(other_func, list_head) {
> +			if (func == other_func)
>  				continue;
>  
> -			other_state = &other_func->state;
> -			if (!other_state->initialized)
> +			struct btf_encoder_func_state *other_state = &other_func->state;
> +			uint8_t optimized, unexpected, inconsistent;
> +
> +			if (!other_func || !other_state->initialized || other_state->processed)
>  				continue;
> +
>  			optimized = state->optimized_parms | other_state->optimized_parms;
>  			unexpected = state->unexpected_reg | other_state->unexpected_reg;
>  			inconsistent = state->inconsistent_proto | other_state->inconsistent_proto;
>  			if (!unexpected && !inconsistent &&
> -			    !funcs__match(encoder, func,
> -					  encoder->btf, state,
> -					  other_encoder->btf, other_state))
> +			    !funcs__match(func->owner, func,
> +					  func->owner->btf, state,
> +					  other_func->owner->btf, other_state))
>  				inconsistent = 1;
>  			state->optimized_parms = other_state->optimized_parms = optimized;
>  			state->unexpected_reg = other_state->unexpected_reg = unexpected;

There is also no need to maintain merged state of
`state->unexpected_reg` and company, as far as I understand there are
no more consumers for this information.
E.g. change the function like this:

  int btf_encoder__add_saved_funcs(struct btf_encoder *base_encoder)
  {
	struct elf_functions *functions = base_encoder->functions;

	for (int i = 0; i < functions->cnt; i++) {
		struct elf_function *list_head = &functions->entries[i];
		struct btf_encoder_func_state *state;
		struct elf_function *func;

		// list_head without an owner means no encoder
		// has saved a function with this name
		if (!list_head->owner)
			continue;

		func = list_head;
		state = &func->state;

		bool unexpected = false, inconsistent = false;

                for_each_elf_function(other_func, list_head) {
			struct btf_encoder_func_state *other_state = &other_func->state;

			unexpected |= other_state->unexpected_reg;
			inconsistent |=  other_state->inconsistent_proto;
			if (!unexpected && !inconsistent &&
			    !funcs__match(func->owner, func,
					  func->owner->btf, state,
					  other_func->owner->btf, other_state))
				inconsistent = 1;
		}
		/* do not exclude functions with optimized-out parameters; they
		 * may still be _called_ with the right parameter values, they
		 * just do not _use_ them.  Only exclude functions with
		 * unexpected register use or multiple inconsistent prototypes.
		 */
		if (!unexpected && !inconsistent) {
			if (btf_encoder__add_func((struct btf_encoder *)func->owner, NULL, func))
				return -1;
		}
	}
	return 0;
  }

> @@ -1391,7 +1413,7 @@ static int btf_encoder__add_saved_funcs(struct btf_encoder *encoder)
>  		 * unexpected register use or multiple inconsistent prototypes.
>  		 */
>  		if (!state->unexpected_reg && !state->inconsistent_proto) {
> -			if (btf_encoder__add_func(encoder, NULL, func))
> +			if (btf_encoder__add_func((struct btf_encoder *)func->owner, NULL, func))

Nit: given that const is cast away here, is there a need to declare `->owner` field as const?

>  				return -1;
>  		}
>  		state->processed = 1;

[...]

> diff --git a/pahole.c b/pahole.c
> index 0ff6bc2..a45aa7a 100644
> --- a/pahole.c
> +++ b/pahole.c
> @@ -3256,23 +3256,23 @@ static int pahole_threads_collect(struct conf_load *conf, int nr_threads, void *
>  				  int error)
>  {
>  	struct thread_data **threads = (struct thread_data **)thr_data;
> +	struct btf_encoder *encoders[nr_threads];
>  	int i;
>  	int err = 0;
>  
>  	if (error)
>  		goto out;
>  
> -	for (i = 0; i < nr_threads; i++) {
> -		/*
> -		 * Merge content of the btf instances of worker threads to the btf
> -		 * instance of the primary btf_encoder.
> -                */
> -		if (!threads[i]->btf)
> -			continue;
> -		err = btf_encoder__add_encoder(btf_encoder, threads[i]->encoder);
> -		if (err < 0)
> -			goto out;
> -	}
> +	for (i = 0; i < nr_threads; i++)
> +		encoders[i] = threads[i]->encoder;
> +
> +	/*
> +	 * Merge content of the btf instances of worker threads to the btf
> +	 * instance of the primary btf_encoder.
> +	 */
> +	err = btf_encoder__merge_encoders(btf_encoder, encoders, nr_threads);

Nit: it looks like simply adding a call to btf_encoder__add_saved_funcs()
     here would minimize changes to pahole_threads_collect() and make
     btf_encoder__merge_encoders() unnecessary.

> +	if (err < 0)
> +		goto out;
>  	err = 0;
>  
>  out:

[...]