On Wed, 2024-10-09 at 23:35 +0000, Ihor Solodrai wrote: [...] I think the logic makes sense, please find a few comments below. [...] > @@ -1340,44 +1355,51 @@ static inline struct elf_function *find_elf_function_in_list(struct elf_function > return NULL; > } > > -static int btf_encoder__add_saved_funcs(struct btf_encoder *encoder) > + > +// Each element of elf_functions->entries is a list of a variable > +// number of elf_function structs, because not all encoders saved a > +// particular function. However each function saved by at least one > +// encoder needs to be added to BTF _somewhere_. Here we traverse the > +// entire elf_functions table, and for each list of elf_function > +// structs a function is added to the BTF of the encoder of the first > +// elf_function in the list. > +int btf_encoder__add_saved_funcs(struct btf_encoder *base_encoder) > { > - int i; > + struct elf_functions *functions = base_encoder->functions; > > - for (i = 0; i < encoder->functions.cnt; i++) { > - struct elf_function *func = find_elf_function_in_list(&encoder->functions.entries[i], encoder); > + for (int i = 0; i < functions->cnt; i++) { > + struct elf_function *list_head = &functions->entries[i]; > + struct btf_encoder_func_state *state; > + struct elf_function *func; > > - if (!func) > + // list_head without an owner means no encoder > + // has saved a function with this name Nit: // and /**/ style comments are now mixed in this function, I think that kernel style prefers /**/. > + if (!list_head->owner) > continue; > > - struct btf_encoder_func_state *state = &func->state; > - struct btf_encoder *other_encoder = NULL; > + func = list_head; > + state = &func->state; > > if (!state->initialized || state->processed) > continue; I don't think that `!state->initialized` check is needed after your changes. The `state->processed` flag also seems unnecessary, as algorithm traverses all records collected for a name, so each one of them would be `processed` one by one. > - /* merge optimized-out status across encoders; since each > - * encoder has the same elf symbol table we can use the > - * same index to access the same elf symbol. > - */ > - btf_encoders__for_each_encoder(other_encoder) { > - struct elf_function *other_func; > - struct btf_encoder_func_state *other_state; > - uint8_t optimized, unexpected, inconsistent; > - other_func = find_elf_function_in_list(&other_encoder->functions.entries[i], other_encoder); > > - if (other_encoder == encoder || !other_func) > + for_each_elf_function(other_func, list_head) { > + if (func == other_func) > continue; > > - other_state = &other_func->state; > - if (!other_state->initialized) > + struct btf_encoder_func_state *other_state = &other_func->state; > + uint8_t optimized, unexpected, inconsistent; > + > + if (!other_func || !other_state->initialized || other_state->processed) > continue; > + > optimized = state->optimized_parms | other_state->optimized_parms; > unexpected = state->unexpected_reg | other_state->unexpected_reg; > inconsistent = state->inconsistent_proto | other_state->inconsistent_proto; > if (!unexpected && !inconsistent && > - !funcs__match(encoder, func, > - encoder->btf, state, > - other_encoder->btf, other_state)) > + !funcs__match(func->owner, func, > + func->owner->btf, state, > + other_func->owner->btf, other_state)) > inconsistent = 1; > state->optimized_parms = other_state->optimized_parms = optimized; > state->unexpected_reg = other_state->unexpected_reg = unexpected; There is also no need to maintain merged state of `state->unexpected_reg` and company, as far as I understand there are no more consumers for this information. E.g. change the function like this: int btf_encoder__add_saved_funcs(struct btf_encoder *base_encoder) { struct elf_functions *functions = base_encoder->functions; for (int i = 0; i < functions->cnt; i++) { struct elf_function *list_head = &functions->entries[i]; struct btf_encoder_func_state *state; struct elf_function *func; // list_head without an owner means no encoder // has saved a function with this name if (!list_head->owner) continue; func = list_head; state = &func->state; bool unexpected = false, inconsistent = false; for_each_elf_function(other_func, list_head) { struct btf_encoder_func_state *other_state = &other_func->state; unexpected |= other_state->unexpected_reg; inconsistent |= other_state->inconsistent_proto; if (!unexpected && !inconsistent && !funcs__match(func->owner, func, func->owner->btf, state, other_func->owner->btf, other_state)) inconsistent = 1; } /* do not exclude functions with optimized-out parameters; they * may still be _called_ with the right parameter values, they * just do not _use_ them. Only exclude functions with * unexpected register use or multiple inconsistent prototypes. */ if (!unexpected && !inconsistent) { if (btf_encoder__add_func((struct btf_encoder *)func->owner, NULL, func)) return -1; } } return 0; } > @@ -1391,7 +1413,7 @@ static int btf_encoder__add_saved_funcs(struct btf_encoder *encoder) > * unexpected register use or multiple inconsistent prototypes. > */ > if (!state->unexpected_reg && !state->inconsistent_proto) { > - if (btf_encoder__add_func(encoder, NULL, func)) > + if (btf_encoder__add_func((struct btf_encoder *)func->owner, NULL, func)) Nit: given that const is cast away here, is there a need to declare `->owner` field as const? > return -1; > } > state->processed = 1; [...] > diff --git a/pahole.c b/pahole.c > index 0ff6bc2..a45aa7a 100644 > --- a/pahole.c > +++ b/pahole.c > @@ -3256,23 +3256,23 @@ static int pahole_threads_collect(struct conf_load *conf, int nr_threads, void * > int error) > { > struct thread_data **threads = (struct thread_data **)thr_data; > + struct btf_encoder *encoders[nr_threads]; > int i; > int err = 0; > > if (error) > goto out; > > - for (i = 0; i < nr_threads; i++) { > - /* > - * Merge content of the btf instances of worker threads to the btf > - * instance of the primary btf_encoder. > - */ > - if (!threads[i]->btf) > - continue; > - err = btf_encoder__add_encoder(btf_encoder, threads[i]->encoder); > - if (err < 0) > - goto out; > - } > + for (i = 0; i < nr_threads; i++) > + encoders[i] = threads[i]->encoder; > + > + /* > + * Merge content of the btf instances of worker threads to the btf > + * instance of the primary btf_encoder. > + */ > + err = btf_encoder__merge_encoders(btf_encoder, encoders, nr_threads); Nit: it looks like simply adding a call to btf_encoder__add_saved_funcs() here would minimize changes to pahole_threads_collect() and make btf_encoder__merge_encoders() unnecessary. > + if (err < 0) > + goto out; > err = 0; > > out: [...]