On 23/09/2024 15:19, Jiri Olsa wrote: > On Mon, Sep 16, 2024 at 02:49:44PM +0100, Alan Maguire wrote: > > SNIP > >> Baseline >> >> $ time pahole -J vmlinux -j1 --btf_features=default >> >> real 0m17.268s >> user 0m15.808s >> sys 0m1.415s >> >> $ time pahole -J vmlinux -j8 --btf_features=default >> >> real 0m10.768s >> user 0m30.793s >> sys 0m4.199s >> >> With these changes: >> >> $ time pahole -J vmlinux -j1 --btf_features=default >> >> real 0m16.564s >> user 0m16.029s >> sys 0m0.492s >> >> $ time pahole -J vmlinux -j8 --btf_features=default >> >> real 0m8.332s >> user 0m30.627s >> sys 0m0.714s >> >> In terms of functions encoded, 360 fewer functions make it into >> BTF due to the different approach in consistency checking, but after >> examining these cases, they do appear to be legitimately inconsistent >> functions where the optimized versions have parameter mismatches >> with the non-optimized expectations. >> >> Mileage may vary of course, and any testing folks could do would >> be greatly appreciated! > > looks good, some comments below.. I was hoping to find some way > to split the change, but can't think of any ;-) > thanks for trying it out and looking at the code Jiri! Replies below.. > thanks, > jirka > > > SNIP > >> -static int32_t btf_encoder__add_func_proto(struct btf_encoder *encoder, struct ftype *ftype) >> +static int32_t btf_encoder__add_func_proto(struct btf_encoder *encoder, struct ftype *ftype, struct elf_function *func) >> { >> struct btf *btf = encoder->btf; >> const struct btf_type *t; >> struct parameter *param; >> uint16_t nr_params, param_idx; >> int32_t id, type_id; >> + char tmp_name[KSYM_NAME_LEN]; >> + const char *name; >> + struct btf_encoder_func_state *state = &func->state; > > func could be NULL right? > good catch; I've moved the assignment to the code that deals with ELF function prototype addition. > SNIP > >> static int32_t btf_encoder__save_func(struct btf_encoder *encoder, struct function *fn, struct elf_function *func) >> { >> - fn->priv = encoder->cu; >> - if (func->function) { >> - struct function *existing = func->function; >> - >> - /* If saving and we find an existing entry, we want to merge >> - * observations across both functions, checking that the >> - * "seen optimized parameters", "inconsistent prototype" >> - * and "unexpected register" status is reflected in the >> - * the func entry. >> - * If the entry is new, record encoder state required >> - * to add the local function later (encoder + type_id_off) >> - * such that we can add the function later. >> - */ >> - existing->proto.optimized_parms |= fn->proto.optimized_parms; >> - existing->proto.unexpected_reg |= fn->proto.unexpected_reg; >> - if (!existing->proto.unexpected_reg && !existing->proto.inconsistent_proto && >> - !funcs__match(encoder, func, fn)) >> - existing->proto.inconsistent_proto = 1; >> - } else { >> - func->state.type_id_off = encoder->type_id_off; >> - func->function = fn; >> - encoder->cu->functions_saved++; > > we could remove functions_saved from cu now? > good idea; removed that and the "processed" flag from ftypes. > SNIP > >> -static int32_t btf_encoder__add_func(struct btf_encoder *encoder, struct function *fn) >> +static int32_t btf_encoder__add_func(struct btf_encoder *encoder, struct function *fn, struct elf_function *func) >> { >> - int btf_fnproto_id, btf_fn_id, tag_type_id; >> - struct llvm_annotation *annot; >> + int btf_fnproto_id, btf_fn_id, tag_type_id = 0; >> + int16_t component_idx = -1; >> const char *name; >> + const char *value; >> + char tmp_value[KSYM_NAME_LEN]; >> >> - btf_fnproto_id = btf_encoder__add_func_proto(encoder, &fn->proto); >> - name = function__name(fn); >> - btf_fn_id = btf_encoder__add_ref_type(encoder, BTF_KIND_FUNC, btf_fnproto_id, name, false); >> + btf_fnproto_id = btf_encoder__add_func_proto(encoder, fn ? &fn->proto : NULL, func); >> + name = func->alias ?: func->name; >> + if (btf_fnproto_id >= 0) >> + btf_fn_id = btf_encoder__add_ref_type(encoder, BTF_KIND_FUNC, btf_fnproto_id, name, false); >> if (btf_fnproto_id < 0 || btf_fn_id < 0) { >> - printf("error: failed to encode function '%s'\n", function__name(fn)); >> + printf("error: failed to encode function '%s': invalid %s\n", name, btf_fnproto_id < 0 ? "proto" : "func"); >> return -1; >> } >> - list_for_each_entry(annot, &fn->annots, node) { >> - tag_type_id = btf_encoder__add_decl_tag(encoder, annot->value, btf_fn_id, >> - annot->component_idx); >> - if (tag_type_id < 0) { >> - fprintf(stderr, "error: failed to encode tag '%s' to func %s with component_idx %d\n", >> - annot->value, name, annot->component_idx); >> - return -1; >> + if (!fn) { >> + struct btf_encoder_func_state *state = &func->state; >> + uint16_t idx; >> + >> + if (!state || state->nr_annots == 0) > > it probably can't happen, but state will be allways != NULL in here.. should it be: > > if (!func || state->nr_annots == 0) > I've added assert()s to the btf_encoder__add_func[proto]() functions to ensure that either the fn or func are non-NULL, since winding up with a NULL fn/func is more of a programming error than a state we can wind up in. > >> + return 0; >> + >> + for (idx = 0; idx < state->nr_annots; idx++) { >> + struct btf_encoder_func_annot *a = &state->annots[idx]; >> + >> + value = btf__str_by_offset(encoder->btf, a->value); >> + /* adding BTF data may result in a mode of the >> + * value string memory, so make a temporary copy. >> + */ >> + strncpy(tmp_value, value, sizeof(tmp_value)); >> + component_idx = a->component_idx; >> + >> + tag_type_id = btf_encoder__add_decl_tag(encoder, tmp_value, btf_fn_id, component_idx); >> + if (tag_type_id < 0) >> + break; >> + } >> + } else { >> + struct llvm_annotation *annot; >> + >> + list_for_each_entry(annot, &fn->annots, node) { >> + value = annot->value; >> + component_idx = annot->component_idx; >> + >> + tag_type_id = btf_encoder__add_decl_tag(encoder, value, btf_fn_id, >> + component_idx); >> + if (tag_type_id < 0) >> + break; >> } >> } >> + if (tag_type_id < 0) { >> + fprintf(stderr, "error: failed to encode tag '%s' to func %s with component_idx %d\n", >> + value, name, component_idx); >> + return -1; >> + } >> + >> return 0; >> } >> >> -static void btf_encoder__add_saved_funcs(struct btf_encoder *encoder) >> +static int btf_encoder__add_saved_funcs(struct btf_encoder *encoder) >> { >> int i; >> >> for (i = 0; i < encoder->functions.cnt; i++) { >> struct elf_function *func = &encoder->functions.entries[i]; >> - struct function *fn = func->function; >> - struct btf_encoder *other_encoder; >> + struct btf_encoder_func_state *state = &func->state; >> + struct btf_encoder *other_encoder = NULL; >> >> - if (!fn || fn->proto.processed) >> + if (!state || !state->initialized || state->processed) >> continue; > > state is now placed directly in elf_function so will allways be state != NULL > >> - >> /* merge optimized-out status across encoders; since each >> * encoder has the same elf symbol table we can use the >> * same index to access the same elf symbol. >> */ >> btf_encoders__for_each_encoder(other_encoder) { >> - struct function *other_fn; >> + struct elf_function *other_func; >> + struct btf_encoder_func_state *other_state; >> + uint8_t optimized, unexpected, inconsistent; >> >> if (other_encoder == encoder) >> continue; >> >> - other_fn = other_encoder->functions.entries[i].function; >> - if (!other_fn) >> + other_func = &other_encoder->functions.entries[i]; >> + other_state = &other_func->state; >> + if (!other_state) >> continue; > > same as above it will allways be other_state != NULL > both fixed now. thanks! >> - fn->proto.optimized_parms |= other_fn->proto.optimized_parms; >> - fn->proto.unexpected_reg |= other_fn->proto.unexpected_reg; >> - if (other_fn->proto.inconsistent_proto) >> - fn->proto.inconsistent_proto = 1; >> - if (!fn->proto.unexpected_reg && !fn->proto.inconsistent_proto && >> - !funcs__match(encoder, func, other_fn)) >> - fn->proto.inconsistent_proto = 1; >> - other_fn->proto.processed = 1; >> + optimized = state->optimized_parms | other_state->optimized_parms; >> + unexpected = state->unexpected_reg | other_state->unexpected_reg; >> + inconsistent = state->inconsistent_proto | other_state->inconsistent_proto; >> + if (!unexpected && !inconsistent && >> + !funcs__match(encoder, func, >> + encoder->btf, state, >> + other_encoder->btf, other_state)) >> + inconsistent = 1; >> + state->optimized_parms = other_state->optimized_parms = optimized; >> + state->unexpected_reg = other_state->unexpected_reg = unexpected; >> + state->inconsistent_proto = other_state->inconsistent_proto = inconsistent; >> + >> + other_state->processed = 1; > > SNIP