On Mon, 2024-09-16 at 14:49 +0100, Alan Maguire wrote: [...] > When this is done, memory consumption peaks at 1Gb rather than > ~4Gb for vmlinux generation. Time taken appears to be approximately > the same for -j1, but slightly faster for multiple threads; > for example: Hi Alan, Sorry for delayed reply. I tried this patch with selftests vmlinux and see huge reduction indeed: $ /usr/bin/time -v \ pahole -j1 --btf_features=consistent_func --btf_encode_detached=old.btf vmlinux ... Maximum resident set size (kbytes): 3123660 $ /usr/bin/time -v \ pahole -j1 --btf_features=consistent_func --btf_encode_detached=new.btf vmlinux ... Maximum resident set size (kbytes): 464400 [...] > In terms of functions encoded, 360 fewer functions make it into > BTF due to the different approach in consistency checking, but after > examining these cases, they do appear to be legitimately inconsistent > functions where the optimized versions have parameter mismatches > with the non-optimized expectations. > > Mileage may vary of course, and any testing folks could do would > be greatly appreciated! Interestingly, I don't see any difference in generated "consistent" funcs (BPF selftests kernel config, compiled using mainline clang): $ comm -3 <(bpftool btf dump file new.btf | grep ' FUNC ' | sed "s/.*'\(.*\)'.*/\1/" | sort) \ <(bpftool btf dump file old.btf | grep ' FUNC ' | sed "s/.*'\(.*\)'.*/\1/" | sort) \ | tee | wc -l 0 A few nitpicks below, but overall looks great. Reviewed-by: Eduard Zingerman <eddyz87@xxxxxxxxx> [...] > @@ -669,18 +692,25 @@ static int32_t btf_encoder__tag_type(struct btf_encoder *encoder, uint32_t tag_t > return encoder->type_id_off + tag_type; > } > > -static int32_t btf_encoder__add_func_proto(struct btf_encoder *encoder, struct ftype *ftype) > +static int32_t btf_encoder__add_func_proto(struct btf_encoder *encoder, struct ftype *ftype, struct elf_function *func) It is a bit unfortunate that we now have this "doubled" representation and both ftype and elf_function pointers have to be passed through several layers. Ihor (in CC) is now working on having single btf_encoder for each processed CU (in order to be able to sort btf's by CU names before merging to a single BTF for dedup, thus making reproducible builds cheaper). I wonder if this could be further extended to have a separate BTF for functions and merge only consistent functions from this BTF to a final BTF. [...] > @@ -694,20 +724,34 @@ static int32_t btf_encoder__add_func_proto(struct btf_encoder *encoder, struct f > > /* add parameters */ > param_idx = 0; > - ftype__for_each_parameter(ftype, param) { > - const char *name = parameter__name(param); > + if (ftype) { > + ftype__for_each_parameter(ftype, param) { > + const char *name = parameter__name(param); > + > + type_id = param->tag.type == 0 ? 0 : encoder->type_id_off + param->tag.type; > + ++param_idx; > + if (btf_encoder__add_func_param(encoder, name, type_id, param_idx == nr_params)) > + return -1; > + } > > - type_id = param->tag.type == 0 ? 0 : encoder->type_id_off + param->tag.type; > ++param_idx; > - if (btf_encoder__add_func_param(encoder, name, type_id, param_idx == nr_params)) > - return -1; > - } > + if (ftype->unspec_parms) > + if (btf_encoder__add_func_param(encoder, NULL, 0, param_idx == nr_params)) > + return -1; > + } else { > + for (param_idx = 0; param_idx < nr_params; param_idx++) { > + struct btf_encoder_func_parm *p = &state->parms[param_idx]; > + name = btf__name_by_offset(btf, p->name_off); > > - ++param_idx; > - if (ftype->unspec_parms) > - if (btf_encoder__add_func_param(encoder, NULL, 0, param_idx == nr_params)) > - return -1; > + /* adding BTF data may result in a move of the > + * name string memory, so make a temporary copy. > + */ > + strncpy(tmp_name, name, sizeof(tmp_name)); When compiling with fresh gcc (14.2.1) there is a warning reported at this line: btf_encoder.c:749:25: warning: ‘strncpy’ specified bound 128 equals destination size [-Wstringop-truncation] 749 | strncpy(tmp_name, name, sizeof(tmp_name)); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Which seems to be a legit warning, given section CAVEATS from strncpy man page. There is also a second warning like this (see below). (when compiled with BUILD_TYPE=Release). > > + if (btf_encoder__add_func_param(encoder, tmp_name, p->type_id, param_idx == nr_params)) > + return -1; > + } > + } > return id; > } > > @@ -831,53 +875,170 @@ static int32_t btf_encoder__add_decl_tag(struct btf_encoder *encoder, const char [...] > -static bool funcs__match(struct btf_encoder *encoder, struct elf_function *func, struct function *f2) > +static bool names__match(struct btf *btf1, const struct btf_type *t1, > + struct btf *btf2, const struct btf_type *t2) > { [...] > +static bool types__match(struct btf_encoder *encoder, > + struct btf *btf1, int type_id1, > + struct btf *btf2, int type_id2) > +{ [...] > + switch (btf_kind(t1)) { > + case BTF_KIND_INT: Nit: ints have a meaningful .size field and are followed by a u32 word, both should be compared. > + case BTF_KIND_FLOAT: > + case BTF_KIND_FWD: Nit: BTF_KIND_FWD should go to default, fwd__kind steps over it. > + case BTF_KIND_TYPEDEF: > + case BTF_KIND_STRUCT: > + case BTF_KIND_UNION: > + case BTF_KIND_ENUM: > + case BTF_KIND_ENUM64: > + return names__match(btf1, t1, btf2, t2); > + case BTF_KIND_PTR: > + case BTF_KIND_VOLATILE: > + case BTF_KIND_CONST: > + case BTF_KIND_RESTRICT: > + case BTF_KIND_TYPE_TAG: > + id1 = t1->type; > + id2 = t2->type; > + break; > + case BTF_KIND_ARRAY: { > + const struct btf_array *a1 = btf_array(t1); > + const struct btf_array *a2 = btf_array(t2); > + > + if (a1->nelems != a2->nelems) > + return false; > + id1 = a1->type; > + id2 = a2->type; > + break; > + } > + case BTF_KIND_FUNC_PROTO: { > + const struct btf_param *p1 = btf_params(t1); > + const struct btf_param *p2 = btf_params(t2); > + int i, vlen = btf_vlen(t1); > + > + if (vlen != btf_vlen(t2)) > + return false; > + if (!types__match(encoder, btf1, t1->type, > + btf2, t2->type)) > + return false; > + for (i = 0; i < vlen; i++, p1++, p2++) { > + if (!types__match(encoder, btf1, t1->type, > + btf2, t2->type)) > + return false; > + } > + return true; > + } > + default: > + return false; > + } > + } while (1); > + > + return false; > +} > + > +static bool funcs__match(struct btf_encoder *encoder, struct elf_function *func, > + struct btf *btf1, struct btf_encoder_func_state *s1, > + struct btf *btf2, struct btf_encoder_func_state *s2) > +{ > + uint8_t i; > + > + if (!s1->initialized || !s2->initialized) > return true; Nit: is it even possible to get here when !s1->initialized || !s2->initialized? Maybe log something about inconsistent state? > > - if (!func->state.got_proto) > - func->state.got_proto = proto__get(f1, func->state.proto, sizeof(func->state.proto)); > + if (s1->nr_parms != s2->nr_parms) { > + btf_encoder__log_func_skip(encoder, func, > + "param count mismatch; %d params != %d params\n", > + s1->nr_parms, s2->nr_parms); > + return false; > + } > + if (!types__match(encoder, btf1, s1->ret_type_id, btf2, s2->ret_type_id)) { > + btf_encoder__log_func_skip(encoder, func, "return type mismatch\n"); > + return false; > + } > + if (s1->nr_parms == 0) > + return true; > > - if (proto__get(f2, proto, sizeof(proto))) { > - if (strcmp(func->state.proto, proto) != 0) { > - if (encoder->verbose) > - printf("function mismatch for '%s'('%s'): '%s' != '%s'\n", > - name, f1->alias ?: name, > - func->state.proto, proto); > + for (i = 0; i < s1->nr_parms; i++) { > + if (!types__match(encoder, btf1, s1->parms[i].type_id, > + btf2, s2->parms[i].type_id)) { > + if (encoder->verbose) { > + const char *p1 = btf__name_by_offset(btf1, s1->parms[i].name_off); > + const char *p2 = btf__name_by_offset(btf2, s2->parms[i].name_off); > + > + btf_encoder__log_func_skip(encoder, func, > + "param type mismatch for param#%d %s %s %s\n", > + i + 1, > + p1 ?: "", > + p1 && p2 ? "!=" : "", > + p2 ?: ""); > + } > return false; > } > } > @@ -886,119 +1047,212 @@ static bool funcs__match(struct btf_encoder *encoder, struct elf_function *func, [...] > -static int32_t btf_encoder__add_func(struct btf_encoder *encoder, struct function *fn) > +static int32_t btf_encoder__add_func(struct btf_encoder *encoder, struct function *fn, struct elf_function *func) > { [...] > + if (!fn) { > + struct btf_encoder_func_state *state = &func->state; > + uint16_t idx; > + > + if (!state || state->nr_annots == 0) > + return 0; > + > + for (idx = 0; idx < state->nr_annots; idx++) { > + struct btf_encoder_func_annot *a = &state->annots[idx]; > + > + value = btf__str_by_offset(encoder->btf, a->value); > + /* adding BTF data may result in a mode of the > + * value string memory, so make a temporary copy. > + */ > + strncpy(tmp_value, value, sizeof(tmp_value)); (Second warning from gcc 14.2.1 is reported here) > + component_idx = a->component_idx; > + > + tag_type_id = btf_encoder__add_decl_tag(encoder, tmp_value, btf_fn_id, component_idx); > + if (tag_type_id < 0) > + break; > + } [...]