V2 fixes a crash issue of using an uninitialized hashmap. Add a hashmap to map the string offsets from a source btf to the string offsets from a target btf to reduce overheads. btf__add_btf() calls btf__add_str() to add strings from a source to a target btf. It causes many string comparisons, and it is a major hotspot when adding a big btf. btf__add_str() uses strcmp() to check if a hash entry is the right one. The extra hashmap here compares offsets of strings, that are much cheaper. It remembers the results of btf__add_str() for later uses to reduce the cost. We are parallelizing BTF encoding for pahole by creating separated btf instances for worker threads. These per-thread btf instances will be added to the btf instance of the main thread by calling btf__add_str() to deduplicate and write out. With this patch and -j4, the running time of pahole drops to about 6.0s from 6.6s. The following lines are the summary of 'perf stat' w/o the change. 6.668126396 seconds time elapsed 13.451054000 seconds user 0.715520000 seconds sys The following lines are the summary w/ the change. 5.986973919 seconds time elapsed 12.939903000 seconds user 0.724152000 seconds sys Signed-off-by: Kui-Feng Lee <kuifeng@xxxxxx> --- tools/lib/bpf/btf.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 9aa19c89f758..8b96418000cc 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1620,20 +1620,38 @@ static int btf_commit_type(struct btf *btf, int data_sz) struct btf_pipe { const struct btf *src; struct btf *dst; + struct hashmap *str_off_map; /* map string offsets from src to dst */ }; static int btf_rewrite_str(__u32 *str_off, void *ctx) { struct btf_pipe *p = ctx; + void *mapped_off; int off; + int err; if (!*str_off) /* nothing to do for empty strings */ return 0; + if (p->str_off_map && + hashmap__find(p->str_off_map, (void *)(long)*str_off, &mapped_off)) { + *str_off = (__u32)(long)mapped_off; + return 0; + } + off = btf__add_str(p->dst, btf__str_by_offset(p->src, *str_off)); if (off < 0) return off; + /* Remember string mapping from src to dst. It avoids + * performing expensive string comparisons. + */ + if (p->str_off_map) { + err = hashmap__append(p->str_off_map, (void *)(long)*str_off, (void *)(long)off); + if (err) + return err; + } + *str_off = off; return 0; } @@ -1680,6 +1698,9 @@ static int btf_rewrite_type_ids(__u32 *type_id, void *ctx) return 0; } +static size_t btf_dedup_identity_hash_fn(const void *key, void *ctx); +static bool btf_dedup_equal_fn(const void *k1, const void *k2, void *ctx); + int btf__add_btf(struct btf *btf, const struct btf *src_btf) { struct btf_pipe p = { .src = src_btf, .dst = btf }; @@ -1713,6 +1734,11 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf) if (!off) return libbpf_err(-ENOMEM); + /* Map the string offsets from src_btf to the offsets from btf to improve performance */ + p.str_off_map = hashmap__new(btf_dedup_identity_hash_fn, btf_dedup_equal_fn, NULL); + if (p.str_off_map == NULL) + return libbpf_err(-ENOMEM); + /* bulk copy types data for all types from src_btf */ memcpy(t, src_btf->types_data, data_sz); @@ -1754,6 +1780,8 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf) btf->hdr->str_off += data_sz; btf->nr_types += cnt; + hashmap__free(p.str_off_map); + /* return type ID of the first added BTF type */ return btf->start_id + btf->nr_types - cnt; err_out: @@ -1767,6 +1795,9 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf) * wasn't modified, so doesn't need restoring, see big comment above */ btf->hdr->str_len = old_strs_len; + if (p.str_off_map) + hashmap__free(p.str_off_map); + return libbpf_err(err); } -- 2.30.2