> On Feb 28, 2019, at 11:40 AM, Andrii Nakryiko <andrii.nakryiko@xxxxxxxxx> wrote:
>
> On Thu, Feb 28, 2019 at 11:02 AM Song Liu <songliubraving@xxxxxx> wrote:
>>
>>
>>
>>> On Feb 28, 2019, at 10:51 AM, Andrii Nakryiko <andrii.nakryiko@xxxxxxxxx> wrote:
>>>
>>> On Thu, Feb 28, 2019 at 10:27 AM Song Liu <liu.song.a23@xxxxxxxxx> wrote:
>>>>
>>>> On Wed, Feb 27, 2019 at 2:47 PM Andrii Nakryiko <andriin@xxxxxx> wrote:
>>>>>
>>>>> The default size of the dedup table (16k) is good enough for most
>>>>> binaries, even typical vmlinux images. But there are binaries with a
>>>>> huge number of BTF types (e.g., allyesconfig variants of the kernel)
>>>>> which benefit from a bigger dedup table size to reduce the number of
>>>>> unnecessary hash collisions. Tools like pahole can thus tune this
>>>>> parameter to reach optimal performance.
>>>>>
>>>>> This change also serves the double purpose of allowing tests to force
>>>>> hash collisions in order to exercise some corner cases, used in a
>>>>> follow-up patch.
>>>>>
>>>>> Signed-off-by: Andrii Nakryiko <andriin@xxxxxx>
>>>>> ---
>>>>>  tools/lib/bpf/btf.c | 43 ++++++++++++++++++++++++++-----------------
>>>>>  tools/lib/bpf/btf.h |  1 +
>>>>>  2 files changed, 27 insertions(+), 17 deletions(-)
>>>>>
>>>>> diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
>>>>> index 68b50e9bbde1..6bbb710216e6 100644
>>>>> --- a/tools/lib/bpf/btf.c
>>>>> +++ b/tools/lib/bpf/btf.c
>>>>> @@ -1070,8 +1070,7 @@ int btf__dedup(struct btf *btf, struct btf_ext *btf_ext,
>>>>>         return err;
>>>>>  }
>>>>>
>>>>> -#define BTF_DEDUP_TABLE_SIZE_LOG 14
>>>>> -#define BTF_DEDUP_TABLE_MOD ((1 << BTF_DEDUP_TABLE_SIZE_LOG) - 1)
>>>>> +#define BTF_DEDUP_TABLE_DEFAULT_SIZE (1 << 14)
>>>>>  #define BTF_UNPROCESSED_ID ((__u32)-1)
>>>>>  #define BTF_IN_PROGRESS_ID ((__u32)-2)
>>>>>
>>>>> @@ -1128,18 +1127,21 @@ static inline __u32 hash_combine(__u32 h, __u32 value)
>>>>>  #undef GOLDEN_RATIO_PRIME
>>>>>  }
>>>>>
>>>>> -#define for_each_hash_node(table, hash, node) \
>>>>> -       for (node = table[hash & BTF_DEDUP_TABLE_MOD]; node; node = node->next)
>>>>> +#define for_each_dedup_cand(d, hash, node) \
>>>>> +       for (node = d->dedup_table[hash & (d->opts.dedup_table_size - 1)]; \
>>>>> +            node; \
>>>>> +            node = node->next)
>>>>>
>>>>>  static int btf_dedup_table_add(struct btf_dedup *d, __u32 hash, __u32 type_id)
>>>>>  {
>>>>>         struct btf_dedup_node *node = malloc(sizeof(struct btf_dedup_node));
>>>>> +       int bucket = hash & (d->opts.dedup_table_size - 1);
>>>>>
>>>>>         if (!node)
>>>>>                 return -ENOMEM;
>>>>>         node->type_id = type_id;
>>>>> -       node->next = d->dedup_table[hash & BTF_DEDUP_TABLE_MOD];
>>>>> -       d->dedup_table[hash & BTF_DEDUP_TABLE_MOD] = node;
>>>>> +       node->next = d->dedup_table[bucket];
>>>>> +       d->dedup_table[bucket] = node;
>>>>>         return 0;
>>>>>  }
>>>>>
>>>>> @@ -1177,7 +1179,7 @@ static void btf_dedup_table_free(struct btf_dedup *d)
>>>>>         if (!d->dedup_table)
>>>>>                 return;
>>>>>
>>>>> -       for (i = 0; i < (1 << BTF_DEDUP_TABLE_SIZE_LOG); i++) {
>>>>> +       for (i = 0; i < d->opts.dedup_table_size; i++) {
>>>>>                 while (d->dedup_table[i]) {
>>>>>                         tmp = d->dedup_table[i];
>>>>>                         d->dedup_table[i] = tmp->next;
>>>>> @@ -1221,10 +1223,19 @@ static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
>>>>>         if (!d)
>>>>>                 return ERR_PTR(-ENOMEM);
>>>>>
>>>>> +       d->opts.dont_resolve_fwds = opts && opts->dont_resolve_fwds;
>>>>> +       /* ensure table size is power of two and limit to 2G */
>>>>> +       d->opts.dedup_table_size = opts && opts->dedup_table_size
>>>>> +                                  ? opts->dedup_table_size
>>>>> +                                  : BTF_DEDUP_TABLE_DEFAULT_SIZE;
>>>>> +       for (i = 0; i < 31 && (1 << i) < d->opts.dedup_table_size; i++)
>>>>> +               ;
>>>>> +       d->opts.dedup_table_size = 1 << i;
>>>>> +
>>>> So this is the roundup log2 logic? How about we define some macros
>>>> to make it cleaner? Like
>>>>
>>>> #define BTF_DEDUP_TABLE_MAX_SIZE xxxx
>>>
>>> You mean hide this loop behind a macro? Or specify the max size as a
>>> macro? If the former, I'd rather do a static function, something like
>>> roundup_pow_of_2_with_max?
>>
>> I meant specify the max size as a macro. Doing a static function is
>> also a good idea.
>
> Ok, will do.
>
>>
>>>
>>>>
>>>> Also, how big a hash table do we need for allyesconfig? 2G seems
>>>> really big to me.
>>>
>>> It works even with 16k and takes about 45 seconds. I didn't want to
>>> artificially limit this to something small and left it up to the user.
>>> This algorithm can be used for arbitrary binaries after pahole's
>>> dwarf2btf conversion, which could be much bigger than the kernel.
>>> Realistically, it's unlikely that you'll need more than 64k-128k.
>>
>> How about we show some warning like "You are using really big size"
>> for too big numbers, like 16M+?
>
> Tbh, adding an extra arbitrary constant and emitting a warning seems
> excessive. If it's a huge number, the allocation will fail and we'll
> return an error. If it's just big, RSS will be high, and if the user
> cares he should be able to deduce that he specified an unreasonable
> size. In general, I doubt this value will be overridden often; most
> uses will probably just use the default setting.

Fair enough. Let's go with 2G as-is then. (Rough sketches of the rounding
helper and of caller-side usage are appended after the patch below.)

>
>>
>> Thanks,
>> Song
>>
>>>
>>>>
>>>>>         d->btf = btf;
>>>>>         d->btf_ext = btf_ext;
>>>>>
>>>>> -       d->dedup_table = calloc(1 << BTF_DEDUP_TABLE_SIZE_LOG,
>>>>> +       d->dedup_table = calloc(d->opts.dedup_table_size,
>>>>>                                 sizeof(struct btf_dedup_node *));
>>>>>         if (!d->dedup_table) {
>>>>>                 err = -ENOMEM;
>>>>> @@ -1249,8 +1260,6 @@ static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
>>>>>         for (i = 0; i <= btf->nr_types; i++)
>>>>>                 d->hypot_map[i] = BTF_UNPROCESSED_ID;
>>>>>
>>>>> -       d->opts.dont_resolve_fwds = opts && opts->dont_resolve_fwds;
>>>>> -
>>>>>  done:
>>>>>         if (err) {
>>>>>                 btf_dedup_free(d);
>>>>> @@ -1824,7 +1833,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
>>>>>
>>>>>         case BTF_KIND_INT:
>>>>>                 h = btf_hash_int(t);
>>>>> -               for_each_hash_node(d->dedup_table, h, cand_node) {
>>>>> +               for_each_dedup_cand(d, h, cand_node) {
>>>>>                         cand = d->btf->types[cand_node->type_id];
>>>>>                         if (btf_equal_int(t, cand)) {
>>>>>                                 new_id = cand_node->type_id;
>>>>> @@ -1835,7 +1844,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
>>>>>
>>>>>         case BTF_KIND_ENUM:
>>>>>                 h = btf_hash_enum(t);
>>>>> -               for_each_hash_node(d->dedup_table, h, cand_node) {
>>>>> +               for_each_dedup_cand(d, h, cand_node) {
>>>>>                         cand = d->btf->types[cand_node->type_id];
>>>>>                         if (btf_equal_enum(t, cand)) {
>>>>>                                 new_id = cand_node->type_id;
>>>>> @@ -1846,7 +1855,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
>>>>>
>>>>>         case BTF_KIND_FWD:
>>>>>                 h = btf_hash_common(t);
>>>>> -               for_each_hash_node(d->dedup_table, h, cand_node) {
>>>>> +               for_each_dedup_cand(d, h, cand_node) {
>>>>>                         cand = d->btf->types[cand_node->type_id];
>>>>>                         if (btf_equal_common(t, cand)) {
>>>>>                                 new_id = cand_node->type_id;
>>>>> @@ -2263,7 +2272,7 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
>>>>>                 return 0;
>>>>>
>>>>>         h = btf_hash_struct(t);
>>>>> -       for_each_hash_node(d->dedup_table, h, cand_node) {
>>>>> +       for_each_dedup_cand(d, h, cand_node) {
>>>>>                 int eq;
>>>>>
>>>>>                 btf_dedup_clear_hypot_map(d);
>>>>> @@ -2349,7 +2358,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
>>>>>                 t->type = ref_type_id;
>>>>>
>>>>>                 h = btf_hash_common(t);
>>>>> -               for_each_hash_node(d->dedup_table, h, cand_node) {
>>>>> +               for_each_dedup_cand(d, h, cand_node) {
>>>>>                         cand = d->btf->types[cand_node->type_id];
>>>>>                         if (btf_equal_common(t, cand)) {
>>>>>                                 new_id = cand_node->type_id;
>>>>> @@ -2372,7 +2381,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
>>>>>                 info->index_type = ref_type_id;
>>>>>
>>>>>                 h = btf_hash_array(t);
>>>>> -               for_each_hash_node(d->dedup_table, h, cand_node) {
>>>>> +               for_each_dedup_cand(d, h, cand_node) {
>>>>>                         cand = d->btf->types[cand_node->type_id];
>>>>>                         if (btf_equal_array(t, cand)) {
>>>>>                                 new_id = cand_node->type_id;
>>>>> @@ -2403,7 +2412,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
>>>>>                 }
>>>>>
>>>>>                 h = btf_hash_fnproto(t);
>>>>> -               for_each_hash_node(d->dedup_table, h, cand_node) {
>>>>> +               for_each_dedup_cand(d, h, cand_node) {
>>>>>                         cand = d->btf->types[cand_node->type_id];
>>>>>                         if (btf_equal_fnproto(t, cand)) {
>>>>>                                 new_id = cand_node->type_id;
>>>>> diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
>>>>> index b60bb7cf5fff..28a1e1e59861 100644
>>>>> --- a/tools/lib/bpf/btf.h
>>>>> +++ b/tools/lib/bpf/btf.h
>>>>> @@ -90,6 +90,7 @@ LIBBPF_API __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext);
>>>>>  LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext);
>>>>>
>>>>>  struct btf_dedup_opts {
>>>>> +       unsigned int dedup_table_size;
>>>>>         bool dont_resolve_fwds;
>>>>>  };
>>>>>
>>>>> --
>>>>> 2.17.1
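
For reference, here is a rough sketch of the kind of static helper discussed
above. The name roundup_pow_of_2_with_max is the one suggested in the thread;
the exact signature, the BTF_DEDUP_TABLE_MAX_SIZE_LOG macro and the way it is
wired into btf_dedup_new() below are illustrative assumptions only, not
necessarily what will land:

#include <linux/types.h>

/* Illustrative only: round v up to the next power of two, capped at 2G
 * (1 << 31). A power-of-two size is what allows "hash & (size - 1)" to
 * act as a cheap modulo in for_each_dedup_cand() and
 * btf_dedup_table_add().
 */
#define BTF_DEDUP_TABLE_MAX_SIZE_LOG 31

static __u32 roundup_pow_of_2_with_max(__u32 v)
{
        int i;

        for (i = 0; i < BTF_DEDUP_TABLE_MAX_SIZE_LOG && (1U << i) < v; i++)
                ;
        return 1U << i;
}

/*
 * The open-coded loop in btf_dedup_new() could then become:
 *
 *      d->opts.dedup_table_size = roundup_pow_of_2_with_max(
 *              opts && opts->dedup_table_size
 *              ? opts->dedup_table_size
 *              : BTF_DEDUP_TABLE_DEFAULT_SIZE);
 */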
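
And a sketch of how a caller (e.g., a pahole-style DWARF-to-BTF converter)
might use the new dedup_table_size knob. The wrapper name dedup_big_btf and
the 128k figure are made up for illustration; the value is rounded up to a
power of two internally anyway, and passing NULL opts keeps the 16k default:

#include "btf.h"        /* tools/lib/bpf; installed headers may be <bpf/btf.h> */

/* Hypothetical caller: bump the dedup table for very large BTF inputs;
 * per the discussion above, 64k-128k buckets should be plenty even for
 * allyesconfig-sized kernels.
 */
int dedup_big_btf(struct btf *btf, struct btf_ext *btf_ext)
{
        struct btf_dedup_opts opts = {
                .dedup_table_size = 1 << 17,    /* 128k buckets */
                .dont_resolve_fwds = false,
        };

        return btf__dedup(btf, btf_ext, &opts);
}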