At gcc optimization level O2 or higher, many function optimizations occur such as - constant propagation (.constprop.0); - interprocedural scalar replacement of aggregates, removal of unused parameters and replacement of parameters passed by reference by parameters passed by value (.isra.0) See [1] for details. Currently BTF encoding does not handle such optimized functions that get renamed with a "." suffix such as ".isra.0", ".constprop.0". This is safer because such suffixes can often indicate parameters have been optimized out. Since we can now spot this, support matching to a "." suffix and represent the function in BTF if it does not have optimized-out parameters. First an attempt to match by exact name is made; if that fails we fall back to checking for a "."-suffixed name. The BTF representation will use the original function name "foo" not "foo.isra.0" for consistency with DWARF representation. There is a complication however, and this arises because we process each CU separately and merge BTF when complete. Different CUs may optimize differently, so in one CU, a function may have optimized-out parameters - and thus be ineligible for BTF - while in another it does not have optimized-out parameters - making it eligible for BTF. The NF_HOOK function is an example of this. To avoid disrupting BTF generation parallelism, the approach taken is to save such functions in a per-encoder binary tree for later addition. That way, at thread collection time, observations about optimizations can be merged across encoders and we know whether it is safe to add a "."-suffixed function or not. The result of this is we add 602 "."-suffixed functions to the BTF representation. However, note that the optimization checks are applied to both "."-suffixed and normal functions. They find 1428 of the latter with optimized-out parameters also, and these are dropped from the BTF representation also. For example, bad_inode_permission() is skipped because no location information is supplied for any of its parameters; disassembling it we see why this might be: (gdb) disassemble bad_inode_permission Dump of assembler code for function bad_inode_permission: 0xffffffff813ef180 <+0>: callq 0xffffffff81088c70 <__fentry__> 0xffffffff813ef185 <+5>: push %rbp 0xffffffff813ef186 <+6>: mov $0xfffffffb,%eax 0xffffffff813ef18b <+11>: mov %rsp,%rbp 0xffffffff813ef18e <+14>: pop %rbp 0xffffffff813ef18f <+15>: jmpq 0xffffffff81c6e600 <__x86_return_thunk> End of assembler dump. ...since the function is simply: static int bad_inode_permission(struct user_namespace *mnt_userns, struct inode *inode, int mask) { return -EIO; } So these changes lead to a net decrease of 826 functions in vmlinux BTF. [1] https://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html Signed-off-by: Alan Maguire <alan.maguire@xxxxxxxxxx> --- btf_encoder.c | 197 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- dwarves.h | 2 + pahole.c | 14 ++--- 3 files changed, 200 insertions(+), 13 deletions(-) diff --git a/btf_encoder.c b/btf_encoder.c index e20b628..f36150e 100644 --- a/btf_encoder.c +++ b/btf_encoder.c @@ -30,11 +30,13 @@ #include <errno.h> #include <stdint.h> +#include <search.h> /* for tsearch(), tfind() and tdestroy() */ #include <pthread.h> struct elf_function { const char *name; bool generated; + size_t prefixlen; }; #define MAX_PERCPU_VAR_CNT 4096 @@ -57,6 +59,8 @@ struct btf_encoder { struct elf_symtab *symtab; uint32_t type_id_off; uint32_t unspecified_type; + void *saved_func_tree; + int saved_func_cnt; bool has_index_type, need_index_type, skip_encoding_vars, @@ -77,12 +81,15 @@ struct btf_encoder { struct elf_function *entries; int allocated; int cnt; + int suffix_cnt; /* number of .isra, .part etc */ } functions; }; static LIST_HEAD(encoders); static pthread_mutex_t encoders__lock = PTHREAD_MUTEX_INITIALIZER; +static void btf_encoder__add_saved_funcs(struct btf_encoder *encoder); + /* mutex only needed for add/delete, as this can happen in multiple encoding * threads. Traversal of the list is currently confined to thread collection. */ @@ -701,6 +708,10 @@ int32_t btf_encoder__add_encoder(struct btf_encoder *encoder, struct btf_encoder int32_t i, id; struct btf_var_secinfo *vsi; + btf_encoder__add_saved_funcs(other); + if (encoder == other) + return 0; + for (i = 0; i < nr_var_secinfo; i++) { vsi = (struct btf_var_secinfo *)var_secinfo_buf->entries + i; type_id = next_type_id + vsi->type - 1; /* Type ID starts from 1 */ @@ -776,6 +787,70 @@ static int32_t btf_encoder__add_decl_tag(struct btf_encoder *encoder, const char return id; } + +static int function__compare(const void *a, const void *b) +{ + struct function *fa = (struct function *)a, *fb = (struct function *)b; + + return strcmp(function__name(fa), function__name(fb)); +} + +struct btf_encoder_state { + struct btf_encoder *encoder; + uint32_t type_id_off; +}; + +static void btf_encoder__merge_func(struct btf_encoder *encoder, struct function *fn) +{ + struct function **nodep; + + nodep = tfind(fn, &encoder->saved_func_tree, function__compare); + if (!nodep || !*nodep) + return; + /* merge characteristics across different encoder representations + * of functions. + */ + fn->proto.optimized_parms |= (*nodep)->proto.optimized_parms; + (*nodep)->proto.optimized_parms |= fn->proto.optimized_parms; + (*nodep)->proto.processed = 1; +} + +static int32_t btf_encoder__save_func(struct btf_encoder *encoder, struct function *fn) +{ + const char *name = function__name(fn); + struct function **nodep; + + nodep = tsearch(fn, &encoder->saved_func_tree, function__compare); + if (nodep == NULL) { + fprintf(stderr, "error: out of memory adding static function '%s'\n", + name); + return -1; + } + /* If saving and we find an existing entry, we want to merge + * observations across both functions, checking that the + * "seen optimized parameters" status is reflected in our tree entry. + * If the entry is new, record encoder state required + * to add the local function later (encoder + type_id_off) + * such that we can add the function later. + */ + if (*nodep != fn) { + (*nodep)->proto.optimized_parms |= fn->proto.optimized_parms; + } else { + struct btf_encoder_state *state = zalloc(sizeof(*state)); + + if (state == NULL) { + fprintf(stderr, "error: out of memory adding local function '%s'\n", + name); + return -1; + } + state->encoder = encoder; + state->type_id_off = encoder->type_id_off; + fn->priv = state; + encoder->saved_func_cnt++; + } + return 0; +} + static int32_t btf_encoder__add_func(struct btf_encoder *encoder, struct function *fn) { int btf_fnproto_id, btf_fn_id, tag_type_id; @@ -801,6 +876,67 @@ static int32_t btf_encoder__add_func(struct btf_encoder *encoder, struct functio return 0; } +/* visit each node once, adding associated function. */ +static void btf_encoder__add_saved_func(const void *nodep, const VISIT which, + const int depth __maybe_unused) +{ + struct btf_encoder *encoder, *other_encoder; + struct btf_encoder_state *state; + struct function *fn = NULL; + + switch (which) { + case preorder: + case endorder: + break; + case postorder: + case leaf: + fn = *((struct function **)nodep); + break; + } + if (!fn || !fn->priv || fn->proto.processed) + return; + state = (struct btf_encoder_state *)fn->priv; + encoder = state->encoder; + encoder->type_id_off = state->type_id_off; + + /* merge optimized-out status across encoders */ + btf_encoders__for_each_encoder(other_encoder) { + if (other_encoder != encoder) + btf_encoder__merge_func(other_encoder, fn); + } + + if (fn->proto.optimized_parms) { + if (encoder->verbose) { + const char *name = function__name(fn); + + printf("skipping addition of '%s'(%s) due to optimized-out parameters\n", + name, fn->alias ?: name); + } + } else { + btf_encoder__add_func(encoder, fn); + fn->proto.processed = 1; + } +} + +static void saved_func__free(void *node) +{ + struct function *fn = node; + + if (fn->priv) + free(fn->priv); +} + +void btf_encoder__add_saved_funcs(struct btf_encoder *encoder) +{ + if (!encoder->saved_func_tree) + return; + + encoder->type_id_off = 0; + twalk(encoder->saved_func_tree, btf_encoder__add_saved_func); + tdestroy(encoder->saved_func_tree, saved_func__free); + encoder->saved_func_tree = NULL; +} + /* * This corresponds to the same macro defined in * include/linux/kallsyms.h @@ -812,6 +948,11 @@ static int functions_cmp(const void *_a, const void *_b) const struct elf_function *a = _a; const struct elf_function *b = _b; + /* if search key allows prefix match, verify target has matching + * prefix len and prefix matches. + */ + if (a->prefixlen && a->prefixlen == b->prefixlen) + return strncmp(a->name, b->name, b->prefixlen); return strcmp(a->name, b->name); } @@ -844,14 +985,21 @@ static int btf_encoder__collect_function(struct btf_encoder *encoder, GElf_Sym * } encoder->functions.entries[encoder->functions.cnt].name = name; + if (strchr(name, '.')) { + const char *suffix = strchr(name, '.'); + + encoder->functions.suffix_cnt++; + encoder->functions.entries[encoder->functions.cnt].prefixlen = suffix - name; + } encoder->functions.entries[encoder->functions.cnt].generated = false; encoder->functions.cnt++; return 0; } -static struct elf_function *btf_encoder__find_function(const struct btf_encoder *encoder, const char *name) +static struct elf_function *btf_encoder__find_function(const struct btf_encoder *encoder, + const char *name, size_t prefixlen) { - struct elf_function key = { .name = name }; + struct elf_function key = { .name = name, .prefixlen = prefixlen }; return bsearch(&key, encoder->functions.entries, encoder->functions.cnt, sizeof(key), functions_cmp); } @@ -1181,6 +1329,9 @@ int btf_encoder__encode(struct btf_encoder *encoder) { int err; + /* for single-threaded case, saved funcs are added here */ + btf_encoder__add_saved_funcs(encoder); + if (gobuffer__size(&encoder->percpu_secinfo) != 0) btf_encoder__add_datasec(encoder, PERCPU_SECTION); @@ -1628,6 +1779,7 @@ int btf_encoder__encode_cu(struct btf_encoder *encoder, struct cu *cu, struct co } cu__for_each_function(cu, core_id, fn) { + bool save = false; /* * Skip functions that: @@ -1648,22 +1800,55 @@ int btf_encoder__encode_cu(struct btf_encoder *encoder, struct cu *cu, struct co if (!name) continue; - func = btf_encoder__find_function(encoder, name); - if (!func || func->generated) + /* prefer exact function name match... */ + func = btf_encoder__find_function(encoder, name, 0); + if (func) { + if (func->generated) + continue; + func->generated = true; + } else if (encoder->functions.suffix_cnt) { + /* falling back to name.isra.0 match if no exact + * match is found; only bother if we found any + * .suffix function names. The function + * will be saved and added once we ensure + * it does not have optimized-out parameters + * in any cu. + */ + func = btf_encoder__find_function(encoder, name, + strlen(name)); + if (func) { + save = true; + if (encoder->verbose) + printf("matched function '%s' with '%s'%s\n", + name, func->name, + fn->proto.optimized_parms ? + ", has optimized-out parameters" : ""); + } + } + if (!func) continue; - func->generated = true; + fn->alias = func->name; } else { if (!fn->external) continue; } - err = btf_encoder__add_func(encoder, fn); + if (save) + err = btf_encoder__save_func(encoder, fn); + else + err = btf_encoder__add_func(encoder, fn); if (err) goto out; } if (!encoder->skip_encoding_vars) err = btf_encoder__encode_cu_variables(encoder); + + /* It is only safe to delete this CU if we have not stashed any static + * functions for later addition. + */ + if (!err) + err = encoder->saved_func_cnt > 0 ? LSK__KEEPIT : LSK__DELETE; out: encoder->cu = NULL; return err; diff --git a/dwarves.h b/dwarves.h index 2723466..64c7c56 100644 --- a/dwarves.h +++ b/dwarves.h @@ -831,6 +831,7 @@ struct ftype { uint16_t nr_parms; uint8_t unspec_parms:1; /* just one bit is needed */ uint8_t optimized_parms:1; + uint8_t processed:1; }; static inline struct ftype *tag__ftype(const struct tag *tag) @@ -883,6 +884,7 @@ struct function { struct rb_node rb_node; const char *name; const char *linkage_name; + const char *alias; /* name.isra.0 */ uint32_t cu_total_size_inline_expansions; uint16_t cu_total_nr_inline_expansions; uint8_t inlined:2; diff --git a/pahole.c b/pahole.c index 6f4f87c..bc120cb 100644 --- a/pahole.c +++ b/pahole.c @@ -2980,20 +2980,20 @@ static int pahole_threads_collect(struct conf_load *conf, int nr_threads, void * * Merge content of the btf instances of worker threads to the btf * instance of the primary btf_encoder. */ - if (!threads[i]->btf || threads[i]->encoder == btf_encoder) - continue; /* The primary btf_encoder */ + if (!threads[i]->btf) + continue; err = btf_encoder__add_encoder(btf_encoder, threads[i]->encoder); if (err < 0) goto out; - btf_encoder__delete(threads[i]->encoder); - threads[i]->encoder = NULL; } err = 0; out: for (i = 0; i < nr_threads; i++) { - if (threads[i]->encoder && threads[i]->encoder != btf_encoder) + if (threads[i]->encoder && threads[i]->encoder != btf_encoder) { btf_encoder__delete(threads[i]->encoder); + threads[i]->encoder = NULL; + } } free(threads[0]); @@ -3077,11 +3077,11 @@ static enum load_steal_kind pahole_stealer(struct cu *cu, encoder = btf_encoder; } - if (btf_encoder__encode_cu(encoder, cu, conf_load)) { + ret = btf_encoder__encode_cu(encoder, cu, conf_load); + if (ret < 0) { fprintf(stderr, "Encountered error while encoding BTF.\n"); exit(1); } - ret = LSK__DELETE; out_btf: return ret; } -- 1.8.3.1