Hi Tao, great patch. I have some comments and questions though. On Sun, 22 Aug 2021 09:51:12 +0800 Tao Liu <ltao@xxxxxxxxxx> wrote: > Currently the sequence for crash searching a symbol is: 1) kernel symname > hash table, 2) iterate all kernel symbols, 3) iterate all kernel modules > and their symbols. In the worst case, if a non-exist symbol been searched, > all 3 stages will be went through. The time consuming status for each stage > is like: > > stage 1 stage 2 stage 3 > 0.007000(ms) 0.593000(ms) 2.421000(ms) > > stage 3 takes too much time when comparing to stage 1. So let's introduce a > symname hash table for kernel modules to improve the performance of symbol > searching. > > Signed-off-by: Tao Liu <ltao@xxxxxxxxxx> > --- > v1 -> v2: > - Removed unused variables within the modified functions. > > --- > defs.h | 1 + > kernel.c | 1 + > symbols.c | 189 +++++++++++++++++++++++++++++++----------------------- > 3 files changed, 111 insertions(+), 80 deletions(-) > > diff --git a/defs.h b/defs.h > index eb1c71b..58b8546 100644 > --- a/defs.h > +++ b/defs.h > @@ -2751,6 +2751,7 @@ struct symbol_table_data { > double val_hash_searches; > double val_hash_iterations; > struct syment *symname_hash[SYMNAME_HASH]; > + struct syment *mod_symname_hash[SYMNAME_HASH]; ^^^^^^^^ there are quite some whitespace damages throughout the patch. Most of them seem to origin from old code that gets copy & pasted. It would be great if you could fix them on the lines you are touching. > struct symbol_namespace kernel_namespace; > struct syment *ext_module_symtable; > struct syment *ext_module_symend; > diff --git a/kernel.c b/kernel.c > index 36fdea2..c56cc34 100644 > --- a/kernel.c > +++ b/kernel.c > @@ -4663,6 +4663,7 @@ reinit_modules(void) > kt->mods_installed = 0; > clear_text_value_cache(); > > + memset(st->mod_symname_hash, 0, sizeof(st->mod_symname_hash)); > module_init(); > } > > diff --git a/symbols.c b/symbols.c > index bf6d94d..9b64734 100644 > --- a/symbols.c > +++ b/symbols.c > @@ -64,8 +64,9 @@ static int namespace_ctl(int, struct symbol_namespace *, void *, void *); > static void symval_hash_init(void); > static struct syment *symval_hash_search(ulong); > static void symname_hash_init(void); > -static void symname_hash_install(struct syment *); > -static struct syment *symname_hash_search(char *); > +static void symname_hash_install(struct syment *[], struct syment *); > +static void symname_hash_remove(struct syment *[], struct syment *); > +static struct syment *symname_hash_search(struct syment *[], char *, int (*)(struct syment *, char *)); > static void gnu_qsort(bfd *, void *, long, unsigned int, asymbol *, asymbol *); > static int check_gnu_debuglink(bfd *); > static int separate_debug_file_exists(const char *, unsigned long, int *); > @@ -1119,7 +1120,7 @@ symname_hash_init(void) > struct syment *sp; > > for (sp = st->symtable; sp < st->symend; sp++) > - symname_hash_install(sp); > + symname_hash_install(st->symname_hash, sp); > > if ((sp = symbol_search("__per_cpu_start"))) > st->__per_cpu_start = sp->value; > @@ -1127,21 +1128,48 @@ symname_hash_init(void) > st->__per_cpu_end = sp->value; > } > > +static void > +mod_symtable_hash_install_range(struct syment *from, struct syment *to) > +{ > + struct syment *sp; > + > + for (sp = from; sp <= to; sp++) { > + if (sp != NULL) { > + symname_hash_install(st->mod_symname_hash, sp); > + } > + } > +} > + > +static void > +mod_symtable_hash_remove_range(struct syment *from, struct syment *to) > +{ > + struct syment *sp; > + > + for (sp = from; sp <= to; sp++) { > + if (sp != NULL) { > + symname_hash_remove(st->mod_symname_hash, sp); > + } > + } > +} 1. 'if (sp)' is the same like 'if (sp != NULL)' but shorter. That's why personally I prefer the first version :) 2. Wouldn't it make sense to move this check to the beginning of symname_hash_{install,remove}? Then also the other users like symname_hash_init would benefit. > /* > * Install a single static kernel symbol into the symname_hash. > */ > static void > -symname_hash_install(struct syment *spn) > +symname_hash_install(struct syment *table[], struct syment *spn) > { > struct syment *sp; > int index; > > index = SYMNAME_HASH_INDEX(spn->name); > + index = index > 0 ? index : -index; true, in theory index could be negative which would be really bad. But isn't that an independent problem from the rest of the patch? If so this change should go in an extra patch. Furthermore the fix should be done in SYMNAME_HASH_INDEX so that all of its users are fixed. The way I see it this should be enough (using abs from stdlib.h) #define SYMNAME_HASH_INDEX(name) \ - ((name[0] ^ (name[strlen(name)-1] * name[strlen(name)/2])) % SYMNAME_HASH) + (abs((name[0] ^ (name[strlen(name)-1] * name[strlen(name)/2])) % SYMNAME_HASH)) > spn->cnt = 1; > > - if ((sp = st->symname_hash[index]) == NULL) > - st->symname_hash[index] = spn; > - else { > + if ((sp = table[index]) == NULL) { > + table[index] = spn; > + spn->name_hash_next = NULL; similar to above, isn't explicitly setting spn->name_hash_next = NULL an independent problem from the rest of the changes and thus should go to a separate patch? > + } else { > while (sp) { > if (STREQ(sp->name, spn->name)) { > sp->cnt++; > @@ -1151,23 +1179,67 @@ symname_hash_install(struct syment *spn) > sp = sp->name_hash_next; > else { > sp->name_hash_next = spn; > + spn->name_hash_next = NULL; > break; > } > } > } > } > > +static void > +symname_hash_remove(struct syment *table[], struct syment *spn) > +{ > + struct syment *sp, **tmp; > + int index, first_encounter = 1; > + > + index = SYMNAME_HASH_INDEX(spn->name); > + index = index > 0 ? index : -index; > + > + if ((sp = table[index]) == NULL) > + return; > + > + for (tmp = &table[index], sp = table[index]; sp; ) { > + if (STREQ(sp->name, spn->name)) { > + if (sp != spn) { > + sp->cnt--; > + spn->cnt--; > + } else if (!first_encounter) { > + sp->cnt--; > + } else { > + *tmp = sp->name_hash_next; > + first_encounter = 0; > + > + tmp = &(*tmp)->name_hash_next; > + sp = sp->name_hash_next; > + spn->name_hash_next = NULL; > + continue; > + } > + } > + tmp = &sp->name_hash_next; > + sp = sp->name_hash_next; What do you need tmp for? The way I see it you only assign to it but never really use it. > + } > +} > + > /* > * Static kernel symbol value search > */ > static struct syment * > -symname_hash_search(char *name) > +symname_hash_search(struct syment *table[], char *name, > + int (*skip_condition)(struct syment *, char *)) this line should be indented to match the open parentheses after the function name. > { > struct syment *sp; > + int index; > > - sp = st->symname_hash[SYMNAME_HASH_INDEX(name)]; > + index = SYMNAME_HASH_INDEX(name); > + index = index > 0 ? index : -index; > + sp = table[index]; > > while (sp) { > + if (skip_condition && skip_condition(sp, name)) { > + sp = sp->name_hash_next; > + continue; > + } > + > if (STREQ(sp->name, name)) > return sp; > sp = sp->name_hash_next; > @@ -1595,6 +1667,7 @@ store_module_symbols_v1(ulong total, int mods_installed) > lm->mod_symend = sp; > } > } > + mod_symtable_hash_install_range(lm->mod_symtable, lm->mod_symend); > } > > st->flags |= MODULE_SYMS; > @@ -2075,6 +2148,8 @@ store_module_symbols_v2(ulong total, int mods_installed) > lm->mod_init_symend = sp; > } > } > + mod_symtable_hash_install_range(lm->mod_symtable, lm->mod_symend); > + mod_symtable_hash_install_range(lm->mod_init_symtable, lm->mod_init_symend); > } > > st->flags |= MODULE_SYMS; > @@ -4482,6 +4557,16 @@ symbol_query(char *s, char *print_pad, struct syment **spp) > return(cnt); > } > > +static int > +skip_symbols(struct syment *sp, char *s) > +{ > + int pseudos, skip = 0; > + pseudos = (strstr(s, "_MODULE_START_") || strstr(s, "_MODULE_END_") || > + strstr(s, "_MODULE_INIT_START_") || strstr(s, "_MODULE_INIT_END_")); > + if (!pseudos && MODULE_PSEUDO_SYMBOL(sp)) > + skip = 1; > + return skip; > +} It took really long for me to wrap my head around what is happening here but in the end I'm pretty sure that the extra filtering is unnecessary and can simply be dropped without problem. To be fair what you are doing seems correct it's just by cleaning up the code the problem became more obvious... Let's see what is happening here: 1) strstr returns a pointer to the start of the second string if is is contained in the first one and NULL otherwise. This means 'pseudos' is true if 's' contains any of the _MODULE_* strings, i.e. if s is a pseudo symbol. 2) MODULE_PSEUDO_SYMBOL does basically the same only that it checks 'sp->name' instead of 's' and enforces that the "_MODULE_*" strings are at the beginning of the symbol name not just contained in it. Let's look at the different cases 3.1) both 's' and 'sp' are proper, i.e. no pseudo, symbols This means skip_symbols returns false so symname_hash_search doesn't skip the symbol but compares 's' to 'sp->name' to check if 'sp' is the symbol you are searching for. This is basically the case you want. 3.2) both 's' and 'sp' are pseudo symbols Again skip_symbols returns false and symname_hash_search compares 's' with 'sp->name' to check if 'sp' is the symbol you are searching for. I'm not entirely sure if this way symname_hash_search is intended to be used but I also don't see a reason why it shouldn't be done. 3.3) 's' is a pseudo and 'sp' a proper symbol same like 3.2). 3.4) 's' is a proper symbol and 'sp' a psudo symbol here skip_symbols returns true and symname_hash_search skips this case. So the only case that is filtered out is 3.4 in which 's' must not contain any '_MODULES_*' while 'sp->name' has to start with one. But that's something a simple STREQ can handle like in case 3.3. So what's the point in having this extra filtering? > /* > * Return the syment of a symbol. > @@ -4489,58 +4574,16 @@ symbol_query(char *s, char *print_pad, struct syment **spp) > struct syment * > symbol_search(char *s) > { > - int i; > - struct syment *sp_hashed, *sp, *sp_end; > - struct load_module *lm; > - int pseudos, search_init; > + struct syment *sp_hashed, *sp; > > - sp_hashed = symname_hash_search(s); > + sp_hashed = symname_hash_search(st->symname_hash, s, NULL); > > for (sp = sp_hashed ? sp_hashed : st->symtable; sp < st->symend; sp++) { > if (STREQ(s, sp->name)) > return(sp); > } > > - pseudos = (strstr(s, "_MODULE_START_") || strstr(s, "_MODULE_END_")); > - search_init = FALSE; > - > - for (i = 0; i < st->mods_installed; i++) { > - lm = &st->load_modules[i]; > - if (lm->mod_flags & MOD_INIT) > - search_init = TRUE; > - sp = lm->mod_symtable; > - sp_end = lm->mod_symend; > - > - for ( ; sp <= sp_end; sp++) { > - if (!pseudos && MODULE_PSEUDO_SYMBOL(sp)) > - continue; > - if (STREQ(s, sp->name)) > - return(sp); > - } > - } > - > - if (!search_init) > - return((struct syment *)NULL); > - > - pseudos = (strstr(s, "_MODULE_INIT_START_") || strstr(s, "_MODULE_INIT_END_")); > - > - for (i = 0; i < st->mods_installed; i++) { > - lm = &st->load_modules[i]; > - if (!lm->mod_init_symtable) > - continue; > - sp = lm->mod_init_symtable; > - sp_end = lm->mod_init_symend; > - > - for ( ; sp < sp_end; sp++) { > - if (!pseudos && MODULE_PSEUDO_SYMBOL(sp)) > - continue; > - > - if (STREQ(s, sp->name)) > - return(sp); > - } > - } > - > - return((struct syment *)NULL); > + return symname_hash_search(st->mod_symname_hash, s, skip_symbols); > } > > /* > @@ -5432,33 +5475,13 @@ value_symbol(ulong value) > int > symbol_exists(char *symbol) > { > - int i; > - struct syment *sp, *sp_end; > - struct load_module *lm; > + struct syment *sp; > > - if ((sp = symname_hash_search(symbol))) > + if ((sp = symname_hash_search(st->symname_hash, symbol, NULL))) > return TRUE; > > - for (i = 0; i < st->mods_installed; i++) { > - lm = &st->load_modules[i]; > - sp = lm->mod_symtable; > - sp_end = lm->mod_symend; > - > - for ( ; sp < sp_end; sp++) { > - if (STREQ(symbol, sp->name)) > - return(TRUE); > - } > - > - if (lm->mod_init_symtable) { > - sp = lm->mod_init_symtable; > - sp_end = lm->mod_init_symend; > - > - for ( ; sp < sp_end; sp++) { > - if (STREQ(symbol, sp->name)) > - return(TRUE); > - } > - } > - } > + if ((sp = symname_hash_search(st->mod_symname_hash, symbol, NULL))) > + return TRUE; > > return(FALSE); > } Isn't this function basically identical to symbol_search and thus can be abbreviated to return !!(symbol_search(symbol)); > @@ -5515,7 +5538,7 @@ kernel_symbol_exists(char *symbol) > { > struct syment *sp; > > - if ((sp = symname_hash_search(symbol))) > + if ((sp = symname_hash_search(st->symname_hash, symbol, NULL))) > return TRUE; > else > return FALSE; same like above. This could be abbreviated to return !!(symname_hash_search(st->symname_hash, symbol, NULL)); > @@ -5527,7 +5550,7 @@ kernel_symbol_exists(char *symbol) > struct syment * > kernel_symbol_search(char *symbol) > { > - return symname_hash_search(symbol); > + return symname_hash_search(st->symname_hash, symbol, NULL); > } > > /* > @@ -12464,8 +12487,10 @@ store_load_module_symbols(bfd *bfd, int dynamic, void *minisyms, > error(INFO, "%s: last symbol: %s is not _MODULE_END_%s?\n", > lm->mod_name, lm->mod_load_symend->name, lm->mod_name); > > + mod_symtable_hash_remove_range(lm->mod_symtable, lm->mod_symend); > lm->mod_symtable = lm->mod_load_symtable; > lm->mod_symend = lm->mod_load_symend; > + mod_symtable_hash_install_range(lm->mod_symtable, lm->mod_symend); > > lm->mod_flags &= ~MOD_EXT_SYMS; > lm->mod_flags |= MOD_LOAD_SYMS; > @@ -12495,6 +12520,7 @@ delete_load_module(ulong base_addr) > req->name = lm->mod_namelist; > gdb_interface(req); > } > + mod_symtable_hash_remove_range(lm->mod_symtable, lm->mod_symend); > if (lm->mod_load_symtable) { > free(lm->mod_load_symtable); > namespace_ctl(NAMESPACE_FREE, > @@ -12504,6 +12530,7 @@ delete_load_module(ulong base_addr) > unlink_module(lm); > lm->mod_symtable = lm->mod_ext_symtable; > lm->mod_symend = lm->mod_ext_symend; > + mod_symtable_hash_install_range(lm->mod_symtable, lm->mod_symend); > lm->mod_flags &= ~(MOD_LOAD_SYMS|MOD_REMOTE|MOD_NOPATCH); > lm->mod_flags |= MOD_EXT_SYMS; > lm->mod_load_symtable = NULL; > @@ -12532,6 +12559,7 @@ delete_load_module(ulong base_addr) > req->name = lm->mod_namelist; > gdb_interface(req); > } > + mod_symtable_hash_remove_range(lm->mod_symtable, lm->mod_symend); > if (lm->mod_load_symtable) { > free(lm->mod_load_symtable); > namespace_ctl(NAMESPACE_FREE, > @@ -12541,6 +12569,7 @@ delete_load_module(ulong base_addr) > unlink_module(lm); > lm->mod_symtable = lm->mod_ext_symtable; > lm->mod_symend = lm->mod_ext_symend; > + mod_symtable_hash_install_range(lm->mod_symtable, lm->mod_symend); > lm->mod_flags &= ~(MOD_LOAD_SYMS|MOD_REMOTE|MOD_NOPATCH); > lm->mod_flags |= MOD_EXT_SYMS; > lm->mod_load_symtable = NULL; I must admit I don't understand how the last two functions work, so I'm relying on Kazu to comment on those. Thanks Philipp -- Crash-utility mailing list Crash-utility@xxxxxxxxxx https://listman.redhat.com/mailman/listinfo/crash-utility