-----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1 According to Eric Blake on 6/4/2006 4:09 PM: > > For CVS coreutils, with 83303 definitions and 2526066 lookups, using > -H99991 reduced comparisons from 9364329 (36022688 bytes, 3.5 comparisons > per lookup) to 2415937 (22883978 bytes, less than 1 compare per lookup), > but never bought me much more than .5 sec out of 30 on my machine (1.5 % > speedup, but that was with naive profiling turned on rather than an > optimized strcmp). So having autom4te play with -H may not buy us much. > I'll report separately about potential m4 1.4.5 improvements; this mail is > just limited to the framework for testing the various ideas. This patch stores string hashes, to avoid strcmp. With the default 509 buckets on CVS coreutils, it causes only 2362141 strcmp, with only 25 failed comparisons. There are still 22829893 bytes compared (ie. the 2.3M successful lookups results in 22M of length, or about 9 characters per average token); this shaved only 60k of comparisons from the earlier run with -H99991 buckets without this patch. Comparing the time for m4 1.4.4 and my version of m4 with this patch, both compiled at -O2, still shows no noticeable difference in timing of autoconf. But keep in mind that autoconf is also invoking several other processes, perhaps the actual m4 invocations are noticeably faster. So I don't know whether it is worth applying this patch. 2006-06-04 Eric Blake <ebb9@xxxxxxx> * src/m4.h (struct symbol, SYMBOL_HASH): Store symbol's hash. * src/symtab.c (hash): Don't perform modulo here. (lookup_symbol): Store hash, use it to reduce strcmp calls. (symtab_print_list) [DEBUG_SYM]: Display hash. - -- Life is short - so eat dessert first! Eric Blake ebb9@xxxxxxx -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.2.1 (Cygwin) Comment: Public key at home.comcast.net/~ericblake/eblake.gpg Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org iD8DBQFEg23O84KuGfSFAYARAoj/AJ9L5ujZrluhCIIp/lJ8s03XsGQyewCfal77 rAnmz/AeBwETqyymOzApHZ8= =+S6M -----END PGP SIGNATURE-----
Index: src/m4.h =================================================================== RCS file: /sources/m4/m4/src/m4.h,v retrieving revision 1.1.1.1.2.5 diff -u -p -r1.1.1.1.2.5 m4.h --- src/m4.h 27 May 2006 18:11:23 -0000 1.1.1.1.2.5 +++ src/m4.h 4 Jun 2006 23:31:33 -0000 @@ -382,6 +382,7 @@ struct symbol boolean blind_no_args; char *name; + unsigned hash; token_data data; }; @@ -391,6 +392,7 @@ struct symbol #define SYMBOL_MACRO_ARGS(S) ((S)->macro_args) #define SYMBOL_BLIND_NO_ARGS(S) ((S)->blind_no_args) #define SYMBOL_NAME(S) ((S)->name) +#define SYMBOL_HASH(S) ((S)->hash) #define SYMBOL_TYPE(S) (TOKEN_DATA_TYPE (&(S)->data)) #define SYMBOL_TEXT(S) (TOKEN_DATA_TEXT (&(S)->data)) #define SYMBOL_FUNC(S) (TOKEN_DATA_FUNC (&(S)->data)) Index: src/symtab.c =================================================================== RCS file: /sources/m4/m4/src/Attic/symtab.c,v retrieving revision 1.1.1.1.2.3 diff -u -p -r1.1.1.1.2.3 symtab.c --- src/symtab.c 4 Jun 2006 22:09:30 -0000 1.1.1.1.2.3 +++ src/symtab.c 4 Jun 2006 23:31:33 -0000 @@ -117,10 +117,10 @@ symtab_init (void) | Return a hashvalue for a string, from GNU-emacs. | `--------------------------------------------------*/ -static int +static unsigned hash (const char *s) { - register int val = 0; + register unsigned val = 0; register const char *ptr = s; register char ch; @@ -131,8 +131,7 @@ hash (const char *s) ch -= 40; val = ((val << 3) + (val >> 28) + ch); }; - val = (val < 0) ? -val : val; - return val % hash_table_size; + return val; } /*--------------------------------------------. @@ -165,7 +164,8 @@ free_symbol (symbol *sym) symbol * lookup_symbol (const char *name, symbol_lookup mode) { - int h, cmp = 1; + unsigned h; + int cmp = 1; symbol *sym, *prev; symbol **spp; @@ -175,11 +175,13 @@ lookup_symbol (const char *name, symbol_ #endif /* DEBUG_SYM */ h = hash (name); - sym = symtab[h]; + sym = symtab[h % hash_table_size]; for (prev = NULL; sym != NULL; prev = sym, sym = sym->next) { - cmp = strcmp (SYMBOL_NAME (sym), name); + cmp = (h < SYMBOL_HASH (sym) ? -1 + : h > SYMBOL_HASH (sym) ? 1 + : strcmp (SYMBOL_NAME (sym), name)); if (cmp >= 0) break; } @@ -191,7 +193,7 @@ lookup_symbol (const char *name, symbol_ /* Symbol not found. */ - spp = (prev != NULL) ? &prev->next : &symtab[h]; + spp = (prev != NULL) ? &prev->next : &symtab[h % hash_table_size]; switch (mode) { @@ -215,6 +217,7 @@ lookup_symbol (const char *name, symbol_ SYMBOL_TYPE (sym) = TOKEN_VOID; SYMBOL_TRACED (sym) = SYMBOL_SHADOWED (sym) = FALSE; SYMBOL_NAME (sym) = xstrdup (name); + SYMBOL_HASH (sym) = h; SYMBOL_SHADOWED (sym) = FALSE; SYMBOL_MACRO_ARGS (sym) = FALSE; SYMBOL_BLIND_NO_ARGS (sym) = FALSE; @@ -330,9 +333,9 @@ symtab_print_list (int i) printf ("Symbol dump #%d:\n", i); for (h = 0; h < hash_table_size; h++) for (sym = symtab[h]; sym != NULL; sym = sym->next) - printf ("\tname %s, bucket %d, addr %p, next %p, " + printf ("\tname %s, hash %u, bucket %d, addr %p, next %p, " "flags%s%s\n", - SYMBOL_NAME (sym), + SYMBOL_NAME (sym), SYMBOL_HASH (sym), h, sym, SYMBOL_NEXT (sym), SYMBOL_TRACED (sym) ? " traced" : "", SYMBOL_SHADOWED (sym) ? " shadowed" : "");
_______________________________________________ Autoconf mailing list Autoconf@xxxxxxx http://lists.gnu.org/mailman/listinfo/autoconf