Eric Blake <ebb9@xxxxxxx> writes: > right now, both the 1.4 branch and head use an algorithm borrowed from emacs: That algorithm could be improved. M4 should do all its hash computation using size_t, not a mixture of int and size_t. And it shouldn't slow itself down to do case-folding, since M4 is case-sensitive. And it shouldn't assume that int is 32 bits wide. Worst of all, M4 shouldn't assume that the absolute value of an 'int' must be nonnegative. That is not true for INT_MIN. On most current hosts, m4-1.4.4 has an approximately one in 2**32 chance of using INT_MIN % 509 as a subscript, which on most hosts will lead to a subscript error that will cause serious trouble. Here is a patch to m4 1.4.4 that fixes these problems. It results in a very slight performance improvement to Autoconf, and a very slightly-smaller M4 executable. It changes M4 to use the same hash algorithm as gnulib/lib/hash.c. 2006-06-04 Paul Eggert <eggert@xxxxxxxxxxx> * src/m4.h (hash_table_size): Now size_t instead of int. * src/m4.c (hash_table_size): Likewise. (main): Adjust to this; use atol rather than atoi. * src/symtab.c: Include <limits.h>, for CHAR_BIT. (symtab_init, lookup_symbol, hack_all_symbols): Use size_t for sizes and indexes, not int. (hash): Likewise. Don't case-fold in the hash function. Shift by 7, not 3, for consistency with gnulib/lib/hash.c. Don't assume hash word is 32 bits. diff -pru m4-1.4.4/src/m4.c m4-1.4.4-hash/src/m4.c --- m4-1.4.4/src/m4.c 2005-10-18 04:48:58.000000000 -0700 +++ m4-1.4.4-hash/src/m4.c 2006-06-04 20:54:36.000000000 -0700 @@ -42,7 +42,7 @@ int sync_output = 0; int debug_level = 0; /* Hash table size (should be a prime) (-Hsize). */ -int hash_table_size = HASHMAX; +size_t hash_table_size = HASHMAX; /* Disable GNU extensions (-G). */ int no_gnu_extensions = 0; @@ -329,8 +329,8 @@ main (int argc, char *const *argv, char break; case 'H': - hash_table_size = atoi (optarg); - if (hash_table_size <= 0) + hash_table_size = atol (optarg); + if (hash_table_size == 0) hash_table_size = HASHMAX; break; diff -pru m4-1.4.4/src/m4.h m4-1.4.4-hash/src/m4.h --- m4-1.4.4/src/m4.h 2005-05-01 04:37:09.000000000 -0700 +++ m4-1.4.4-hash/src/m4.h 2006-06-04 20:54:56.000000000 -0700 @@ -150,7 +150,7 @@ typedef struct token_data token_data; /* Option flags. */ extern int sync_output; /* -s */ extern int debug_level; /* -d */ -extern int hash_table_size; /* -H */ +extern size_t hash_table_size; /* -H */ extern int no_gnu_extensions; /* -G */ extern int prefix_all_builtins; /* -P */ extern int max_debug_argument_length; /* -l */ diff -pru m4-1.4.4/src/symtab.c m4-1.4.4-hash/src/symtab.c --- m4-1.4.4/src/symtab.c 2005-05-01 04:35:20.000000000 -0700 +++ m4-1.4.4-hash/src/symtab.c 2006-06-04 21:18:39.000000000 -0700 @@ -32,6 +32,7 @@ will then always be the first found. */ #include "m4.h" +#include <limits.h> /*----------------------------------------------------------------------. | Initialise the symbol table, by allocating the necessary storage, and | @@ -44,34 +45,29 @@ symbol **symtab; void symtab_init (void) { - int i; + size_t i; symbol **s; s = symtab = (symbol **) xmalloc (hash_table_size * sizeof (symbol *)); - for (i = hash_table_size; --i >= 0;) - *s++ = NULL; + for (i = 0; i < hash_table_size; i++) + s[i] = NULL; } /*--------------------------------------------------. | Return a hashvalue for a string, from GNU-emacs. | `--------------------------------------------------*/ -static int +static size_t hash (const char *s) { - register int val = 0; + register size_t val = 0; register const char *ptr = s; register char ch; while ((ch = *ptr++) != '\0') - { - if (ch >= 0140) - ch -= 40; - val = ((val << 3) + (val >> 28) + ch); - }; - val = (val < 0) ? -val : val; + val = (val << 7) + (val >> (sizeof (val) * CHAR_BIT - 7)) + ch; return val % hash_table_size; } @@ -105,7 +101,8 @@ free_symbol (symbol *sym) symbol * lookup_symbol (const char *name, symbol_lookup mode) { - int h, cmp = 1; + size_t h; + int cmp = 1; symbol *sym, *prev; symbol **spp; @@ -209,7 +206,7 @@ lookup_symbol (const char *name, symbol_ void hack_all_symbols (hack_symbol *func, const char *data) { - int h; + size_t h; symbol *sym; for (h = 0; h < hash_table_size; h++) _______________________________________________ Autoconf mailing list Autoconf@xxxxxxx http://lists.gnu.org/mailman/listinfo/autoconf