>From 502cbd61e386eb014035df66b032e90a9de926b7 Mon Sep 17 00:00:00 2001 From: Devin Hussey <husseydevin@xxxxxxxxx> Date: Thu, 15 Nov 2018 09:12:24 -0500 Subject: [PATCH] Use a real non-cryptographic hash algorithm dash previously used a modified version of the LoseLose algorithm. LoseLose is a TERRIBLE algorithm. It has terrible distribution, leaving many hash entries unused. However, more importantly, it is incredibly easy to make a collision; even something as simple as rearranging the letters. For example. these all produce the hash 1652: Hello Holle Hlleo Hoell Haxed\n HASH\tBAD Hater Collsions in hash tables are very bad because it requires looking through a linked list, and the benefits of O(1) time in a hash table are completely lost. The FNV-1a algorithm has comparable performance and code size while having a much better collision rate. While there are some other faster and more resistant hashes out there, namely xxHash, Murmur2, and CityHash, the benefits are minimal on short strings and they expect a length argument, unlike FNV-1a which simply uses null-termination, and they are not in the public domain like FNV-1a. Signed-off-by: Devin Hussey <husseydevin@xxxxxxxxx> --- src/alias.c | 14 ++++---------- src/exec.c | 11 +++++------ src/shell.h | 3 +++ src/var.c | 5 ++--- 4 files changed, 14 insertions(+), 19 deletions(-) diff --git a/src/alias.c b/src/alias.c index daeacbb..f625199 100644 --- a/src/alias.c +++ b/src/alias.c @@ -202,19 +202,13 @@ printalias(const struct alias *ap) { STATIC struct alias ** __lookupalias(const char *name) { - unsigned int hashval; struct alias **app; - const char *p; - unsigned int ch; + unsigned int hashval = FNV_OFFSET_BASIS; + const char *p = name; - p = name; + while (*p) + hashval = (hashval ^ (unsigned char)*p++) * FNV_PRIME; - ch = (unsigned char)*p; - hashval = ch << 4; - while (ch) { - hashval += ch; - ch = (unsigned char)*++p; - } app = &atab[hashval % ATABSIZE]; for (; *app; app = &(*app)->next) { diff --git a/src/exec.c b/src/exec.c index 9d0215a..c6b515d 100644 --- a/src/exec.c +++ b/src/exec.c @@ -638,16 +638,15 @@ struct tblentry **lastcmdentry; STATIC struct tblentry * cmdlookup(const char *name, int add) { - unsigned int hashval; - const char *p; struct tblentry *cmdp; struct tblentry **pp; - p = name; - hashval = (unsigned char)*p << 4; + unsigned int hashval = FNV_OFFSET_BASIS; + const char *p = name; + while (*p) - hashval += (unsigned char)*p++; - hashval &= 0x7FFF; + hashval = (hashval ^ (unsigned char)*p++) * FNV_PRIME; + pp = &cmdtable[hashval % CMDTABLESIZE]; for (cmdp = *pp ; cmdp ; cmdp = cmdp->next) { if (equal(cmdp->cmdname, name)) diff --git a/src/shell.h b/src/shell.h index 98edc8b..d08d0d5 100644 --- a/src/shell.h +++ b/src/shell.h @@ -82,6 +82,9 @@ extern char nullstr[1]; /* null string */ #define TRACEV(param) #endif +#define FNV_PRIME 16777619u +#define FNV_OFFSET_BASIS 2166136261u + #if defined(__GNUC__) && __GNUC__ < 3 #define va_copy __va_copy #endif diff --git a/src/var.c b/src/var.c index 0d7e1db..be34731 100644 --- a/src/var.c +++ b/src/var.c @@ -637,11 +637,10 @@ void unsetvar(const char *s) STATIC struct var ** hashvar(const char *p) { - unsigned int hashval; + unsigned int hashval = FNV_OFFSET_BASIS; - hashval = ((unsigned char) *p) << 4; while (*p && *p != '=') - hashval += (unsigned char) *p++; + hashval = (hashval ^ (unsigned char) *p++) + FNV_PRIME; return &vartab[hashval % VTABSIZE]; } -- 2.19.1