Am 24.10.2017 um 20:59 schrieb Stefan Beller:
> Instead of using the hash seeded with 5381, and updated via
> `(hash << 5) ^ new_byte`, use the FNV-1 primitives as offered by
> hashmap.h, which is seeded with 0x811c9dc5 and computed as
> `(hash * 0x01000193) ^ new_byte`.

The hash function you're replacing is called DJB2 (in its XOR variant);
I think that's worth mentioning in the commit message. Note that the
code being removed actually computes `((hash << 5) + hash) ^ new_byte`,
i.e. `(hash * 33) ^ new_byte`, not `(hash << 5) ^ new_byte` as stated
above. Performance test results would be nice. I have no idea how to
find edge cases, though -- or, better, how to demonstrate that there
are none.

>
> Signed-off-by: Stefan Beller <sbeller@xxxxxxxxxx>
> ---
>  xdiff/xutils.c | 19 ++++++++-----------
>  1 file changed, 8 insertions(+), 11 deletions(-)
>
> diff --git a/xdiff/xutils.c b/xdiff/xutils.c
> index 04d7b32e4e..a58a28c687 100644
> --- a/xdiff/xutils.c
> +++ b/xdiff/xutils.c
> @@ -24,7 +24,8 @@
>  #include <assert.h>
>  #include "xinclude.h"
>
> -
> +#include "cache.h"
> +#include "hashmap.h"

Ouch. Defining FNV32_BASE and FNV32_PRIME here, instead of including
these two headers, would be much easier overall. And if that's too much
duplication then those definitions could be extracted into a new header
file (fnv32.h?) included by both hashmap.h and xutils.c.

>
>
>  long xdl_bogosqrt(long n) {
> @@ -228,7 +229,7 @@ int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags)
>
>  static unsigned long xdl_hash_record_with_whitespace(char const **data,
>  		char const *top, long flags) {
> -	unsigned long ha = 5381;
> +	unsigned long ha = memhash(NULL, 0);
>  	char const *ptr = *data;
>
>  	for (; ptr < top && *ptr != '\n'; ptr++) {
> @@ -243,21 +244,18 @@ static unsigned long xdl_hash_record_with_whitespace(char const **data,
>  				; /* already handled */
>  			else if (flags & XDF_IGNORE_WHITESPACE_CHANGE
>  				 && !at_eol) {
> -				ha += (ha << 5);
> -				ha ^= (unsigned long) ' ';
> +				ha = memhash_feed(ha, (unsigned char) ' ');

All the memhash_feed() callers in this file cast to unsigned char. A
macro or a function (possibly inline) defined at the top could do that
for them.

>  			}
>  			else if (flags & XDF_IGNORE_WHITESPACE_AT_EOL
>  				 && !at_eol) {
>  				while (ptr2 != ptr + 1) {
> -					ha += (ha << 5);
> -					ha ^= (unsigned long) *ptr2;
> +					ha = memhash_feed(ha, (unsigned char) *ptr2);
>  					ptr2++;
>  				}
>  			}
>  			continue;
>  		}
> -		ha += (ha << 5);
> -		ha ^= (unsigned long) *ptr;
> +		ha = memhash_feed(ha, (unsigned char) *ptr);
>  	}
>  	*data = ptr < top ? ptr + 1: ptr;
>
> @@ -265,15 +263,14 @@ static unsigned long xdl_hash_record_with_whitespace(char const **data,
>  }
>
>  unsigned long xdl_hash_record(char const **data, char const *top, long flags) {
> -	unsigned long ha = 5381;
> +	unsigned long ha = memhash(NULL, 0);
>  	char const *ptr = *data;
>
>  	if (flags & XDF_WHITESPACE_FLAGS)
>  		return xdl_hash_record_with_whitespace(data, top, flags);
>
>  	for (; ptr < top && *ptr != '\n'; ptr++) {
> -		ha += (ha << 5);
> -		ha ^= (unsigned long) *ptr;
> +		ha = memhash_feed(ha, (unsigned char) *ptr);
>  	}
>  	*data = ptr < top ? ptr + 1: ptr;
>
>