Also here: https://github.com/kblees/git/commits/kb/hashmap-v5 Changes since v3: - changed table resizing and grow / shrink thresholds (#02) - hashmap_free: changed free_function to boolean (#02, #06, #07, #09) - swapped xcalloc args (num elements, element size) (#02) - fixed pointer cast formatting "(type*)" -> "(type *)" (#02) - removed function pointer casts "(*fn)(...)" -> "fn(...)" (#02) - api-hashmap.txt: clarified removal / replacement of entries (#02) - update-index.c: fixed const-casts (#13, #14) Karsten Jens Lehmann (1): submodule: don't access the .gitmodules cache entry after removing it Karsten Blees (13): add a hashtable implementation that supports O(1) removal buitin/describe.c: use new hash map implementation diffcore-rename.c: move code around to prepare for the next patch diffcore-rename.c: simplify finding exact renames diffcore-rename.c: use new hash map implementation name-hash.c: use new hash map implementation for directories name-hash.c: remove unreferenced directory entries name-hash.c: use new hash map implementation for cache entries name-hash.c: remove cache entries instead of marking them CE_UNHASHED remove old hash.[ch] implementation fix 'git update-index --verbose --again' output builtin/update-index.c: cleanup update_one read-cache.c: fix memory leaks caused by removed cache entries Documentation/technical/api-hash.txt | 52 ------- Documentation/technical/api-hashmap.txt | 235 +++++++++++++++++++++++++++++ Makefile | 5 +- builtin/describe.c | 53 +++---- builtin/rm.c | 2 +- builtin/update-index.c | 39 +++-- cache.h | 18 +-- diffcore-rename.c | 185 ++++++++--------------- hash.c | 110 -------------- hash.h | 50 ------- hashmap.c | 228 ++++++++++++++++++++++++++++ hashmap.h | 71 +++++++++ name-hash.c | 156 +++++++------------ read-cache.c | 10 +- resolve-undo.c | 7 +- submodule.c | 25 +--- t/t0011-hashmap.sh | 240 ++++++++++++++++++++++++++++++ test-hashmap.c | 256 ++++++++++++++++++++++++++++++++ unpack-trees.c | 3 +- 19 files changed, 1216 insertions(+), 529 deletions(-) delete mode 100644 Documentation/technical/api-hash.txt create mode 100644 Documentation/technical/api-hashmap.txt delete mode 100644 hash.c delete mode 100644 hash.h create mode 100644 hashmap.c create mode 100644 hashmap.h create mode 100755 t/t0011-hashmap.sh create mode 100644 test-hashmap.c -- git diff kb/hashmap-v4: diff --git a/Documentation/technical/api-hashmap.txt b/Documentation/technical/api-hashmap.txt index fc46e56..b2280f1 100644 --- a/Documentation/technical/api-hashmap.txt +++ b/Documentation/technical/api-hashmap.txt @@ -42,11 +42,6 @@ parameters that have the same hash code. When looking up an entry, the `key` and `keydata` parameters to hashmap_get and hashmap_remove are always passed as second and third argument, respectively. Otherwise, `keydata` is NULL. -`void (*hashmap_free_fn)(void *entry)`:: - - User-supplied function to free a hashmap_entry. If entries are simply - malloc'ed memory, use stdlib's free() function. - Functions --------- @@ -76,14 +71,14 @@ If the total number of entries is known in advance, the `initial_size` parameter may be used to preallocate a sufficiently large table and thus prevent expensive resizing. If 0, the table is dynamically resized. -`void hashmap_free(struct hashmap *map, hashmap_free_fn free_function)`:: +`void hashmap_free(struct hashmap *map, int free_entries)`:: Frees a hashmap structure and allocated memory. + `map` is the hashmap to free. + -If `free_function` is specified (not NULL), it is called to free each -hashmap_entry in the map. If entries are simply malloc'ed, use stdlib's free(). +If `free_entries` is true, each hashmap_entry in the map is freed as well +(using stdlib's free()). `void hashmap_entry_init(void *entry, int hash)`:: @@ -127,17 +122,20 @@ call to `hashmap_get` or `hashmap_get_next`. `void *hashmap_put(struct hashmap *map, void *entry)`:: - Adds or replaces a hashmap entry. + Adds or replaces a hashmap entry. If the hashmap contains duplicate + entries equal to the specified entry, only one of them will be replaced. + `map` is the hashmap structure. + -`entry` is the entry to add or update. +`entry` is the entry to add or replace. + -Returns the previous entry, or NULL if not found (i.e. the entry was added). +Returns the replaced entry, or NULL if not found (i.e. the entry was added). `void *hashmap_remove(struct hashmap *map, const void *key, const void *keydata)`:: - Removes a hashmap entry matching the specified key. + Removes a hashmap entry matching the specified key. If the hashmap + contains duplicate entries equal to the specified key, only one of + them will be removed. + `map` is the hashmap structure. + @@ -183,14 +181,14 @@ static int long2double_cmp(const struct long2double *e1, const struct long2doubl return !(e1->key == e2->key); } -void long2double_init() +void long2double_init(void) { hashmap_init(&map, (hashmap_cmp_fn) long2double_cmp, 0); } -void long2double_free() +void long2double_free(void) { - hashmap_free(&map, free); + hashmap_free(&map, 1); } static struct long2double *find_entry(long key) diff --git a/builtin/update-index.c b/builtin/update-index.c index acd992d..00313f3 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -559,7 +559,7 @@ static int do_reupdate(int ac, const char **av, const struct cache_entry *ce = active_cache[pos]; struct cache_entry *old = NULL; int save_nr; - const char *path; + char *path; if (ce_stage(ce) || !ce_path_match(ce, &pathspec)) continue; @@ -838,7 +838,7 @@ int cmd_update_index(int argc, const char **argv, const char *prefix) update_one(p); if (set_executable_bit) chmod_path(set_executable_bit, p); - free(p); + free((char *)p); ctx.argc--; ctx.argv++; break; @@ -880,7 +880,7 @@ int cmd_update_index(int argc, const char **argv, const char *prefix) update_one(p); if (set_executable_bit) chmod_path(set_executable_bit, p); - free(p); + free((char *)p); } strbuf_release(&nbuf); strbuf_release(&buf); diff --git a/diffcore-rename.c b/diffcore-rename.c index d996c6a..9b4f068 100644 --- a/diffcore-rename.c +++ b/diffcore-rename.c @@ -343,7 +343,7 @@ static int find_exact_renames(struct diff_options *options) renames += find_identical_files(&file_table, i, options); /* Free the hash data structure and entries */ - hashmap_free(&file_table, free); + hashmap_free(&file_table, 1); return renames; } diff --git a/hashmap.c b/hashmap.c index 3a9f8c1..d1b8056 100644 --- a/hashmap.c +++ b/hashmap.c @@ -29,7 +29,7 @@ unsigned int strihash(const char *str) unsigned int memhash(const void *buf, size_t len) { unsigned int hash = FNV32_BASE; - unsigned char *ucbuf = (unsigned char*) buf; + unsigned char *ucbuf = (unsigned char *) buf; while (len--) { unsigned int c = *ucbuf++; hash = (hash * FNV32_PRIME) ^ c; @@ -40,7 +40,7 @@ unsigned int memhash(const void *buf, size_t len) unsigned int memihash(const void *buf, size_t len) { unsigned int hash = FNV32_BASE; - unsigned char *ucbuf = (unsigned char*) buf; + unsigned char *ucbuf = (unsigned char *) buf; while (len--) { unsigned int c = *ucbuf++; if (c >= 'a' && c <= 'z') @@ -52,17 +52,33 @@ unsigned int memihash(const void *buf, size_t len) #define HASHMAP_INITIAL_SIZE 64 /* grow / shrink by 2^2 */ -#define HASHMAP_GROW 2 -/* grow if > 80% full (to 20%) */ -#define HASHMAP_GROW_AT 1.25 -/* shrink if < 16.6% full (to 66.6%) */ -#define HASHMAP_SHRINK_AT 6 +#define HASHMAP_RESIZE_BITS 2 +/* load factor in percent */ +#define HASHMAP_LOAD_FACTOR 80 + +static void alloc_table(struct hashmap *map, unsigned int size) +{ + map->tablesize = size; + map->table = xcalloc(size, sizeof(struct hashmap_entry *)); + + /* calculate resize thresholds for new size */ + map->grow_at = (unsigned int) ((uint64_t) size * HASHMAP_LOAD_FACTOR / 100); + if (size <= HASHMAP_INITIAL_SIZE) + map->shrink_at = 0; + else + /* + * The shrink-threshold must be slightly smaller than + * (grow-threshold / resize-factor) to prevent erratic resizing, + * thus we divide by (resize-factor + 1). + */ + map->shrink_at = map->grow_at / ((1 << HASHMAP_RESIZE_BITS) + 1); +} static inline int entry_equals(const struct hashmap *map, const struct hashmap_entry *e1, const struct hashmap_entry *e2, const void *keydata) { - return (e1 == e2) || (e1->hash == e2->hash && !(*map->cmpfn)(e1, e2, keydata)); + return (e1 == e2) || (e1->hash == e2->hash && !map->cmpfn(e1, e2, keydata)); } static inline unsigned int bucket(const struct hashmap *map, @@ -76,8 +92,7 @@ static void rehash(struct hashmap *map, unsigned int newsize) unsigned int i, oldsize = map->tablesize; struct hashmap_entry **oldtable = map->table; - map->tablesize = newsize; - map->table = xcalloc(sizeof(struct hashmap_entry*), map->tablesize); + alloc_table(map, newsize); for (i = 0; i < oldsize; i++) { struct hashmap_entry *e = oldtable[i]; while (e) { @@ -108,26 +123,28 @@ static int always_equal(const void *unused1, const void *unused2, const void *un void hashmap_init(struct hashmap *map, hashmap_cmp_fn equals_function, size_t initial_size) { + unsigned int size = HASHMAP_INITIAL_SIZE; map->size = 0; map->cmpfn = equals_function ? equals_function : always_equal; + /* calculate initial table size and allocate the table */ - map->tablesize = HASHMAP_INITIAL_SIZE; - initial_size *= HASHMAP_GROW_AT; - while (initial_size > map->tablesize) - map->tablesize <<= HASHMAP_GROW; - map->table = xcalloc(sizeof(struct hashmap_entry*), map->tablesize); + initial_size = (unsigned int) ((uint64_t) initial_size * 100 + / HASHMAP_LOAD_FACTOR); + while (initial_size > size) + size <<= HASHMAP_RESIZE_BITS; + alloc_table(map, size); } -void hashmap_free(struct hashmap *map, hashmap_free_fn free_function) +void hashmap_free(struct hashmap *map, int free_entries) { if (!map || !map->table) return; - if (free_function) { + if (free_entries) { struct hashmap_iter iter; struct hashmap_entry *e; hashmap_iter_init(map, &iter); while ((e = hashmap_iter_next(&iter))) - (*free_function)(e); + free(e); } free(map->table); memset(map, 0, sizeof(*map)); @@ -140,7 +157,7 @@ void *hashmap_get(const struct hashmap *map, const void *key, const void *keydat void *hashmap_get_next(const struct hashmap *map, const void *entry) { - struct hashmap_entry *e = ((struct hashmap_entry*) entry)->next; + struct hashmap_entry *e = ((struct hashmap_entry *) entry)->next; for (; e; e = e->next) if (entry_equals(map, entry, e, NULL)) return e; @@ -152,13 +169,13 @@ void hashmap_add(struct hashmap *map, void *entry) unsigned int b = bucket(map, entry); /* add entry */ - ((struct hashmap_entry*) entry)->next = map->table[b]; + ((struct hashmap_entry *) entry)->next = map->table[b]; map->table[b] = entry; /* fix size and rehash if appropriate */ map->size++; - if (map->size * HASHMAP_GROW_AT > map->tablesize) - rehash(map, map->tablesize << HASHMAP_GROW); + if (map->size > map->grow_at) + rehash(map, map->tablesize << HASHMAP_RESIZE_BITS); } void *hashmap_remove(struct hashmap *map, const void *key, const void *keydata) @@ -175,9 +192,8 @@ void *hashmap_remove(struct hashmap *map, const void *key, const void *keydata) /* fix size and rehash if appropriate */ map->size--; - if (map->tablesize > HASHMAP_INITIAL_SIZE && - map->size * HASHMAP_SHRINK_AT < map->tablesize) - rehash(map, map->tablesize >> HASHMAP_GROW); + if (map->size < map->shrink_at) + rehash(map, map->tablesize >> HASHMAP_RESIZE_BITS); return old; } diff --git a/hashmap.h b/hashmap.h index eea003a..f5b3b61 100644 --- a/hashmap.h +++ b/hashmap.h @@ -22,12 +22,11 @@ struct hashmap_entry { typedef int (*hashmap_cmp_fn)(const void *entry, const void *entry_or_key, const void *keydata); -typedef void (*hashmap_free_fn)(void *entry); struct hashmap { struct hashmap_entry **table; hashmap_cmp_fn cmpfn; - unsigned int size, tablesize; + unsigned int size, tablesize, grow_at, shrink_at; }; struct hashmap_iter { @@ -40,7 +39,7 @@ struct hashmap_iter { extern void hashmap_init(struct hashmap *map, hashmap_cmp_fn equals_function, size_t initial_size); -extern void hashmap_free(struct hashmap *map, hashmap_free_fn free_function); +extern void hashmap_free(struct hashmap *map, int free_entries); /* hashmap_entry functions */ diff --git a/name-hash.c b/name-hash.c index 8871d8e..9a3bd3f 100644 --- a/name-hash.c +++ b/name-hash.c @@ -240,6 +240,6 @@ void free_name_hash(struct index_state *istate) return; istate->name_hash_initialized = 0; - hashmap_free(&istate->name_hash, NULL); - hashmap_free(&istate->dir_hash, free); + hashmap_free(&istate->name_hash, 0); + hashmap_free(&istate->dir_hash, 1); } diff --git a/t/t0011-hashmap.sh b/t/t0011-hashmap.sh index 6c699d5..391e2b6 100755 --- a/t/t0011-hashmap.sh +++ b/t/t0011-hashmap.sh @@ -221,13 +221,17 @@ test_expect_success 'grow / shrink' ' echo NULL >> expect echo size >> in && echo 256 52 >> expect && - for n in $(test_seq 10) + for n in $(test_seq 12) do echo remove key$n >> in && echo value$n >> expect done && echo size >> in && - echo 64 42 >> expect && + echo 256 40 >> expect && + echo remove key40 >> in && + echo value40 >> expect && + echo size >> in && + echo 64 39 >> expect && cat in | test-hashmap > out && test_cmp expect out diff --git a/test-hashmap.c b/test-hashmap.c index 333b7b3..7e86f88 100644 --- a/test-hashmap.c +++ b/test-hashmap.c @@ -71,7 +71,7 @@ static unsigned int hash(unsigned int method, unsigned int i, const char *key) } /* - * Test insert performance of hashmap.[ch] + * Test performance of hashmap.[ch] * Usage: time echo "perfhashmap method rounds" | test-hashmap */ static void perf_hashmap(unsigned int method, unsigned int rounds) @@ -82,7 +82,7 @@ static void perf_hashmap(unsigned int method, unsigned int rounds) unsigned int *hashes; unsigned int i, j; - entries = malloc(TEST_SIZE * sizeof(struct test_entry*)); + entries = malloc(TEST_SIZE * sizeof(struct test_entry *)); hashes = malloc(TEST_SIZE * sizeof(int)); for (i = 0; i < TEST_SIZE; i++) { snprintf(buf, sizeof(buf), "%i", i); @@ -101,7 +101,7 @@ static void perf_hashmap(unsigned int method, unsigned int rounds) hashmap_add(&map, entries[i]); } - hashmap_free(&map, NULL); + hashmap_free(&map, 0); } } else { /* test map lookups */ @@ -122,7 +122,7 @@ static void perf_hashmap(unsigned int method, unsigned int rounds) } } - hashmap_free(&map, NULL); + hashmap_free(&map, 0); } } @@ -251,6 +251,6 @@ int main(int argc, char *argv[]) } } - hashmap_free(&map, free); + hashmap_free(&map, 1); return 0; } -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html