Jonathan Tan <jonathantanmy@xxxxxxxxxx> writes: > This is similar to using the hashmap in hashmap.c, but with an > easier-to-use API. In particular, custom entry comparisons no longer > need to be written, and lookups can be done without constructing a > temporary entry structure. A naïve question is why this needs to duplicate so much code, just to build something similar in spirit to hashmap but unlike hashmap that can take caller-defined keys, limited to using oid as the keys, instead of just being a thin API wrapper that uses hashmap as its internal implementation detail. Is the way hashmap API is structured so hard to use it in such a way, or something? > Makefile | 1 + > oidmap.c | 152 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ > oidmap.h | 98 ++++++++++++++++++++++++++++++++++++++++ > oidset.c | 38 ++++------------ > oidset.h | 4 +- > 5 files changed, 263 insertions(+), 30 deletions(-) > create mode 100644 oidmap.c > create mode 100644 oidmap.h > > diff --git a/Makefile b/Makefile > index ed4ca438b..64136dde4 100644 > --- a/Makefile > +++ b/Makefile > @@ -821,6 +821,7 @@ LIB_OBJS += notes-cache.o > LIB_OBJS += notes-merge.o > LIB_OBJS += notes-utils.o > LIB_OBJS += object.o > +LIB_OBJS += oidmap.o > LIB_OBJS += oidset.o > LIB_OBJS += packfile.o > LIB_OBJS += pack-bitmap.o > diff --git a/oidmap.c b/oidmap.c > new file mode 100644 > index 000000000..40e696cee > --- /dev/null > +++ b/oidmap.c > @@ -0,0 +1,152 @@ > +#include "cache.h" > +#include "oidmap.h" > + > +#define OIDMAP_INITIAL_SIZE 64 > +/* grow / shrink by 2^2 */ > +#define OIDMAP_RESIZE_BITS 2 > +/* load factor in percent */ > +#define OIDMAP_LOAD_FACTOR 80 > + > +static void alloc_table(struct oidmap *map, unsigned int size) > +{ > + map->tablesize = size; > + map->table = xcalloc(size, sizeof(struct oidmap_entry *)); > + > + /* calculate resize thresholds for new size */ > + map->grow_at = (unsigned int) ((uint64_t) size * OIDMAP_LOAD_FACTOR / 100); > + if (size <= OIDMAP_INITIAL_SIZE) > + map->shrink_at = 0; > + else > + /* > + * The shrink-threshold must be slightly smaller than > + * (grow-threshold / resize-factor) to prevent erratic resizing, > + * thus we divide by (resize-factor + 1). > + */ > + map->shrink_at = map->grow_at / ((1 << OIDMAP_RESIZE_BITS) + 1); > +} > + > +static inline unsigned int bucket(const struct oidmap *map, > + const struct object_id *key) > +{ > + unsigned int hash; > + memcpy(&hash, key->hash, sizeof(hash)); > + return hash & (map->tablesize - 1); > +} > + > +static void rehash(struct oidmap *map, unsigned int newsize) > +{ > + unsigned int i, oldsize = map->tablesize; > + struct oidmap_entry **oldtable = map->table; > + > + alloc_table(map, newsize); > + for (i = 0; i < oldsize; i++) { > + struct oidmap_entry *e = oldtable[i]; > + while (e) { > + struct oidmap_entry *next = e->next; > + unsigned int b = bucket(map, &e->oid); > + e->next = map->table[b]; > + map->table[b] = e; > + e = next; > + } > + } > + free(oldtable); > +} > + > +static inline struct oidmap_entry **find_entry_ptr(const struct oidmap *map, > + const struct object_id *key) > +{ > + struct oidmap_entry **e = &map->table[bucket(map, key)]; > + while (*e && oidcmp(&(*e)->oid, key)) > + e = &(*e)->next; > + return e; > +} > + > +void oidmap_init(struct oidmap *map, size_t initial_size) > +{ > + unsigned int size = OIDMAP_INITIAL_SIZE; > + > + memset(map, 0, sizeof(*map)); > + > + /* calculate initial table size and allocate the table */ > + initial_size = (unsigned int) ((uint64_t) initial_size * 100 > + / OIDMAP_LOAD_FACTOR); > + while (initial_size > size) > + size <<= OIDMAP_RESIZE_BITS; > + alloc_table(map, size); > + > + /* > + * Keep track of the number of items in the map and > + * allow the map to automatically grow as necessary. > + */ > + map->do_count_items = 1; > +} > + > +void oidmap_free(struct oidmap *map, int free_entries) > +{ > + if (!map || !map->table) > + return; > + if (free_entries) { > + int i; > + for (i = 0; i < map->tablesize; i++) { > + struct oidmap_entry *e = map->table[i]; > + while (e) { > + struct oidmap_entry *next = e->next; > + free(e); > + e = next; > + } > + } > + } > + free(map->table); > + memset(map, 0, sizeof(*map)); > +} > + > +void *oidmap_get(const struct oidmap *map, const struct object_id *key) > +{ > + return *find_entry_ptr(map, key); > +} > + > +static void oidmap_add(struct oidmap *map, struct oidmap_entry *entry) > +{ > + unsigned int b = bucket(map, &entry->oid); > + > + /* add entry */ > + entry->next = map->table[b]; > + map->table[b] = entry; > + > + /* fix size and rehash if appropriate */ > + if (map->do_count_items) { > + map->private_size++; > + if (map->private_size > map->grow_at) > + rehash(map, map->tablesize << OIDMAP_RESIZE_BITS); > + } > +} > + > +void *oidmap_remove(struct oidmap *map, const struct object_id *key) > +{ > + struct oidmap_entry *old; > + struct oidmap_entry **e = find_entry_ptr(map, key); > + if (!*e) > + return NULL; > + > + /* remove existing entry */ > + old = *e; > + *e = old->next; > + old->next = NULL; > + > + /* fix size and rehash if appropriate */ > + if (map->do_count_items) { > + map->private_size--; > + if (map->private_size < map->shrink_at) > + rehash(map, map->tablesize >> OIDMAP_RESIZE_BITS); > + } > + > + return old; > +} > + > +void *oidmap_put(struct oidmap *map, void *entry) > +{ > + struct oidmap_entry *to_put = entry; > + struct oidmap_entry *old = oidmap_remove(map, &to_put->oid); > + oidmap_add(map, to_put); > + return old; > +} > diff --git a/oidmap.h b/oidmap.h > new file mode 100644 > index 000000000..a543ed828 > --- /dev/null > +++ b/oidmap.h > @@ -0,0 +1,98 @@ > +#ifndef OIDMAP_H > +#define OIDMAP_H > + > +/* > + * struct oidmap_entry is a structure representing an entry in the hash table, > + * which must be used as first member of user data structures. It must be > + * zero-initialized (or use OIDMAP_ENTRY_INIT). > + */ > +struct oidmap_entry { > + /* > + * next points to the next entry in case of collisions (i.e. if > + * multiple entries map to the same bucket). For oidmap's internal use > + * only. > + */ > + struct oidmap_entry *next; > + > + struct object_id oid; > +}; > +#define OIDMAP_ENTRY_INIT { NULL } > + > +/* > + * struct oidmap is the hash table structure. Members can be used as follows, > + * but should not be modified directly. > + */ > +struct oidmap { > + struct oidmap_entry **table; > + > + /* total number of entries (0 means the hashmap is empty) */ > + unsigned int private_size; /* use oidmap_get_size() */ > + > + /* > + * tablesize is the allocated size of the hash table. A non-0 value > + * indicates that the hashmap is initialized. It may also be useful > + * for statistical purposes (i.e. `size / tablesize` is the current > + * load factor). > + */ > + unsigned int tablesize; > + > + unsigned int grow_at; > + unsigned int shrink_at; > + > + unsigned int do_count_items : 1; > +}; > + > +/* oidmap functions */ > + > +/* > + * Initializes an oidmap structure. > + * > + * `map` is the oidmap to initialize. > + * > + * If the total number of entries is known in advance, the `initial_size` > + * parameter may be used to preallocate a sufficiently large table and thus > + * prevent expensive resizing. If 0, the table is dynamically resized. > + */ > +extern void oidmap_init(struct oidmap *map, size_t initial_size); > + > +/* > + * Frees an oidmap structure and allocated memory. > + * > + * If `free_entries` is true, each oidmap_entry in the map is freed as well > + * using stdlibs free(). > + */ > +extern void oidmap_free(struct oidmap *map, int free_entries); > + > +/* > + * Return the number of items in the map. > + */ > +static inline unsigned int oidmap_get_size(struct oidmap *map) > +{ > + if (map->do_count_items) > + return map->private_size; > + > + BUG("oidmap_get_size: size not set"); > + return 0; > +} > + > +/* > + * Returns the oidmap entry for the specified oid, or NULL if not found. > + */ > +extern void *oidmap_get(const struct oidmap *map, > + const struct object_id *key); > + > +/* > + * Adds or replaces an oidmap entry. > + * > + * Returns the replaced entry, or NULL if not found (i.e. the entry was added). > + */ > +extern void *oidmap_put(struct oidmap *map, void *entry); > + > +/* > + * Removes an oidmap entry matching the specified oid. > + * > + * Returns the removed entry, or NULL if not found. > + */ > +extern void *oidmap_remove(struct oidmap *map, const struct object_id *key); > + > +#endif > diff --git a/oidset.c b/oidset.c > index a6a08ba52..6fe7036c4 100644 > --- a/oidset.c > +++ b/oidset.c > @@ -1,50 +1,30 @@ > #include "cache.h" > #include "oidset.h" > > -struct oidset_entry { > - struct hashmap_entry hash; > - struct object_id oid; > -}; > - > -static int oidset_hashcmp(const void *unused_cmp_data, > - const void *va, const void *vb, > - const void *vkey) > -{ > - const struct oidset_entry *a = va, *b = vb; > - const struct object_id *key = vkey; > - return oidcmp(&a->oid, key ? key : &b->oid); > -} > - > int oidset_contains(const struct oidset *set, const struct object_id *oid) > { > - struct hashmap_entry key; > - > - if (!set->map.cmpfn) > + if (!set->map.tablesize) > return 0; > - > - hashmap_entry_init(&key, sha1hash(oid->hash)); > - return !!hashmap_get(&set->map, &key, oid); > + return !!oidmap_get(&set->map, oid); > } > > int oidset_insert(struct oidset *set, const struct object_id *oid) > { > - struct oidset_entry *entry; > - > - if (!set->map.cmpfn) > - hashmap_init(&set->map, oidset_hashcmp, NULL, 0); > + struct oidmap_entry *entry; > > - if (oidset_contains(set, oid)) > + if (!set->map.tablesize) > + oidmap_init(&set->map, 0); > + else if (oidset_contains(set, oid)) > return 1; > > - entry = xmalloc(sizeof(*entry)); > - hashmap_entry_init(&entry->hash, sha1hash(oid->hash)); > + entry = xcalloc(1, sizeof(*entry)); > oidcpy(&entry->oid, oid); > > - hashmap_add(&set->map, entry); > + oidmap_put(&set->map, entry); > return 0; > } > > void oidset_clear(struct oidset *set) > { > - hashmap_free(&set->map, 1); > + oidmap_free(&set->map, 1); > } > diff --git a/oidset.h b/oidset.h > index b7eaab5b8..b1ec82bfc 100644 > --- a/oidset.h > +++ b/oidset.h > @@ -1,6 +1,8 @@ > #ifndef OIDSET_H > #define OIDSET_H > > +#include "oidmap.h" > + > /** > * This API is similar to sha1-array, in that it maintains a set of object ids > * in a memory-efficient way. The major differences are: > @@ -17,7 +19,7 @@ > * A single oidset; should be zero-initialized (or use OIDSET_INIT). > */ > struct oidset { > - struct hashmap map; > + struct oidmap map; > }; > > #define OIDSET_INIT { { NULL } }