Am 31.03.20 um 21:14 schrieb René Scharfe: > Am 31.03.20 um 11:45 schrieb Jeff King: >> diff --git a/fast-import.c b/fast-import.c >> index 202dda11a6..6ebac665a0 100644 >> --- a/fast-import.c >> +++ b/fast-import.c >> @@ -39,12 +39,25 @@ >> >> struct object_entry { >> struct pack_idx_entry idx; >> - struct object_entry *next; >> uint32_t type : TYPE_BITS, >> pack_id : PACK_ID_BITS, >> depth : DEPTH_BITS; >> }; >> >> +static inline unsigned int object_entry_hash(struct object_entry *oe) >> +{ >> + return oidhash(&oe->idx.oid); >> +} >> + >> +static inline int object_entry_equal(struct object_entry *a, >> + struct object_entry *b) >> +{ >> + return oideq(&a->idx.oid, &b->idx.oid); >> +} >> + >> +KHASH_INIT(object_entry_set, struct object_entry *, int, 0, >> + object_entry_hash, object_entry_equal); >> + >> struct object_entry_pool { >> struct object_entry_pool *next_pool; >> struct object_entry *next_free; >> @@ -178,7 +191,7 @@ static off_t pack_size; >> /* Table of objects we've written. */ >> static unsigned int object_entry_alloc = 5000; >> static struct object_entry_pool *blocks; >> -static struct object_entry *object_table[1 << 16]; >> +static kh_object_entry_set_t object_table; >> static struct mark_set *marks; >> static const char *export_marks_file; >> static const char *import_marks_file; >> @@ -455,44 +468,45 @@ static struct object_entry *new_object(struct object_id *oid) >> >> static struct object_entry *find_object(struct object_id *oid) >> { >> - unsigned int h = oid->hash[0] << 8 | oid->hash[1]; >> - struct object_entry *e; >> - for (e = object_table[h]; e; e = e->next) >> - if (oideq(oid, &e->idx.oid)) >> - return e; >> + /* >> + * this cast works because we only look at the oid part of the entry, >> + * and it comes first in the struct >> + */ >> + khiter_t pos = kh_get_object_entry_set(&object_table, >> + (struct object_entry *)oid); > > Dirty, but I can believe the comment. > > >> + if (pos != kh_end(&object_table)) >> + return kh_key(&object_table, pos); >> return NULL; >> } >> >> static struct object_entry *insert_object(struct object_id *oid) >> { >> - unsigned int h = oid->hash[0] << 8 | oid->hash[1]; >> - struct object_entry *e = object_table[h]; >> + struct object_entry *e; >> + int was_empty; >> + khiter_t pos; >> >> - while (e) { >> - if (oideq(oid, &e->idx.oid)) >> - return e; >> - e = e->next; >> - } >> + pos = kh_put_object_entry_set(&object_table, (struct object_entry *)oid, &was_empty); > > Now this looks illegal. khash is surely reading a full object_entry from oid, > which only is a mere object_id, no? No, it's a set of pointers, and khash only accesses the referenced objects via the hash and comparison functions. Storing the objects directly in the set and getting rid of new_object() could improve performance due to reduced dereferencing overhead -- or make it slower because more data has to be copied when the hashmap needs to grow. Worth a shot. Later. René