Junio C Hamano <gitster@xxxxxxxxx> writes: > Nguyễn Thái Ngọc Duy <pclouds@xxxxxxxxx> writes: > >> If a position in object hash table is taken, we currently check out >> the next one. This could potentially create long object chains. We >> could create linked lists instead and leave the next slot alone. > > In the current code, not just the logic in lookup_object(), but the > logic to enforce load factor when create_object() decides to call > grow_object_hash() and object enumeration implemented by > get_max_object_index() and get_indexed_object() are closely tied to > the open addressing scheme. If you want to switch to any other > method (e.g. separate chaining) these need to be updated quite a > bit. > > I do not see get_max_object_index() and get_indexed_object() updated > at all. Do fsck, index-pack, name-rev and upload-pack still work? You may want to start with a bit more abstraction around the hashtable API. Perhaps like this? The idea is to let your object enumerator to be not just a simple unsigned int into the flat hashtable, but be something like typedef struct { unsigned int slot; struct obj_list *list; } object_enumerator; You store the current index in obj_hash[] to enu.slot, and if that is IS_LST(), the linked-list element you are looking at in enu.list. When you "increment" the iterator in object_enumerator_next(), you increment enu.slot only after you reach the tail of the enu.list. builtin/fsck.c | 17 ++++++++++------- builtin/index-pack.c | 11 +++++++---- builtin/name-rev.c | 20 +++++++++++--------- object.c | 22 +++++++++++++++++++--- object.h | 8 ++++++-- upload-pack.c | 23 ++++++++++++++--------- 6 files changed, 67 insertions(+), 34 deletions(-) diff --git a/builtin/fsck.c b/builtin/fsck.c index bb9a2cd..5688cad 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -270,22 +270,25 @@ static void check_object(struct object *obj) static void check_connectivity(void) { - int i, max; + int max; + object_enumerator enu; /* Traverse the pending reachable objects */ traverse_reachable(); /* Look up all the requirements, warn about missing objects.. */ - max = get_max_object_index(); + max = begin_object_enumeration(&enu); if (verbose) fprintf(stderr, "Checking connectivity (%d objects)\n", max); - for (i = 0; i < max; i++) { - struct object *obj = get_indexed_object(i); - - if (obj) - check_object(obj); + if (max) { + do { + struct object *obj = get_enumerated_object(&enu); + if (obj) + check_object(obj); + } while (object_enumeration_next(&enu)); } + end_object_enumeration(&enu); } static int fsck_obj(struct object *obj) diff --git a/builtin/index-pack.c b/builtin/index-pack.c index ef62124..1d5b65c 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -195,11 +195,14 @@ static void check_object(struct object *obj) static void check_objects(void) { - unsigned i, max; + object_enumerator enu; - max = get_max_object_index(); - for (i = 0; i < max; i++) - check_object(get_indexed_object(i)); + if (begin_object_enumeration(&enu)) { + do { + check_object(get_enumerated_object(&enu)); + } while (object_enumeration_next(&enu)); + } + end_object_enumeration(&enu); } diff --git a/builtin/name-rev.c b/builtin/name-rev.c index 6238247..239c3ef 100644 --- a/builtin/name-rev.c +++ b/builtin/name-rev.c @@ -286,16 +286,18 @@ int cmd_name_rev(int argc, const char **argv, const char *prefix) name_rev_line(p, &data); } } else if (all) { - int i, max; - - max = get_max_object_index(); - for (i = 0; i < max; i++) { - struct object *obj = get_indexed_object(i); - if (!obj || obj->type != OBJ_COMMIT) - continue; - show_name(obj, NULL, - always, allow_undefined, data.name_only); + object_enumerator enu; + + if (begin_object_enumeration(&enu)) { + do { + struct object *obj = get_enumerated_object(&enu); + if (!obj || obj->type != OBJ_COMMIT) + continue; + show_name(obj, NULL, + always, allow_undefined, data.name_only); + } while (object_enumeration_next(&enu)); } + end_object_enumeration(&enu); } else { int i; for (i = 0; i < revs.nr; i++) diff --git a/object.c b/object.c index 20703f5..f5b754f 100644 --- a/object.c +++ b/object.c @@ -8,14 +8,30 @@ static struct object **obj_hash; static int nr_objs, obj_hash_size; -unsigned int get_max_object_index(void) +unsigned int begin_object_enumeration(object_enumerator *enu) { + *enu = 0; return obj_hash_size; } -struct object *get_indexed_object(unsigned int idx) +struct object *get_enumerated_object(object_enumerator *enu) { - return obj_hash[idx]; + int ix = *enu; + + if (obj_hash_size <= ix) + die("BUG: get_enumerated_object() called beyond the end"); + return obj_hash[*enu]; +} + +int object_enumeration_next(object_enumerator *enu) +{ + return ++*enu < obj_hash_size; +} + +void end_object_enumeration(object_enumerator *enu) +{ + /* Nothing to free (yet) */ + ; } static const char *object_type_strings[] = { diff --git a/object.h b/object.h index 97d384b..5435d58 100644 --- a/object.h +++ b/object.h @@ -35,8 +35,12 @@ struct object { extern const char *typename(unsigned int type); extern int type_from_string(const char *str); -extern unsigned int get_max_object_index(void); -extern struct object *get_indexed_object(unsigned int); +typedef unsigned int object_enumerator; + +extern unsigned int begin_object_enumeration(object_enumerator *); +extern struct object *get_enumerated_object(object_enumerator *); +extern int object_enumeration_next(object_enumerator *); +extern void end_object_enumeration(object_enumerator *); /* * This can be used to see if we have heard of the object before, but diff --git a/upload-pack.c b/upload-pack.c index f5673ee..618b211 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -502,6 +502,7 @@ static void check_non_tip(void) struct object *o; char namebuf[42]; /* ^ + SHA-1 + LF */ int i; + object_enumerator enu; /* In the normal in-process case non-tip request can never happen */ if (!stateless_rpc) @@ -525,16 +526,20 @@ static void check_non_tip(void) namebuf[0] = '^'; namebuf[41] = '\n'; - for (i = get_max_object_index(); 0 < i; ) { - o = get_indexed_object(--i); - if (!o) - continue; - if (!is_our_ref(o)) - continue; - memcpy(namebuf + 1, sha1_to_hex(o->sha1), 40); - if (write_in_full(cmd.in, namebuf, 42) < 0) - goto error; + + if (begin_object_enumeration(&enu)) { + do { + o = get_enumerated_object(&enu); + if (!o) + continue; + if (!is_our_ref(o)) + continue; + memcpy(namebuf + 1, sha1_to_hex(o->sha1), 40); + if (write_in_full(cmd.in, namebuf, 42) < 0) + goto error; + } while (object_enumeration_next(&enu)); } + end_object_enumeration(&enu); namebuf[40] = '\n'; for (i = 0; i < want_obj.nr; i++) { o = want_obj.objects[i].item; -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html