Re: [Toy PATCH] Avoid spilling collided entries in object hash table to the next slots

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Junio C Hamano <gitster@xxxxxxxxx> writes:

> Nguyễn Thái Ngọc Duy  <pclouds@xxxxxxxxx> writes:
>
>> If a position in object hash table is taken, we currently check out
>> the next one. This could potentially create long object chains. We
>> could create linked lists instead and leave the next slot alone.
>
> In the current code, not just the logic in lookup_object(), but the
> logic to enforce load factor when create_object() decides to call
> grow_object_hash() and object enumeration implemented by
> get_max_object_index() and get_indexed_object() are closely tied to
> the open addressing scheme.  If you want to switch to any other
> method (e.g. separate chaining) these need to be updated quite a
> bit.
>
> I do not see get_max_object_index() and get_indexed_object() updated
> at all.  Do fsck, index-pack, name-rev and upload-pack still work?

You may want to start with a bit more abstraction around the
hashtable API.  Perhaps like this?

The idea is to let your object enumerator to be not just a simple
unsigned int into the flat hashtable, but be something like

	typedef struct {
        	unsigned int slot;
                struct obj_list *list;
	} object_enumerator;

You store the current index in obj_hash[] to enu.slot, and if that
is IS_LST(), the linked-list element you are looking at in enu.list.
When you "increment" the iterator in object_enumerator_next(), you
increment enu.slot only after you reach the tail of the enu.list.



 builtin/fsck.c       | 17 ++++++++++-------
 builtin/index-pack.c | 11 +++++++----
 builtin/name-rev.c   | 20 +++++++++++---------
 object.c             | 22 +++++++++++++++++++---
 object.h             |  8 ++++++--
 upload-pack.c        | 23 ++++++++++++++---------
 6 files changed, 67 insertions(+), 34 deletions(-)

diff --git a/builtin/fsck.c b/builtin/fsck.c
index bb9a2cd..5688cad 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -270,22 +270,25 @@ static void check_object(struct object *obj)
 
 static void check_connectivity(void)
 {
-	int i, max;
+	int max;
+	object_enumerator enu;
 
 	/* Traverse the pending reachable objects */
 	traverse_reachable();
 
 	/* Look up all the requirements, warn about missing objects.. */
-	max = get_max_object_index();
+	max = begin_object_enumeration(&enu);
 	if (verbose)
 		fprintf(stderr, "Checking connectivity (%d objects)\n", max);
 
-	for (i = 0; i < max; i++) {
-		struct object *obj = get_indexed_object(i);
-
-		if (obj)
-			check_object(obj);
+	if (max) {
+		do {
+			struct object *obj = get_enumerated_object(&enu);
+			if (obj)
+				check_object(obj);
+		} while (object_enumeration_next(&enu));
 	}
+	end_object_enumeration(&enu);
 }
 
 static int fsck_obj(struct object *obj)
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index ef62124..1d5b65c 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -195,11 +195,14 @@ static void check_object(struct object *obj)
 
 static void check_objects(void)
 {
-	unsigned i, max;
+	object_enumerator enu;
 
-	max = get_max_object_index();
-	for (i = 0; i < max; i++)
-		check_object(get_indexed_object(i));
+	if (begin_object_enumeration(&enu)) {
+		do {
+			check_object(get_enumerated_object(&enu));
+		} while (object_enumeration_next(&enu));
+	}
+	end_object_enumeration(&enu);
 }
 
 
diff --git a/builtin/name-rev.c b/builtin/name-rev.c
index 6238247..239c3ef 100644
--- a/builtin/name-rev.c
+++ b/builtin/name-rev.c
@@ -286,16 +286,18 @@ int cmd_name_rev(int argc, const char **argv, const char *prefix)
 			name_rev_line(p, &data);
 		}
 	} else if (all) {
-		int i, max;
-
-		max = get_max_object_index();
-		for (i = 0; i < max; i++) {
-			struct object *obj = get_indexed_object(i);
-			if (!obj || obj->type != OBJ_COMMIT)
-				continue;
-			show_name(obj, NULL,
-				  always, allow_undefined, data.name_only);
+		object_enumerator enu;
+
+		if (begin_object_enumeration(&enu)) {
+			do {
+				struct object *obj = get_enumerated_object(&enu);
+				if (!obj || obj->type != OBJ_COMMIT)
+					continue;
+				show_name(obj, NULL,
+					  always, allow_undefined, data.name_only);
+			} while (object_enumeration_next(&enu));
 		}
+		end_object_enumeration(&enu);
 	} else {
 		int i;
 		for (i = 0; i < revs.nr; i++)
diff --git a/object.c b/object.c
index 20703f5..f5b754f 100644
--- a/object.c
+++ b/object.c
@@ -8,14 +8,30 @@
 static struct object **obj_hash;
 static int nr_objs, obj_hash_size;
 
-unsigned int get_max_object_index(void)
+unsigned int begin_object_enumeration(object_enumerator *enu)
 {
+	*enu = 0;
 	return obj_hash_size;
 }
 
-struct object *get_indexed_object(unsigned int idx)
+struct object *get_enumerated_object(object_enumerator *enu)
 {
-	return obj_hash[idx];
+	int ix = *enu;
+
+	if (obj_hash_size <= ix)
+		die("BUG: get_enumerated_object() called beyond the end");
+	return obj_hash[*enu];
+}
+
+int object_enumeration_next(object_enumerator *enu)
+{
+	return ++*enu < obj_hash_size;
+}
+
+void end_object_enumeration(object_enumerator *enu)
+{
+	/* Nothing to free (yet) */
+	;
 }
 
 static const char *object_type_strings[] = {
diff --git a/object.h b/object.h
index 97d384b..5435d58 100644
--- a/object.h
+++ b/object.h
@@ -35,8 +35,12 @@ struct object {
 extern const char *typename(unsigned int type);
 extern int type_from_string(const char *str);
 
-extern unsigned int get_max_object_index(void);
-extern struct object *get_indexed_object(unsigned int);
+typedef unsigned int object_enumerator;
+
+extern unsigned int begin_object_enumeration(object_enumerator *);
+extern struct object *get_enumerated_object(object_enumerator *);
+extern int object_enumeration_next(object_enumerator *);
+extern void end_object_enumeration(object_enumerator *);
 
 /*
  * This can be used to see if we have heard of the object before, but
diff --git a/upload-pack.c b/upload-pack.c
index f5673ee..618b211 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -502,6 +502,7 @@ static void check_non_tip(void)
 	struct object *o;
 	char namebuf[42]; /* ^ + SHA-1 + LF */
 	int i;
+	object_enumerator enu;
 
 	/* In the normal in-process case non-tip request can never happen */
 	if (!stateless_rpc)
@@ -525,16 +526,20 @@ static void check_non_tip(void)
 
 	namebuf[0] = '^';
 	namebuf[41] = '\n';
-	for (i = get_max_object_index(); 0 < i; ) {
-		o = get_indexed_object(--i);
-		if (!o)
-			continue;
-		if (!is_our_ref(o))
-			continue;
-		memcpy(namebuf + 1, sha1_to_hex(o->sha1), 40);
-		if (write_in_full(cmd.in, namebuf, 42) < 0)
-			goto error;
+
+	if (begin_object_enumeration(&enu)) {
+		do {
+			o = get_enumerated_object(&enu);
+			if (!o)
+				continue;
+			if (!is_our_ref(o))
+				continue;
+			memcpy(namebuf + 1, sha1_to_hex(o->sha1), 40);
+			if (write_in_full(cmd.in, namebuf, 42) < 0)
+				goto error;
+		} while (object_enumeration_next(&enu));
 	}
+	end_object_enumeration(&enu);
 	namebuf[40] = '\n';
 	for (i = 0; i < want_obj.nr; i++) {
 		o = want_obj.objects[i].item;


--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]