[PATCH 07/11] object: try naive cuckoo hashing

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Signed-off-by: Junio C Hamano <gitster@xxxxxxxxx>

---
../+v/65c71a61547aecd8a6830391fda31ed5d18e0529/git-pack-objects
Counting objects: 2139209, done.
32.04user 3.43system 0:35.58elapsed 99%CPU (0avgtext+0avgdata 8178672maxresident)k
0inputs+0outputs (0major+1206546minor)pagefaults 0swaps
Counting objects: 2139209, done.
33.41user 3.15system 0:36.67elapsed 99%CPU (0avgtext+0avgdata 8178688maxresident)k
0inputs+0outputs (0major+1206547minor)pagefaults 0swaps
Counting objects: 2139209, done.
32.24user 3.17system 0:35.51elapsed 99%CPU (0avgtext+0avgdata 8179968maxresident)k
0inputs+0outputs (0major+1206598minor)pagefaults 0swaps
---
 object.c |   99 ++++++++++++++++++++++++++++++++++++++++++--------------------
 1 files changed, 67 insertions(+), 32 deletions(-)

diff --git a/object.c b/object.c
index c2c0a7d..7624c48 100644
--- a/object.c
+++ b/object.c
@@ -50,33 +50,52 @@ static unsigned int hash_val(const unsigned char *sha1)
 	return hash;
 }
 
-static void insert_obj_hash(struct object *obj, struct object **hash, unsigned int size)
-{
-	unsigned int j = hash_val(obj->sha1) & (size-1);
 
-	while (hash[j]) {
-		j++;
-		if (j >= size)
-			j = 0;
-	}
-	hash[j] = obj;
-}
+#define H1(sha1) (hash_val(sha1) % obj_hash_size)
+#define H2(sha1) (hash_val((sha1) + sizeof(unsigned int)) % obj_hash_size)
 
 struct object *lookup_object(const unsigned char *sha1)
 {
-	unsigned int i;
 	struct object *obj;
 
 	if (!obj_hash)
 		return NULL;
 
-	i = hash_val(sha1) & (obj_hash_size-1);
-	while ((obj = obj_hash[i]) != NULL) {
-		if (!hashcmp(sha1, obj->sha1))
-			break;
-		i++;
-		if (i == obj_hash_size)
-			i = 0;
+	if ((obj = obj_hash[H1(sha1)]) && !hashcmp(sha1, obj->sha1))
+		return obj;
+	if ((obj = obj_hash[H2(sha1)]) && !hashcmp(sha1, obj->sha1))
+		return obj;
+	return NULL;
+}
+
+static void grow_object_hash(void); /* forward */
+
+/*
+ * A naive single-table cuckoo hashing implementation.
+ * Return NULL when "obj" is successfully inserted. Otherwise
+ * return a pointer to the object to be inserted (which may
+ * be different from the original obj). The caller is expected
+ * to grow the hash table and re-insert the returned object.
+ */
+static struct object *insert_obj_hash(struct object *obj)
+{
+	int loop;
+
+	for (loop = obj_hash_size / 2; 0 <= loop; loop--) {
+		struct object *tmp_obj;
+		unsigned int ix;
+
+		ix = H1(obj->sha1);
+		if (!obj_hash[ix]) {
+			obj_hash[ix] = obj;
+			return NULL;
+		}
+		ix = H2(obj->sha1);
+		tmp_obj = obj_hash[ix];
+		obj_hash[ix] = obj;
+		if (!tmp_obj)
+			return NULL;
+		obj = tmp_obj;
 	}
 	return obj;
 }
@@ -89,25 +108,36 @@ static int next_size(int sz)
 
 static void grow_object_hash(void)
 {
-	int i;
-	int new_hash_size = next_size(obj_hash_size);
-	struct object **new_hash;
-
-	new_hash = xcalloc(new_hash_size, sizeof(struct object *));
-	for (i = 0; i < obj_hash_size; i++) {
-		struct object *obj = obj_hash[i];
-		if (!obj)
+	struct object **current_hash;
+	int current_size;
+
+	current_hash = obj_hash;
+	current_size = obj_hash_size;
+	while (1) {
+		int i;
+		obj_hash_size = next_size(obj_hash_size);
+		obj_hash = xcalloc(obj_hash_size, sizeof(struct object *));
+
+		for (i = 0; i < current_size; i++) {
+			if (!current_hash[i])
+				continue;
+			if (insert_obj_hash(current_hash[i]))
+				break;
+		}
+		if (i < current_size) {
+			/* too small - grow and retry */
+			free(obj_hash);
 			continue;
-		insert_obj_hash(obj, new_hash, new_hash_size);
+		}
+		free(current_hash);
+		return;
 	}
-	free(obj_hash);
-	obj_hash = new_hash;
-	obj_hash_size = new_hash_size;
 }
 
 void *create_object(const unsigned char *sha1, int type, void *o)
 {
 	struct object *obj = o;
+	struct object *to_insert;
 
 	obj->parsed = 0;
 	obj->used = 0;
@@ -117,8 +147,13 @@ void *create_object(const unsigned char *sha1, int type, void *o)
 
 	if (obj_hash_size - 1 <= nr_objs * 2)
 		grow_object_hash();
-
-	insert_obj_hash(obj, obj_hash, obj_hash_size);
+	to_insert = obj;
+	while (1) {
+		to_insert = insert_obj_hash(to_insert);
+		if (!to_insert)
+			break;
+		grow_object_hash();
+	}
 	nr_objs++;
 	return obj;
 }
-- 
1.7.6.433.g1421f

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]