Re: [PATCH 1/2] Add `struct object_hash`

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Johannes Schindelin <Johannes.Schindelin@xxxxxx> writes:

> Using object_hash, you can store interesting information about
> objects in a private hash map. This makes up for the lack of a
> `util` member of `struct object`.
>
> Signed-off-by: Johannes Schindelin <johannes.schindelin@xxxxxx>
> ---
>
> 	We should have done that earlier. Now, git.git already
> 	contains two specialised versions of an object hash map:
> 	obj_hash and refs_hash.
>
> 	obj_hash is not really a hash map, but rather a hash set,
> 	but there is no excuse for refs_hash having its own little
> 	private implementation nobody else can reuse.

Ok, having now looked at it more, I can say:

 - I hate it.

I dislike it intensely, because it's so _close_ to being usable.
For one thing, I do not like that the patch was not accompanied
with rewrite of object-refs.c using this new facility.

And I kind of know why.  You would end up making obj_refs quite
more expensive by doing so if you use two-pointer hash_entry
that points at a separate structure (and you would want the hash
table not too full, which means you would waste at least twice
as much memory for growing room for the hashtable, plus malloc
overhead for the separate flexible array of refs), and obj_refs
is something we would really want to keep its memory footprint
to the bare minimum, because nowhere else in git we keep
information on potentially thousands of objects.  I suspect that
it would have been bit more palatable if object_hash_entry were
a flex structure type like object_refs.

> +static void grow_object_hash(struct object_hash *hash)
> +{
> +	struct object_hash_entry *old_hash = hash->hash;
> +	int new_size = (hash->size + 1000) * 3 / 2, i;
> +
> +	hash->hash = xcalloc(sizeof(struct object_hash_entry), new_size);
> +	for (i = 0; i < hash->size; i++) {
> +		struct object_hash_entry *entry = old_hash + i;
> +		if (!entry->object)
> +			continue;
> +		insert_object_into_hash(entry->object, entry->util, hash);
> +	}
> +	hash->size = new_size;
> +	free(old_hash);
> +}

Isn't this broken?  You still have old hash size in hash->size
inside loop, which is given to insert_object_info_hash(), so the
rehashing will use the old (smaller) hash size to compute the
new location for the entry.  You want to keep old_size in a
variable to control the loop termination, update hash->size with
new_size at the same time you update hash->hash with the new
empty array, and walk the old array by hand, rehashing the
existing entries into new hashtable.

Here is a suggested replacement; patch is applicable on top of
v1.5.0 (or master).

-- >8 --
From: Johannes Schindelin <Johannes.Schindelin@xxxxxx>

Add `struct object_hash`

Using object_hash, you can store interesting information about
objects in a private hash map.  The earlier object-refs API is
now built on top of this generalized facility

Signed-off-by: Junio C Hamano <junkio@xxxxxxx>

---
 Makefile      |    3 +-
 object-hash.c |   64 +++++++++++++++++++++++++++++++++++++++++++++++++++
 object-refs.c |   71 ++++-----------------------------------------------------
 object.h      |   19 ++++++++++++++-
 4 files changed, 89 insertions(+), 68 deletions(-)
 create mode 100644 object-hash.c

diff --git a/Makefile b/Makefile
index 40bdcff..0b0a926 100644
--- a/Makefile
+++ b/Makefile
@@ -262,7 +262,8 @@ LIB_OBJS = \
 	revision.o pager.o tree-walk.o xdiff-interface.o \
 	write_or_die.o trace.o list-objects.o grep.o \
 	alloc.o merge-file.o path-list.o help.o unpack-trees.o $(DIFF_OBJS) \
-	color.o wt-status.o archive-zip.o archive-tar.o shallow.o utf8.o
+	color.o wt-status.o archive-zip.o archive-tar.o shallow.o utf8.o \
+	object-hash.o
 
 BUILTIN_OBJS = \
 	builtin-add.o \
diff --git a/object-hash.c b/object-hash.c
new file mode 100644
index 0000000..8e16e0e
--- /dev/null
+++ b/object-hash.c
@@ -0,0 +1,64 @@
+#include "cache.h"
+#include "object.h"
+
+static unsigned int hash_obj(struct object *obj, unsigned int n)
+{
+	unsigned int hash = *(unsigned int *)obj->sha1;
+	return hash % n;
+}
+
+static void insert_object_into_hash(struct object_hash_entry *ent, struct object_hash *hash)
+{
+	int j = hash_obj(ent->object, hash->size);
+
+	while (hash->hash[j]) {
+		j++;
+		if (j >= hash->size)
+			j = 0;
+	}
+	hash->hash[j] = ent;
+}
+
+static void grow_object_hash(struct object_hash *hash)
+{
+	struct object_hash_entry **old_hash = hash->hash;
+	int i;
+	int old_size = hash->size;
+	int new_size = (old_size + 1000) * 3 / 2;
+
+	hash->hash = xcalloc(new_size, sizeof(struct object_hash_entry *));
+	hash->size = new_size;
+	for (i = 0; i < old_size; i++) {
+		struct object_hash_entry *entry = old_hash[i];
+		if (!entry)
+			continue;
+		insert_object_into_hash(entry, hash);
+	}
+	free(old_hash);
+}
+
+void add_object_to_hash(struct object_hash_entry *ent, struct object_hash *hash)
+{
+	if (++hash->nr > hash->size * 2 / 3)
+		grow_object_hash(hash);
+	insert_object_into_hash(ent, hash);
+}
+
+struct object_hash_entry *lookup_object_in_hash(struct object *obj, struct object_hash *hash)
+{
+	struct object_hash_entry *ent;
+	int j;
+
+	/* nothing to lookup */
+	if (!hash->size)
+		return NULL;
+	j = hash_obj(obj, hash->size);
+	while ((ent = hash->hash[j]) != NULL) {
+		if (ent->object == obj)
+			break;
+		j++;
+		if (j >= hash->size)
+			j = 0;
+	}
+	return ent;
+}
diff --git a/object-refs.c b/object-refs.c
index 98ea100..022b5ca 100644
--- a/object-refs.c
+++ b/object-refs.c
@@ -3,73 +3,11 @@
 
 int track_object_refs = 0;
 
-static unsigned int refs_hash_size, nr_object_refs;
-static struct object_refs **refs_hash;
-
-static unsigned int hash_obj(struct object *obj, unsigned int n)
-{
-	unsigned int hash = *(unsigned int *)obj->sha1;
-	return hash % n;
-}
-
-static void insert_ref_hash(struct object_refs *ref, struct object_refs **hash, unsigned int size)
-{
-	int j = hash_obj(ref->base, size);
-
-	while (hash[j]) {
-		j++;
-		if (j >= size)
-			j = 0;
-	}
-	hash[j] = ref;
-}
-
-static void grow_refs_hash(void)
-{
-	int i;
-	int new_hash_size = (refs_hash_size + 1000) * 3 / 2;
-	struct object_refs **new_hash;
-
-	new_hash = xcalloc(new_hash_size, sizeof(struct object_refs *));
-	for (i = 0; i < refs_hash_size; i++) {
-		struct object_refs *ref = refs_hash[i];
-		if (!ref)
-			continue;
-		insert_ref_hash(ref, new_hash, new_hash_size);
-	}
-	free(refs_hash);
-	refs_hash = new_hash;
-	refs_hash_size = new_hash_size;
-}
-
-static void add_object_refs(struct object *obj, struct object_refs *ref)
-{
-	int nr = nr_object_refs + 1;
-
-	if (nr > refs_hash_size * 2 / 3)
-		grow_refs_hash();
-	ref->base = obj;
-	insert_ref_hash(ref, refs_hash, refs_hash_size);
-	nr_object_refs = nr;
-}
+static struct object_hash refs_hash;
 
 struct object_refs *lookup_object_refs(struct object *obj)
 {
-	struct object_refs *ref;
-	int j;
-
-	/* nothing to lookup */
-	if (!refs_hash_size)
-		return NULL;
-	j = hash_obj(obj, refs_hash_size);
-	while ((ref = refs_hash[j]) != NULL) {
-		if (ref->base == obj)
-			break;
-		j++;
-		if (j >= refs_hash_size)
-			j = 0;
-	}
-	return ref;
+	return (struct object_refs *)lookup_object_in_hash(obj, &refs_hash);
 }
 
 struct object_refs *alloc_object_refs(unsigned count)
@@ -120,7 +58,8 @@ void set_object_refs(struct object *obj, struct object_refs *refs)
 
 	for (i = 0; i < refs->count; i++)
 		refs->ref[i]->used = 1;
-	add_object_refs(obj, refs);
+	refs->base = obj;
+	add_object_to_hash((struct object_hash_entry *)refs, &refs_hash);
 }
 
 void mark_reachable(struct object *obj, unsigned int mask)
@@ -133,7 +72,7 @@ void mark_reachable(struct object *obj, unsigned int mask)
 	if (obj->flags & mask)
 		return;
 	obj->flags |= mask;
-	refs = lookup_object_refs(obj);
+	refs = (struct object_refs *)lookup_object_in_hash(obj, &refs_hash);
 	if (refs) {
 		unsigned i;
 		for (i = 0; i < refs->count; i++)
diff --git a/object.h b/object.h
index caee733..c417cf2 100644
--- a/object.h
+++ b/object.h
@@ -7,11 +7,28 @@ struct object_list {
 };
 
 struct object_refs {
-	unsigned count;
 	struct object *base;
+	unsigned count;
 	struct object *ref[FLEX_ARRAY]; /* more */
 };
 
+/*
+ * This is a "shell" type that represents the actual structure you
+ * would place in object_hash.  The user would cast up the pointer to
+ * object_hash_entry to a more concrete type, such as object_refs.
+ */
+struct object_hash_entry {
+	struct object *object;
+};
+
+struct object_hash {
+	unsigned int size, nr;
+	struct object_hash_entry **hash;
+};
+
+void add_object_to_hash(struct object_hash_entry *, struct object_hash *);
+struct object_hash_entry *lookup_object_in_hash(struct object *, struct object_hash *);
+
 struct object_array {
 	unsigned int nr;
 	unsigned int alloc;

-
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]