[PATCH 3/3] Make the object lookup hash use a "object index" instead of a pointer

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This uses less memory on 64-bit architectures (a 32-bit object index
rather than a full 64-bit pointer), but could also allow for some other
optimizations (like hiding part of the hash in unused bits of the index).

Signed-off-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
---
 alloc.c  |   86 ++++++++++++++++++++++++++++++++++++++++++++++++++-----------
 cache.h  |   11 ++++---
 object.c |   50 ++++++++++++++++++++++-------------
 object.h |    2 +-
 4 files changed, 108 insertions(+), 41 deletions(-)

diff --git a/alloc.c b/alloc.c
index 70f3c32..bc6aa30 100644
--- a/alloc.c
+++ b/alloc.c
@@ -16,24 +16,78 @@
 #include "commit.h"
 #include "tag.h"
 
-#define BLOCKING 1024
+/*
+ * We allocate 1024 object nodes at a time.
+ *
+ * This not only avoids any fragmentation in the system "malloc()"
+ * libraries, it also means that we can generate a denser "object
+ * index" that does not need a full pointer: we can use an integer
+ * with the low bits indicating the object node ID within an
+ * allocation block, and the high bits indicating the allocation
+ * block number.
+ */
+#define BLOCKING_BITS 10
+#define BLOCKING_SIZE (1u << BLOCKING_BITS)
+#define BLOCKING_MASK (BLOCKING_SIZE-1)
 
-#define DEFINE_ALLOCATOR(name, type)				\
-static unsigned int name##_allocs;				\
-void *alloc_##name##_node(void)					\
-{								\
-	static int nr;						\
-	static struct name *block;				\
-								\
-	if (!nr) {						\
-		nr = BLOCKING;					\
-		block = xcalloc(BLOCKING, sizeof(type));	\
-	}							\
-	nr--;							\
-	name##_allocs++;					\
-	return block++;						\
+struct alloc_descriptor {
+	unsigned int size, nr;
+	unsigned int block;
+};
+
+struct block_descriptor {
+	unsigned int size;
+	unsigned char *base;
+};
+
+static struct block_descriptor *block_allocations;
+static unsigned int nr_block_allocations;
+
+static unsigned int alloc_new_block(unsigned int size)
+{
+	static unsigned int index;
+	unsigned int i = ++index;
+
+	if (i >= nr_block_allocations) {
+		unsigned int old = nr_block_allocations;
+		unsigned int nr = alloc_nr(i);
+		block_allocations = xrealloc(block_allocations, nr * sizeof(*block_allocations));
+		memset(block_allocations + old, 0, (nr - old) * sizeof(*block_allocations));
+		nr_block_allocations = nr;
+	}
+	block_allocations[i].size = size;
+	block_allocations[i].base = xcalloc(BLOCKING_SIZE, size);
+	return i;
+}
+
+static unsigned int allocate_one_index(struct alloc_descriptor *desc)
+{
+	unsigned int nr = desc->nr++;
+	unsigned int index = nr & BLOCKING_MASK;
+	if (!index)
+		desc->block = alloc_new_block(desc->size);
+	return (desc->block << BLOCKING_BITS) + index;
+}
+
+struct object *object_index_address(unsigned int index)
+{
+	unsigned int block = index >> BLOCKING_BITS;
+	struct block_descriptor *desc;
+
+	index &= BLOCKING_MASK;
+	if (!block || block >= nr_block_allocations)
+		die("invalid object index (block = %u/%u)", block, nr_block_allocations);
+	desc = block_allocations + block;
+	if (!desc->size || !desc->base)
+		die("bad object block index entry %u:%p", desc->size, desc->base);
+	return (struct object *)(index * desc->size + desc->base);
 }
 
+#define DEFINE_ALLOCATOR(name, type)				\
+static struct alloc_descriptor name##_desc = { sizeof(type) };	\
+unsigned int alloc_##name##_node(void)				\
+{ return allocate_one_index(&name##_desc); }
+
 union any_object {
 	struct object object;
 	struct blob blob;
@@ -62,7 +116,7 @@ static void report(const char* name, unsigned int count, size_t size)
 #undef SZ_FMT
 
 #define REPORT(name)	\
-    report(#name, name##_allocs, name##_allocs*sizeof(struct name) >> 10)
+    report(#name, name##_desc.nr, name##_desc.nr*sizeof(struct name) >> 10)
 
 void alloc_report(void)
 {
diff --git a/cache.h b/cache.h
index 4de25cc..9342980 100644
--- a/cache.h
+++ b/cache.h
@@ -476,12 +476,13 @@ int decode_85(char *dst, const char *line, int linelen);
 void encode_85(char *buf, const unsigned char *data, int bytes);
 
 /* alloc.c */
-extern void *alloc_blob_node(void);
-extern void *alloc_tree_node(void);
-extern void *alloc_commit_node(void);
-extern void *alloc_tag_node(void);
-extern void *alloc_object_node(void);
+extern unsigned int alloc_blob_node(void);
+extern unsigned int alloc_tree_node(void);
+extern unsigned int alloc_commit_node(void);
+extern unsigned int alloc_tag_node(void);
+extern unsigned int alloc_object_node(void);
 extern void alloc_report(void);
+extern struct object *object_index_address(unsigned int index);
 
 /* trace.c */
 extern int nfasprintf(char **str, const char *fmt, ...);
diff --git a/object.c b/object.c
index 7bd3fec..a91d238 100644
--- a/object.c
+++ b/object.c
@@ -5,7 +5,11 @@
 #include "commit.h"
 #include "tag.h"
 
-static struct object **obj_hash;
+struct hash_entry {
+	unsigned int index;
+};
+
+static struct hash_entry *obj_hash;
 static int nr_objs, obj_hash_size;
 
 unsigned int get_max_object_index(void)
@@ -13,9 +17,11 @@ unsigned int get_max_object_index(void)
 	return obj_hash_size;
 }
 
+/* Purely for external object walkers (like fsck) */
 struct object *get_indexed_object(unsigned int idx)
 {
-	return obj_hash[idx];
+	unsigned int object_index = obj_hash[idx].index;
+	return object_index ? object_index_address(object_index) : NULL;
 }
 
 static const char *object_type_strings[] = {
@@ -49,16 +55,16 @@ static unsigned int hash_obj(struct object *obj, unsigned int n)
 	return hash % n;
 }
 
-static void insert_obj_hash(struct object *obj, struct object **hash, unsigned int size)
+static void insert_obj_hash(unsigned int object_index, struct object *object, struct hash_entry *hash, unsigned int size)
 {
-	int j = hash_obj(obj, size);
+	int j = hash_obj(object, size);
 
-	while (hash[j]) {
+	while (hash[j].index) {
 		j++;
 		if (j >= size)
 			j = 0;
 	}
-	hash[j] = obj;
+	hash[j].index = object_index;
 }
 
 static int hashtable_index(const unsigned char *sha1)
@@ -71,43 +77,49 @@ static int hashtable_index(const unsigned char *sha1)
 struct object *lookup_object(const unsigned char *sha1)
 {
 	int i;
-	struct object *obj;
 
 	if (!obj_hash)
 		return NULL;
 
 	i = hashtable_index(sha1);
-	while ((obj = obj_hash[i]) != NULL) {
-		if (!hashcmp(sha1, obj->sha1))
+	for (;;) {
+		struct hash_entry *entry = obj_hash + i;
+		unsigned int index = entry->index;
+		struct object *obj;
+
+		if (!index)
 			break;
+		obj = object_index_address(index);
+		if (!hashcmp(sha1, obj->sha1))
+			return obj;
 		i++;
 		if (i == obj_hash_size)
 			i = 0;
 	}
-	return obj;
+	return NULL;
 }
 
 static void grow_object_hash(void)
 {
 	int i;
 	int new_hash_size = obj_hash_size < 32 ? 32 : 2 * obj_hash_size;
-	struct object **new_hash;
+	struct hash_entry *new_hash;
 
-	new_hash = xcalloc(new_hash_size, sizeof(struct object *));
+	new_hash = xcalloc(new_hash_size, sizeof(struct hash_entry));
 	for (i = 0; i < obj_hash_size; i++) {
-		struct object *obj = obj_hash[i];
-		if (!obj)
+		unsigned int index = obj_hash[i].index;
+		if (!index)
 			continue;
-		insert_obj_hash(obj, new_hash, new_hash_size);
+		insert_obj_hash(index, object_index_address(index), new_hash, new_hash_size);
 	}
 	free(obj_hash);
 	obj_hash = new_hash;
 	obj_hash_size = new_hash_size;
 }
 
-void *create_object(const unsigned char *sha1, int type, void *o)
+void *create_object(const unsigned char *sha1, int type, unsigned int index)
 {
-	struct object *obj = o;
+	struct object *obj = object_index_address(index);
 
 	obj->parsed = 0;
 	obj->used = 0;
@@ -118,7 +130,7 @@ void *create_object(const unsigned char *sha1, int type, void *o)
 	if (obj_hash_size - 1 <= nr_objs * 2)
 		grow_object_hash();
 
-	insert_obj_hash(obj, obj_hash, obj_hash_size);
+	insert_obj_hash(index, obj, obj_hash, obj_hash_size);
 	nr_objs++;
 	return obj;
 }
@@ -127,7 +139,7 @@ struct object *lookup_unknown_object(const unsigned char *sha1)
 {
 	struct object *obj = lookup_object(sha1);
 	if (!obj)
-		obj = create_object(sha1, OBJ_NONE, alloc_object_node());
+		return create_object(sha1, OBJ_NONE, alloc_object_node());
 	return obj;
 }
 
diff --git a/object.h b/object.h
index c0a5fd3..099bbdb 100644
--- a/object.h
+++ b/object.h
@@ -47,7 +47,7 @@ extern struct object_refs *lookup_object_refs(struct object *);
 /** Internal only **/
 struct object *lookup_object(const unsigned char *sha1);
 
-extern void *create_object(const unsigned char *sha1, int type, void *obj);
+extern void *create_object(const unsigned char *sha1, int type, unsigned int object_index);
 
 /** Returns the object, having parsed it to find out what it is. **/
 struct object *parse_object(const unsigned char *sha1);
-- 
1.5.1.1.107.g7a15

-
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]