[PATCH 4/6] introduce a commit metapack

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



When we are doing a commit traversal that does not need to
look at the commit messages themselves (e.g., rev-list or
merge-base), we spend a lot of time accessing,
decompressing, and parsing the commit objects just to find
the parent and timestamp information. We can make a
space-time tradeoff by caching that information on disk in a
compact, uncompressed format.

TODO: document on-disk format in Documentation/technical
TODO: document API

Signed-off-by: Jeff King <peff@xxxxxxxx>
---
 Makefile          |   2 +
 commit-metapack.c | 175 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 commit-metapack.h |  12 ++++
 3 files changed, 189 insertions(+)
 create mode 100644 commit-metapack.c
 create mode 100644 commit-metapack.h

diff --git a/Makefile b/Makefile
index 3e4ae1b..6ca5320 100644
--- a/Makefile
+++ b/Makefile
@@ -619,6 +619,7 @@ LIB_H += column.h
 LIB_H += cache.h
 LIB_H += color.h
 LIB_H += column.h
+LIB_H += commit-metapack.h
 LIB_H += commit.h
 LIB_H += compat/bswap.h
 LIB_H += compat/cygwin.h
@@ -730,6 +731,7 @@ LIB_OBJS += combine-diff.o
 LIB_OBJS += color.o
 LIB_OBJS += column.o
 LIB_OBJS += combine-diff.o
+LIB_OBJS += commit-metapack.o
 LIB_OBJS += commit.o
 LIB_OBJS += compat/obstack.o
 LIB_OBJS += compat/terminal.o
diff --git a/commit-metapack.c b/commit-metapack.c
new file mode 100644
index 0000000..2c19f48
--- /dev/null
+++ b/commit-metapack.c
@@ -0,0 +1,175 @@
+#include "cache.h"
+#include "commit-metapack.h"
+#include "metapack.h"
+#include "commit.h"
+#include "sha1-lookup.h"
+
+/*
+ * In-memory handle for one pack's "commits" metapack.
+ *
+ * Layout of mp.data, as parsed by alloc_commit_metapack():
+ *   - a 4-byte count of entries in network byte order
+ *   - `nr` 20-byte commit sha1s (the lookup index)
+ *   - `nr` 64-byte records: date(4), tree(20), parent1(20), parent2(20)
+ */
+struct commit_metapack {
+	struct metapack mp;	/* underlying metapack; released with metapack_close() */
+	uint32_t nr;		/* number of commit entries, in host byte order */
+	unsigned char *index;	/* start of the sha1 index inside mp.data */
+	unsigned char *data;	/* start of the fixed-size records inside mp.data */
+	struct commit_metapack *next;	/* next loaded metapack, or NULL */
+};
+/* Head of the list of loaded metapacks; filled by prepare_commit_metapacks(). */
+static struct commit_metapack *commit_metapacks;
+
+/*
+ * Try to load the "commits" metapack that accompanies `pack`.
+ *
+ * Returns a freshly allocated commit_metapack whose `index` and `data`
+ * pointers refer into the metapack's data, or NULL when:
+ *   - metapack_init() fails,
+ *   - the file's version is not 1 (silently ignored), or
+ *   - the file is too short for the entry count it declares (a warning
+ *     is printed).
+ */
+static struct commit_metapack *alloc_commit_metapack(struct packed_git *pack)
+{
+	struct commit_metapack *it = xcalloc(1, sizeof(*it));
+	uint32_t version;
+
+	if (metapack_init(&it->mp, pack, "commits", &version) < 0) {
+		/* Nothing was opened, so there is nothing to close. */
+		free(it);
+		return NULL;
+	}
+
+	/*
+	 * An unknown version most likely comes from a more recent git.
+	 * Don't bother warning the user, as we'll just fall back to
+	 * reading the commits.
+	 */
+	if (version != 1)
+		goto discard;
+
+	if (it->mp.len < 4)
+		goto truncated;
+	memcpy(&it->nr, it->mp.data, sizeof(it->nr));
+	it->nr = ntohl(it->nr);
+
+	/*
+	 * We need 84 bytes for each entry: sha1(20), date(4), tree(20),
+	 * parents(40). Divide instead of multiplying: "84 * nr + 4" is
+	 * evaluated in 32-bit unsigned arithmetic and wraps around for a
+	 * large (corrupt) count, which would let a short file slip through
+	 * and lead to out-of-bounds reads later on.
+	 */
+	if ((it->mp.len - 4) / 84 < it->nr)
+		goto truncated;
+
+	it->index = it->mp.data + 4;
+	/* Widen before multiplying so the offset cannot wrap either. */
+	it->data = it->index + (size_t)20 * it->nr;
+
+	return it;
+
+truncated:
+	warning("commit metapack for '%s' is truncated", pack->pack_name);
+discard:
+	metapack_close(&it->mp);
+	free(it);
+	return NULL;
+}
+
+/*
+ * Build the commit_metapacks list the first time it is needed.
+ *
+ * Every pack known after prepare_packed_git() is offered to
+ * alloc_commit_metapack(); packs without a usable metapack are simply
+ * left out. Subsequent calls return immediately.
+ */
+static void prepare_commit_metapacks(void)
+{
+	static int done;
+	struct commit_metapack **link;
+	struct packed_git *pack;
+
+	if (done)
+		return;
+
+	prepare_packed_git();
+
+	/* Append in pack order by always writing through the last link. */
+	link = &commit_metapacks;
+	for (pack = packed_git; pack; pack = pack->next) {
+		struct commit_metapack *loaded = alloc_commit_metapack(pack);
+
+		if (!loaded)
+			continue;
+		*link = loaded;
+		link = &loaded->next;
+	}
+
+	done = 1;
+}
+
+/*
+ * Look up the commit `sha1` in the loaded commit metapacks.
+ *
+ * On a hit, stores the commit date (converted to host byte order) in
+ * *timestamp, points *tree, *parent1 and *parent2 at the 20-byte sha1s
+ * stored in the metapack data (the caller must not free them), and
+ * returns 0. Returns -1, leaving the outputs untouched, when no
+ * metapack contains the commit.
+ */
+int commit_metapack(unsigned char *sha1,
+		    uint32_t *timestamp,
+		    unsigned char **tree,
+		    unsigned char **parent1,
+		    unsigned char **parent2)
+{
+	struct commit_metapack *p;
+
+	prepare_commit_metapacks();
+	for (p = commit_metapacks; p; p = p->next) {
+		unsigned char *data;
+		uint32_t date;
+		int pos = sha1_entry_pos(p->index, 20, 0, 0, p->nr, p->nr, sha1);
+		if (pos < 0)
+			continue;
+
+		/*
+		 * timestamp(4) + tree(20) + parents(40). Widen before
+		 * multiplying so a large position cannot overflow "int".
+		 */
+		data = p->data + (size_t)64 * pos;
+
+		/*
+		 * Copy the date out instead of dereferencing a casted
+		 * pointer: the byte buffer carries no alignment guarantee
+		 * for a uint32_t load, and this matches how the entry
+		 * count is read when the metapack is opened.
+		 */
+		memcpy(&date, data, sizeof(date));
+		*timestamp = ntohl(date);
+		data += 4;
+		*tree = data;
+		data += 20;
+		*parent1 = data;
+		data += 20;
+		*parent2 = data;
+
+		return 0;
+	}
+
+	return -1;
+}
+
+/*
+ * Callback for metapack_writer_foreach(): collect the commits that can
+ * be represented in the metapack.
+ *
+ * `data` is the address of the caller's list-tail pointer; every
+ * accepted commit is appended there and the tail is advanced. Objects
+ * that are not commits are ignored. Dies if a commit cannot be looked
+ * up or parsed. `mw` is unused.
+ */
+static void get_commits(struct metapack_writer *mw,
+			const unsigned char *sha1,
+			void *data)
+{
+	struct commit_list ***tail = data;
+	enum object_type kind = sha1_object_info(sha1, NULL);
+	struct commit_list *parents;
+	struct commit *c;
+
+	if (kind != OBJ_COMMIT)
+		return;
+
+	c = lookup_commit(sha1);
+	if (!c || parse_commit(c))
+		die("unable to read commit %s", sha1_to_hex(sha1));
+
+	/*
+	 * The fixed-size record has room for exactly one or two parents.
+	 * Skip anything else; readers can fall back to the actual commit
+	 * object in those rare cases.
+	 */
+	parents = c->parents;
+	if (!parents)
+		return;	/* root commit */
+	if (parents->next && parents->next->next)
+		return;	/* octopus merge: three or more parents */
+
+	*tail = &commit_list_insert(c, *tail)->next;
+}
+
+/*
+ * Write the "commits" metapack (version 1) for the pack index `idx`.
+ *
+ * The output consists of a 4-byte entry count, then the 20-byte sha1 of
+ * every eligible commit, then one date/tree/parent1/parent2 record per
+ * commit in the same order. Eligibility is decided by get_commits().
+ *
+ * NOTE(review): readers binary-search the sha1 block, so this relies on
+ * metapack_writer_foreach() visiting objects in sorted sha1 order --
+ * confirm against its implementation.
+ */
+void commit_metapack_write(const char *idx)
+{
+	struct metapack_writer mw;
+	struct commit_list *commits = NULL, *p;
+	struct commit_list **tail = &commits;
+	uint32_t nr;
+
+	metapack_writer_init(&mw, idx, "commits", 1);
+
+	/* Figure out how many eligible commits we've got in this pack. */
+	metapack_writer_foreach(&mw, get_commits, &tail);
+	for (nr = 0, p = commits; p; p = p->next)
+		nr++;
+	metapack_writer_add_uint32(&mw, nr);
+
+	/* Then write an index of commit sha1s */
+	for (p = commits; p; p = p->next)
+		metapack_writer_add(&mw, p->item->object.sha1, 20);
+
+	/* Followed by the actual date/tree/parents data */
+	for (p = commits; p; p = p->next) {
+		struct commit *c = p->item;
+		/*
+		 * NOTE(review): the date is stored in 32 bits; confirm that
+		 * narrowing c->date here is acceptable.
+		 */
+		metapack_writer_add_uint32(&mw, c->date);
+		metapack_writer_add(&mw, c->tree->object.sha1, 20);
+		/* get_commits() only keeps commits with one or two parents. */
+		metapack_writer_add(&mw, c->parents->item->object.sha1, 20);
+		metapack_writer_add(&mw,
+				    c->parents->next ?
+				    c->parents->next->item->object.sha1 :
+				    null_sha1, 20);
+	}
+
+	metapack_writer_finish(&mw);
+
+	/* Release the temporary list nodes; the commits themselves stay. */
+	free_commit_list(commits);
+}
diff --git a/commit-metapack.h b/commit-metapack.h
new file mode 100644
index 0000000..4684573
--- /dev/null
+++ b/commit-metapack.h
@@ -0,0 +1,12 @@
+#ifndef METAPACK_COMMIT_H
+#define METAPACK_COMMIT_H
+
+/*
+ * Look up cached metadata for the commit `sha1` in the commit metapacks.
+ *
+ * Returns 0 on success, with *timestamp set to the commit date and
+ * *tree, *parent1 and *parent2 pointing at 20-byte sha1s inside the
+ * metapack data (do not free them). For a commit with a single parent
+ * the writer stores null_sha1 as the second parent.
+ *
+ * Returns -1 if the commit is not present in any metapack. Root commits
+ * and merges with more than two parents are never stored, so callers
+ * need to be prepared to read the commit object itself.
+ */
+int commit_metapack(unsigned char *sha1,
+		    uint32_t *timestamp,
+		    unsigned char **tree,
+		    unsigned char **parent1,
+		    unsigned char **parent2);
+
+/*
+ * Write a commit metapack. `idx_file` is handed to
+ * metapack_writer_init(); presumably it is the path of the pack's .idx
+ * file -- confirm against the metapack writer API.
+ */
+void commit_metapack_write(const char *idx_file);
+
+#endif
-- 
1.8.0.2.16.g72e2fc9

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]