[PATCH 09/16] subtree: rewrite incoming commits

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This adds the main function, subtree_import(), which is intended to be
used by "git clone".

Subtree packs are not complete, so they are barely usable: the Git
client will complain about missing objects here and there.
Theoretically, client code could be adapted to only look for objects
within the subtree, but that was painful to try.

Alternatively, subtree_import() rewrites commits to have only the
specified subtree, sealing all broken paths. The Git client now happily
works with these new commits.

However, users might not, because these are different commits with
different SHA-1s. They can't use those SHA-1s to communicate within
their team. To work around this, all original commits are replaced by
the new commits using git-replace.

Of course this is still not perfect. Users may be able to send SHA-1
around, which is consistent. They may not do the same with tree SHA-1.

Rewriting/replacing commits takes time and space. For replacing _all_
commits, the current replace mechanism is not suitable, which is why
subtree_lookup_object() was introduced in previous patches.

For rewriting, writing a huge number of objects is slow. So
subtree_import() builds a pack for all new objects. These packs are
not optimized. But it does reduce wait time for rewriting.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@xxxxxxxxx>
---
 subtree.c |  244 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 subtree.h |    1 +
 2 files changed, 245 insertions(+), 0 deletions(-)

diff --git a/subtree.c b/subtree.c
index 601d827..8c075be 100644
--- a/subtree.c
+++ b/subtree.c
@@ -115,3 +115,247 @@ const unsigned char *subtree_lookup_object(const unsigned char *sha1)
 		return subtree_commit[pos]->sha1[1];
 	return sha1;
 }
+
+/*
+ * Deflate the `size` bytes at *pptr into a newly allocated buffer.
+ *
+ * On return *pptr points at the compressed data, which the caller must
+ * free(). The input buffer is not freed here and stays owned by the
+ * caller. Returns the number of compressed bytes.
+ *
+ * Dies if zlib cannot be initialized, if the input does not fit zlib's
+ * 32-bit counters, or if the stream cannot be finished.
+ */
+static unsigned long do_compress(void **pptr, unsigned long size)
+{
+	z_stream stream;
+	void *in, *out;
+	unsigned long maxsize;
+	int status;
+
+	memset(&stream, 0, sizeof(stream));
+	if (deflateInit(&stream, Z_DEFAULT_COMPRESSION) != Z_OK)
+		die("unable to initialize zlib deflate");
+	maxsize = deflateBound(&stream, size);
+
+	/* avail_in/avail_out are uInt; refuse sizes that would be truncated */
+	if (size > (uInt)-1 || maxsize > (uInt)-1)
+		die("object too large to compress (%lu bytes)", size);
+
+	in = *pptr;
+	out = xmalloc(maxsize);
+	*pptr = out;
+
+	stream.next_in = in;
+	stream.avail_in = size;
+	stream.next_out = out;
+	stream.avail_out = maxsize;
+	do {
+		status = deflate(&stream, Z_FINISH);
+	} while (status == Z_OK);
+	deflateEnd(&stream);
+
+	/* anything but Z_STREAM_END means the output is incomplete */
+	if (status != Z_STREAM_END)
+		die("zlib deflate failed (%d)", status);
+
+	return stream.total_out;
+}
+
+/* Number of objects successfully appended to the pack being built. */
+static int nr_written;
+
+/*
+ * Write all `len` bytes of `data` to fd, continuing after short writes.
+ * Returns 0 on success, -1 if write() fails or makes no progress.
+ */
+static int pack_write_all(int fd, const void *data, unsigned long len)
+{
+	const char *p = data;
+
+	while (len) {
+		ssize_t n = write(fd, p, len);
+		if (n <= 0)
+			return -1;
+		p += n;
+		len -= n;
+	}
+	return 0;
+}
+
+/*
+ * Append one object to the pack file open on fd: an in-pack object
+ * header for (type, size) followed by the deflated contents of buf.
+ *
+ * buf itself is not freed; only the compressed copy made here is.
+ * nr_written is bumped only when the whole entry was written.
+ *
+ * Returns 0 on success, non-zero if the entry could not be written.
+ */
+static int add_sha1_to_pack(int fd, void *buf, unsigned long size, enum object_type type)
+{
+	unsigned long datalen;
+	unsigned hdrlen;
+	unsigned char header[10];
+	int failed;
+
+	/* do_compress() repoints the local buf at the compressed copy */
+	datalen = do_compress(&buf, size);
+	hdrlen = encode_in_pack_object_header(type, size, header);
+	failed = pack_write_all(fd, header, hdrlen) ||
+		 pack_write_all(fd, buf, datalen);
+	free(buf);
+	if (failed)
+		return -1;
+	nr_written++;
+	return 0;
+}
+
+/*
+ * Build a narrowed copy of tree `sha1` that keeps only the directory
+ * entry named by the first component of `prefix`, and store the new
+ * tree's SHA-1 in `newsha1`. When `prefix` has further components, the
+ * matching subtree is narrowed recursively; a trailing slash does not
+ * count as a further component. If no directory entry matches, the
+ * new tree is empty.
+ *
+ * If fd is zero, write to the object store. If fd is greater than
+ * zero, it's a pack file handle and the tree is appended to it unless
+ * it is already flagged SEEN. If fd is negative, the SHA-1 is only
+ * computed.
+ *
+ * Returns 0 on success, 1 on failure. Dies if `sha1` cannot be read as
+ * a tree.
+ */
+static int narrow_tree(const unsigned char *sha1, unsigned char *newsha1,
+		       const char *prefix, int fd)
+{
+	struct tree_desc desc;
+	struct name_entry entry;
+	struct strbuf buffer;
+	const char *slash;
+	size_t subtree_len;
+	enum object_type type;
+	unsigned long size;
+	char *tree;
+	struct object *obj;
+	int ret = 1;
+
+	slash = strchr(prefix, '/');
+	subtree_len = slash ? (size_t)(slash - prefix) : strlen(prefix);
+
+	tree = read_sha1_file(sha1, &type, &size);
+	/* type is only meaningful when the read succeeded */
+	if (!tree)
+		die("unable to read tree %s", sha1_to_hex(sha1));
+	if (type != OBJ_TREE)
+		die("%s is not a tree", sha1_to_hex(sha1));
+
+	init_tree_desc(&desc, tree, size);
+	strbuf_init(&buffer, 1024);
+	while (tree_entry(&desc, &entry)) {
+		unsigned char newtree_sha1[20];
+		size_t pathlen;
+
+		if (!S_ISDIR(entry.mode))
+			continue;
+
+		pathlen = strlen(entry.path);
+		if (pathlen != subtree_len ||
+		    strncmp(entry.path, prefix, subtree_len))
+			continue;
+
+		if (slash && slash[1]) { /* trailing slash does not count */
+			/* the failing call has already reported the error */
+			if (narrow_tree(entry.sha1, newtree_sha1,
+					prefix + subtree_len + 1, fd))
+				goto out;
+		} else {
+			memcpy(newtree_sha1, entry.sha1, 20);
+		}
+
+		/* "%.*s" takes an int precision, not a size_t */
+		strbuf_addf(&buffer, "%o %.*s%c", entry.mode, (int)pathlen, entry.path, '\0');
+		strbuf_add(&buffer, newtree_sha1, 20);
+		break;
+	}
+
+	if (fd == 0) {
+		if (write_sha1_file(buffer.buf, buffer.len, tree_type, newsha1)) {
+			error("Could not write replaced tree for %s", sha1_to_hex(sha1));
+			goto out;
+		}
+		ret = 0;
+		goto out;
+	}
+
+	hash_sha1_file(buffer.buf, buffer.len, tree_type, newsha1);
+	obj = (struct object *)lookup_tree(newsha1);
+	/* NOTE(review): presumably NULL means the id is known as another type */
+	if (!obj) {
+		error("Could not look up replaced tree for %s", sha1_to_hex(sha1));
+		goto out;
+	}
+	if (fd > 0 &&
+	    !(obj->flags & SEEN) &&
+	    add_sha1_to_pack(fd, buffer.buf, buffer.len, OBJ_TREE)) {
+		error("Could not write replaced tree for %s", sha1_to_hex(sha1));
+		goto out;
+	}
+	obj->flags |= SEEN;
+	ret = 0;
+
+out:
+	/* entry.path points into tree, so it is released only here */
+	free(tree);
+	strbuf_release(&buffer);
+	return ret;
+}
+
+/*
+ * Take sha1 of a commit, rewrite its tree using narrow_tree(), then
+ * add a replace entry to file pointer fp (which is $GIT_DIR/subtree).
+ * Each entry is "<old sha1 hex> <new sha1 hex>\n"; fp may be NULL, in
+ * which case no entry is recorded.
+ *
+ * The rewritten commit goes to the object store when fd is zero, is
+ * appended to the pack open on fd when fd is greater than zero (unless
+ * it is already flagged SEEN), and is only hashed when fd is negative.
+ *
+ * Returns 0 on success, 1 on failure.
+ */
+static int shadow_commit(const unsigned char *sha1, const char *prefix, int fd, FILE *fp)
+{
+	unsigned char newsha1[20], treesha1[20];
+	enum object_type type;
+	unsigned long size;
+	char *buffer;
+	struct object *obj;
+	int saved_read_replace_refs = read_replace_refs;
+	int ret = 1;
+
+	/*
+	 * read_replace_refs is switched off around the read, presumably so
+	 * that the original commit is loaded rather than a replacement.
+	 */
+	read_replace_refs = 0;
+	buffer = read_sha1_file(sha1, &type, &size);
+	read_replace_refs = saved_read_replace_refs;
+
+	/* the code below relies on "tree " + 40 hex digits at offset 0 */
+	if (!buffer || type != OBJ_COMMIT || size < 45 ||
+	    memcmp(buffer, "tree ", 5) ||
+	    get_sha1_hex(buffer + 5, treesha1) ||
+	    narrow_tree(treesha1, newsha1, prefix, fd)) {
+		error("Failed to narrow tree for commit %s", sha1_to_hex(sha1));
+		goto out;
+	}
+
+	/* replace new tree in */
+	memcpy(buffer + 5, sha1_to_hex(newsha1), 40);
+
+	if (fd == 0) {
+		if (write_sha1_file(buffer, size, commit_type, newsha1)) {
+			error("Could not write replaced commit for %s", sha1_to_hex(sha1));
+			goto out;
+		}
+	} else {
+		hash_sha1_file(buffer, size, commit_type, newsha1);
+		obj = (struct object *)lookup_commit(newsha1);
+		/* NOTE(review): presumably NULL means the id is known as another type */
+		if (!obj) {
+			error("Could not look up replaced commit for %s", sha1_to_hex(sha1));
+			goto out;
+		}
+		if (fd > 0 &&
+		    !(obj->flags & SEEN) &&
+		    add_sha1_to_pack(fd, buffer, size, OBJ_COMMIT)) {
+			error("Could not write replaced commit for %s", sha1_to_hex(sha1));
+			goto out;
+		}
+		obj->flags |= SEEN;
+	}
+
+	if (fp) {
+		char buf[82];
+		memcpy(buf, sha1_to_hex(sha1), 40);
+		buf[40] = ' ';
+		memcpy(buf + 41, sha1_to_hex(newsha1), 40);
+		buf[81] = '\n';
+		if (fwrite(buf, sizeof(buf), 1, fp) != 1) {
+			error("Could not record replacement for commit %s", sha1_to_hex(sha1));
+			goto out;
+		}
+	}
+	ret = 0;
+
+out:
+	free(buffer);
+	return ret;
+}
+
+/*
+ * Rewrite all reachable commits in repo using shadow_commit().
+ * Write out the pack that contains new tree/commit objects.
+ *
+ * Commits come from a "rev-list --all" walk and are narrowed to
+ * core_subtree. The old/new SHA-1 pairs are written to the file named
+ * by git_path("subtree"). The new objects are collected in a temporary
+ * pack, which is fed to "git index-pack --stdin" and then removed.
+ * Afterwards the pack list and the subtree_commit table are reloaded
+ * and every replaced commit that is already in memory is invalidated.
+ *
+ * Dies on any failure.
+ */
+void subtree_import(void)
+{
+	const char *args[] = {"rev-list", "--all", NULL};
+	struct pack_header hdr;
+	struct progress *ps;
+	struct rev_info revs;
+	struct commit *c;
+	unsigned char sha1[20];
+	unsigned commit_nr = 0;
+	char tmpname[PATH_MAX];
+	char cmd[PATH_MAX + 64];
+	int pack_fd, i, len, status;
+	FILE *fp;
+
+	/* Packing */
+	init_revisions(&revs, NULL);
+	setup_revisions(2, args, &revs, NULL);
+	if (prepare_revision_walk(&revs))
+		die("revision walk setup failed");
+	fp = fopen(git_path("subtree"), "w+");
+	if (!fp)
+		die("Could not open %s for writing", git_path("subtree"));
+
+	pack_fd = odb_mkstemp(tmpname, sizeof(tmpname), "pack/tmp_pack_XXXXXX");
+	if (pack_fd < 0)
+		die("Could not create temporary pack file");
+
+	/* entry count is a placeholder; fixed up once nr_written is known */
+	hdr.hdr_signature = htonl(PACK_SIGNATURE);
+	hdr.hdr_version = htonl(PACK_VERSION);
+	hdr.hdr_entries = htonl(0);
+	if (write(pack_fd, &hdr, sizeof(hdr)) != (ssize_t)sizeof(hdr))
+		die("Could not write pack header to %s", tmpname);
+
+	ps = start_progress("Preparing subtree commits", 0);
+	while ((c = get_revision(&revs)) != NULL) {
+		/* sha1 is 20 raw bytes, so it must be hex-encoded for "%s" */
+		if (shadow_commit(c->object.sha1, core_subtree, pack_fd, fp))
+			die("Failed to shadow commit %s", sha1_to_hex(c->object.sha1));
+		display_progress(ps, ++commit_nr);
+	}
+	stop_progress(&ps);
+	if (fclose(fp))
+		die("Could not write %s", git_path("subtree"));
+	fixup_pack_header_footer(pack_fd, sha1, tmpname, nr_written, NULL, 0);
+	close(pack_fd);
+
+	/*
+	 * The path goes through the shell, so it is single-quoted to
+	 * survive spaces. A path that itself contains a single quote
+	 * cannot be quoted this way and is refused.
+	 */
+	if (strchr(tmpname, '\''))
+		die("pack path %s contains a single quote", tmpname);
+	len = snprintf(cmd, sizeof(cmd), "git index-pack --stdin < '%s'", tmpname);
+	if (len < 0 || len >= (int)sizeof(cmd))
+		die("pack path %s is too long", tmpname);
+	status = system(cmd);
+	unlink(tmpname);
+	if (status)
+		die("git index-pack failed on %s", tmpname);
+
+	reprepare_packed_git();
+	free(subtree_commit);
+	prepare_subtree_commit();
+
+	/* Invalidate all replaced commits */
+	for (i = 0; i < subtree_commit_nr; i++) {
+		/* lookup_commit() would create new objects, we don't want that */
+		c = (struct commit *)lookup_object(subtree_commit[i]->sha1[0]);
+		if (c)
+			invalidate_commit(c);
+	}
+
+	if (revs.pending.nr)
+		free(revs.pending.objects);
+}
diff --git a/subtree.h b/subtree.h
index 157153a..3512e2a 100644
--- a/subtree.h
+++ b/subtree.h
@@ -1,2 +1,3 @@
 void prepare_subtree_commit();
 const unsigned char *subtree_lookup_object(const unsigned char *sha1);
+void subtree_import(); /* rewrite all reachable commits and pack the new objects; defined in subtree.c */
-- 
1.7.1.rc1.69.g24c2f7

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]