[RFC] fast-import: invalidate pack_id references after loosening

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



When loosening a pack, the current pack_id gets reused when
checkpointing and the import does not terminate.  This causes
problems after checkpointing as the object table, branch, and
tag lists still contain pre-checkpoint references to the
recycled pack_id.

Merely clearing the object_table as suggested by Jeff King in
http://mid.gmane.org/20160517121330.GA7346@xxxxxxxxxxxxxxxxxxxxx
is insufficient as the marks set still contains references
to object entries.

Wrong pack_id references in the branch and tag lists do not cause
errors, but can lead to misleading crash reports and core dumps,
so they are also invalidated.

Signed-off-by: Eric Wong <e@xxxxxxxxx>
---
 I started writing a standalone test case, but testing with
 original failing cases would be greatly appreciated.

 Still learning my way around the fast-import code...
 Thanks.

 fast-import.c                       | 31 +++++++++++++++++++-
 t/t9302-fast-import-unpack-limit.sh | 57 +++++++++++++++++++++++++++++++++++++
 2 files changed, 87 insertions(+), 1 deletion(-)

diff --git a/fast-import.c b/fast-import.c
index 0e8bc6a..b9db4b6 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -597,6 +597,33 @@ static struct object_entry *insert_object(unsigned char *sha1)
 	return e;
 }
 
+static void invalidate_pack_id(unsigned int id)
+{
+	unsigned int h;
+	unsigned long lu;
+	struct tag *t;
+
+	for (h = 0; h < ARRAY_SIZE(object_table); h++) {
+		struct object_entry *e;
+
+		for (e = object_table[h]; e; e = e->next)
+			if (e->pack_id == id)
+				e->pack_id = MAX_PACK_ID;
+	}
+
+	for (lu = 0; lu < branch_table_sz; lu++) {
+		struct branch *b;
+
+		for (b = branch_table[lu]; b; b = b->table_next_branch)
+			if (b->pack_id == id)
+				b->pack_id = MAX_PACK_ID;
+	}
+
+	for (t = first_tag; t; t = t->next_tag)
+		if (t->pack_id == id)
+			t->pack_id = MAX_PACK_ID;
+}
+
 static unsigned int hc_str(const char *s, size_t len)
 {
 	unsigned int r = 0;
@@ -993,8 +1020,10 @@ static void end_packfile(void)
 				    cur_pack_sha1, pack_size);
 
 		if (object_count <= unpack_limit) {
-			if (!loosen_small_pack(pack_data))
+			if (!loosen_small_pack(pack_data)) {
+				invalidate_pack_id(pack_id);
 				goto discard_pack;
+			}
 		}
 
 		close(pack_data->pack_fd);
diff --git a/t/t9302-fast-import-unpack-limit.sh b/t/t9302-fast-import-unpack-limit.sh
index 0f686d2..a04de14 100755
--- a/t/t9302-fast-import-unpack-limit.sh
+++ b/t/t9302-fast-import-unpack-limit.sh
@@ -45,4 +45,61 @@ test_expect_success 'bigger packs are preserved' '
 	test $(find .git/objects/pack -type f | wc -l) -eq 2
 '
 
+test_expect_success 'lookups after checkpoint works' '
+	hello_id=$(echo hello | git hash-object --stdin -t blob) &&
+	id="$GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE" &&
+	before=$(git rev-parse refs/heads/master^0) &&
+	(
+		cat <<-INPUT_END &&
+		blob
+		mark :1
+		data 6
+		hello
+
+		commit refs/heads/master
+		mark :2
+		committer $id
+		data <<COMMIT
+		checkpoint after this
+		COMMIT
+		from refs/heads/master^0
+		M 100644 :1 hello
+
+		# pre-checkpoint
+		cat-blob :1
+		cat-blob $hello_id
+		checkpoint
+		# post-checkpoint
+		cat-blob :1
+		cat-blob $hello_id
+		INPUT_END
+
+		n=0 &&
+		from=$before &&
+		while test x"$from" = x"$before"
+		do
+			if test $n -gt 30
+			then
+				echo >&2 "checkpoint did not update branch"
+				exit 1
+			else
+				n=$(($n + 1))
+			fi &&
+			sleep 1 &&
+			from=$(git rev-parse refs/heads/master^0)
+		done &&
+		cat <<-INPUT_END &&
+		commit refs/heads/master
+		committer $id
+		data <<COMMIT
+		make sure from "unpacked sha1 reference" works, too
+		COMMIT
+		from $from
+		INPUT_END
+		echo done
+	) | git -c fastimport.unpackLimit=100 fast-import --done &&
+	test $(find .git/objects/?? -type f | wc -l) -eq 6 &&
+	test $(find .git/objects/pack -type f | wc -l) -eq 2
+'
+
 test_done
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]