[PATCH 2/3] bulk-checkin: do not write the same object twice

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Signed-off-by: Junio C Hamano <gitster@xxxxxxxxx>
---
 bulk-checkin.c   |   28 ++++++++++++++++++++++++----
 t/t1050-large.sh |   20 +++++++++++++-------
 2 files changed, 37 insertions(+), 11 deletions(-)

diff --git a/bulk-checkin.c b/bulk-checkin.c
index c7e693e..82166ba 100644
--- a/bulk-checkin.c
+++ b/bulk-checkin.c
@@ -52,6 +52,17 @@ static void finish_bulk_checkin(struct bulk_checkin_state *state)
 	reprepare_packed_git();
 }
 
+static int already_written(struct bulk_checkin_state *state, unsigned char sha1[])
+{
+	int i;
+
+	/* Linear scan of state->written[] for sha1; might want to keep the list sorted */
+	for (i = 0; i < state->nr_written; i++)
+		if (!hashcmp(state->written[i]->sha1, sha1))
+			return 1;
+	return 0;
+}
+
 static void deflate_to_pack(struct bulk_checkin_state *state,
 			    unsigned char sha1[],
 			    int fd, size_t size, enum object_type type,
@@ -64,6 +75,7 @@ static void deflate_to_pack(struct bulk_checkin_state *state,
 	int write_object = (flags & HASH_WRITE_OBJECT);
 	int status = Z_OK;
 	struct pack_idx_entry *idx = NULL;
+	struct sha1file_checkpoint checkpoint;
 
 	hdrlen = sprintf((char *)obuf, "%s %" PRIuMAX,
 			 typename(type), (uintmax_t)size) + 1;
@@ -73,6 +85,7 @@ static void deflate_to_pack(struct bulk_checkin_state *state,
 	if (write_object) {
 		idx = xcalloc(1, sizeof(*idx));
 		idx->offset = state->offset;
+		sha1file_checkpoint(state->f, &checkpoint);
 		crc32_begin(state->f);
 	}
 	memset(&s, 0, sizeof(s));
@@ -121,10 +134,17 @@ static void deflate_to_pack(struct bulk_checkin_state *state,
 	git_SHA1_Final(sha1, &ctx);
 	if (write_object) {
 		idx->crc32 = crc32_end(state->f);
-		hashcpy(idx->sha1, sha1);
-		ALLOC_GROW(state->written,
-			   state->nr_written + 1, state->alloc_written);
-		state->written[state->nr_written++] = idx;
+
+		if (already_written(state, sha1)) {
+			sha1file_truncate(state->f, &checkpoint);
+			state->offset = checkpoint.offset;
+			free(idx);
+		} else {
+			hashcpy(idx->sha1, sha1);
+			ALLOC_GROW(state->written,
+				   state->nr_written + 1, state->alloc_written);
+			state->written[state->nr_written++] = idx;
+		}
 	}
 }
 
diff --git a/t/t1050-large.sh b/t/t1050-large.sh
index 36def25..fbd5ced 100755
--- a/t/t1050-large.sh
+++ b/t/t1050-large.sh
@@ -7,22 +7,28 @@ test_description='adding and checking out large blobs'
 
 test_expect_success setup '
 	git config core.bigfilethreshold 200k &&
-	echo X | dd of=large bs=1k seek=2000 &&
+	echo X | dd of=large1 bs=1k seek=2000 &&
+	echo X | dd of=large2 bs=1k seek=2000 &&
 	echo Y | dd of=huge bs=1k seek=2500
 '
 
 test_expect_success 'add a large file or two' '
-	git add large huge &&
+	git add large1 huge large2 &&
 	# make sure we got a single packfile and no loose objects
-	bad= count=0 &&
+	bad= count=0 idx= &&
 	for p in .git/objects/pack/pack-*.pack
 	do
 		count=$(( $count + 1 ))
-		test -f "$p" && continue
+		if test -f "$p" && idx=${p%.pack}.idx && test -f "$idx"
+		then
+			continue
+		fi
 		bad=t
 	done &&
 	test -z "$bad" &&
 	test $count = 1 &&
+	cnt=$(git show-index <"$idx" | wc -l) &&
+	test $cnt = 2 &&
 	for l in .git/objects/??/??????????????????????????????????????
 	do
 		test -f "$l" || continue
@@ -32,10 +38,10 @@ test_expect_success 'add a large file or two' '
 '
 
 test_expect_success 'checkout a large file' '
-	large=$(git rev-parse :large) &&
-	git update-index --add --cacheinfo 100644 $large another &&
+	large1=$(git rev-parse :large1) &&
+	git update-index --add --cacheinfo 100644 $large1 another &&
 	git checkout another &&
-	cmp large another ;# this must not be test_cmp
+	cmp large1 another ;# this must not be test_cmp
 '
 
 test_done
-- 
1.7.8.rc3.111.g7d421

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]