[PATCH v2 06/10] archive-tar: stream large blobs to tar file

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



t5000 makes sure git archive still produces correct output, while t1050
checks that it does not go over the memory limit (i.e. that it respects
core.bigfilethreshold from beginning to end).

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@xxxxxxxxx>
---
 archive-tar.c       |   44 +++++++++++++++++++++++++++++++++++++++++---
 t/t1050-large.sh    |    4 ++++
 t/t5000-tar-tree.sh |    7 +++++++
 3 files changed, 52 insertions(+), 3 deletions(-)

diff --git a/archive-tar.c b/archive-tar.c
index 9060f9a..759e2bf 100644
--- a/archive-tar.c
+++ b/archive-tar.c
@@ -4,6 +4,7 @@
 #include "cache.h"
 #include "tar.h"
 #include "archive.h"
+#include "streaming.h"
 #include "run-command.h"
 
 #define RECORDSIZE	(512)
@@ -80,6 +81,35 @@ static void write_trailer(void)
 }
 
 /*
+ * streams the blob named by sha1 through write_blocked() in sizeof(buf)
+ * pieces; returns 0 at end of stream, else the failing call's result
+ */
+static int stream_blocked(const unsigned char *sha1)
+{
+	struct git_istream *st;
+	enum object_type type;	/* filled in by open_istream(); not read here */
+	unsigned long sz;	/* filled in by open_istream(); not read here */
+	char buf[BLOCKSIZE];	/* holds one read_istream() result at a time */
+	ssize_t readlen;
+
+	st = open_istream(sha1, &type, &sz, NULL);
+	if (!st)
+		return error("cannot stream blob %s", sha1_to_hex(sha1));
+	for (;;) {
+		readlen = read_istream(st, buf, sizeof(buf));
+		if (readlen <= 0)	/* presumably 0 = end of stream, <0 = error; confirm in streaming.h */
+			break;
+		write_blocked(buf, readlen, 1);	/* NOTE(review): last arg is 1 here, 0 elsewhere; confirm meaning */
+	}
+	close_istream(st);
+
+	/* loop ended with readlen == 0: pad the remaining (if any) to full 512-byte blocks */
+	if (!readlen)
+		write_blocked(NULL, 0, 0);
+	return readlen;	/* 0, or the negative read_istream() result (no padding done then) */
+}
+
+/*
  * pax extended header records have the format "%u %s=%s\n".  %u contains
  * the size of the whole string (including the %u), the first %s is the
  * keyword, the second one is the value.  This function constructs such a
@@ -205,7 +235,11 @@ static int write_tar_entry(struct archiver_args *args,
 	} else
 		memcpy(header.name, path, pathlen);
 
-	if (S_ISLNK(mode) || S_ISREG(mode)) {
+	if (S_ISREG(mode) && !args->convert &&
+	    sha1_object_info(sha1, &size) == OBJ_BLOB &&
+	    size > big_file_threshold)
+		buffer = NULL;
+	else if (S_ISLNK(mode) || S_ISREG(mode)) {
 		enum object_type type;
 		buffer = sha1_file_to_archive(args, path, sha1, old_mode, &type, &size);
 		if (!buffer)
@@ -237,8 +271,12 @@ static int write_tar_entry(struct archiver_args *args,
 	}
 	strbuf_release(&ext_header);
 	write_blocked(&header, sizeof(header), 0);
-	if (S_ISREG(mode) && buffer && size > 0)
-		write_blocked(buffer, size, 0);
+	if (S_ISREG(mode) && size > 0) {
+		if (buffer)
+			write_blocked(buffer, size, 0);
+		else
+			err = stream_blocked(sha1);
+	}
 	free(buffer);
 	return err;
 }
diff --git a/t/t1050-large.sh b/t/t1050-large.sh
index 4d127f1..fe47554 100755
--- a/t/t1050-large.sh
+++ b/t/t1050-large.sh
@@ -134,4 +134,8 @@ test_expect_success 'repack' '
 	git repack -ad
 '
 
+test_expect_success 'tar archiving' '
+	git archive --format=tar HEAD >/dev/null
+'
+
 test_done
diff --git a/t/t5000-tar-tree.sh b/t/t5000-tar-tree.sh
index 527c9e7..421c356 100755
--- a/t/t5000-tar-tree.sh
+++ b/t/t5000-tar-tree.sh
@@ -84,6 +84,13 @@ test_expect_success \
     'git archive vs. git tar-tree' \
     'test_cmp b.tar b2.tar'
 
+test_expect_success 'git archive on large files' '
+    git config core.bigfilethreshold 1 &&
+    git archive HEAD >b3.tar &&
+    git config --unset core.bigfilethreshold &&
+    test_cmp b.tar b3.tar
+'
+
 test_expect_success \
     'git archive in a bare repo' \
     '(cd bare.git && git archive HEAD) >b3.tar'
-- 
1.7.8.36.g69ee2

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]