[PATCH v3 5/9] archive-tar: stream large blobs to tar file

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



t5000 verifies output while t1050 makes sure the command always
respects core.bigfilethreshold

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@xxxxxxxxx>
---
 archive-tar.c       |   56 ++++++++++++++++++++++++++++++++++++++++++++++----
 t/t1050-large.sh    |    4 +++
 t/t5000-tar-tree.sh |    6 +++++
 3 files changed, 61 insertions(+), 5 deletions(-)

diff --git a/archive-tar.c b/archive-tar.c
index 3be0cdf..93387ea 100644
--- a/archive-tar.c
+++ b/archive-tar.c
@@ -4,6 +4,7 @@
 #include "cache.h"
 #include "tar.h"
 #include "archive.h"
+#include "streaming.h"
 #include "run-command.h"
 
 #define RECORDSIZE	(512)
@@ -30,10 +31,9 @@ static void write_if_needed(void)
  * queues up writes, so that all our write(2) calls write exactly one
  * full block; pads writes to RECORDSIZE
  */
-static void write_blocked(const void *data, unsigned long size)
+static void do_write_blocked(const void *data, unsigned long size)
 {
 	const char *buf = data;
-	unsigned long tail;
 
 	if (offset) {
 		unsigned long chunk = BLOCKSIZE - offset;
@@ -54,6 +54,11 @@ static void write_blocked(const void *data, unsigned long size)
 		memcpy(block + offset, buf, size);
 		offset += size;
 	}
+}
+
+static void finish_record(void)
+{
+	unsigned long tail;
 	tail = offset % RECORDSIZE;
 	if (tail)  {
 		memset(block + offset, 0, RECORDSIZE - tail);
@@ -62,6 +67,12 @@ static void write_blocked(const void *data, unsigned long size)
 	write_if_needed();
 }
 
+static void write_blocked(const void *data, unsigned long size)
+{
+	do_write_blocked(data, size);
+	finish_record();
+}
+
 /*
  * The end of tar archives is marked by 2*512 nul bytes and after that
  * follows the rest of the block (if any).
@@ -78,6 +89,33 @@ static void write_trailer(void)
 }
 
 /*
+ * queues up writes, so that all our write(2) calls write exactly one
+ * full block; pads writes to RECORDSIZE
+ */
+static int stream_blocked(const unsigned char *sha1)
+{
+	struct git_istream *st;
+	enum object_type type;
+	unsigned long sz;
+	char buf[BLOCKSIZE];
+	ssize_t readlen;
+
+	st = open_istream(sha1, &type, &sz, NULL);
+	if (!st)
+		return error("cannot stream blob %s", sha1_to_hex(sha1));
+	for (;;) {
+		readlen = read_istream(st, buf, sizeof(buf));
+		if (readlen <= 0)
+			break;
+		do_write_blocked(buf, readlen);
+	}
+	close_istream(st);
+	if (!readlen)
+		finish_record();
+	return readlen;
+}
+
+/*
  * pax extended header records have the format "%u %s=%s\n".  %u contains
  * the size of the whole string (including the %u), the first %s is the
  * keyword, the second one is the value.  This function constructs such a
@@ -203,7 +241,11 @@ static int write_tar_entry(struct archiver_args *args,
 	} else
 		memcpy(header.name, path, pathlen);
 
-	if (S_ISLNK(mode) || S_ISREG(mode)) {
+	if (S_ISREG(mode) && !args->convert &&
+	    sha1_object_info(sha1, &size) == OBJ_BLOB &&
+	    size > big_file_threshold)
+		buffer = NULL;
+	else if (S_ISLNK(mode) || S_ISREG(mode)) {
 		enum object_type type;
 		buffer = sha1_file_to_archive(args, path, sha1, old_mode, &type, &size);
 		if (!buffer)
@@ -235,8 +277,12 @@ static int write_tar_entry(struct archiver_args *args,
 	}
 	strbuf_release(&ext_header);
 	write_blocked(&header, sizeof(header));
-	if (S_ISREG(mode) && buffer && size > 0)
-		write_blocked(buffer, size);
+	if (S_ISREG(mode) && size > 0) {
+		if (buffer)
+			write_blocked(buffer, size);
+		else
+			err = stream_blocked(sha1);
+	}
 	free(buffer);
 	return err;
 }
diff --git a/t/t1050-large.sh b/t/t1050-large.sh
index 4d127f1..fe47554 100755
--- a/t/t1050-large.sh
+++ b/t/t1050-large.sh
@@ -134,4 +134,8 @@ test_expect_success 'repack' '
 	git repack -ad
 '
 
+test_expect_success 'tar achiving' '
+	git archive --format=tar HEAD >/dev/null
+'
+
 test_done
diff --git a/t/t5000-tar-tree.sh b/t/t5000-tar-tree.sh
index 527c9e7..d9b997f 100755
--- a/t/t5000-tar-tree.sh
+++ b/t/t5000-tar-tree.sh
@@ -84,6 +84,12 @@ test_expect_success \
     'git archive vs. git tar-tree' \
     'test_cmp b.tar b2.tar'
 
+test_expect_success 'git archive on large files' '
+    test_config core.bigfilethreshold 1 &&
+    git archive HEAD >b3.tar &&
+    test_cmp b.tar b3.tar
+'
+
 test_expect_success \
     'git archive in a bare repo' \
     '(cd bare.git && git archive HEAD) >b3.tar'
-- 
1.7.3.1.256.g2539c.dirty

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]