A large blob will be read twice: once for calculating crc32, once for actual writing. Large blobs are written uncompressed for simplicity. Writing compressed large blobs is possible. But a naive implementation would need to decompress/compress the blob twice: once to calculate the compressed size, once for actual writing, assuming compressed blobs are still over the large file limit. Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@xxxxxxxxx> --- I think we could extract compressed size from pack index, then stream the compressed blob directly from pack to zip file. But that makes git-archive sensitive to pack format. And to be honest I don't care that much about large file support to do it. This patch is good enough for me. Documentation/git-archive.txt | 3 ++ archive-zip.c | 42 ++++++++++++++++++++++++++++++++++++++++- t/t1050-large.sh | 4 +++ 3 files changed, 48 insertions(+), 1 deletions(-) diff --git a/Documentation/git-archive.txt b/Documentation/git-archive.txt index ac7006e..6df85a6 100644 --- a/Documentation/git-archive.txt +++ b/Documentation/git-archive.txt @@ -120,6 +120,9 @@ tar.<format>.remote:: user-defined formats, but true for the "tar.gz" and "tgz" formats. +core.bigFileThreshold:: + Files larger than this size are stored uncompressed in zip format. 
+ ATTRIBUTES ---------- diff --git a/archive-zip.c b/archive-zip.c index f8039ba..ee58bda 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -3,6 +3,7 @@ */ #include "cache.h" #include "archive.h" +#include "streaming.h" static int zip_date; static int zip_time; @@ -120,6 +121,29 @@ static void *zlib_deflate(void *data, unsigned long size, return buffer; } +static int crc32_stream(const unsigned char *sha1, unsigned long *crc) +{ + struct git_istream *st; + enum object_type type; + unsigned long sz; + + st = open_istream(sha1, &type, &sz, NULL); + if (!st) + return error("cannot stream blob %s", sha1_to_hex(sha1)); + for (;;) { + char buf[1024]; + ssize_t readlen; + + readlen = read_istream(st, buf, sizeof(buf)); + + if (readlen <= 0) + return readlen; + *crc = crc32(*crc, (unsigned char*)buf, readlen); + } + close_istream(st); + return 0; +} + static int write_zip_entry(struct archiver_args *args, const unsigned char *sha1, const char *path, size_t pathlen, @@ -153,6 +177,19 @@ static int write_zip_entry(struct archiver_args *args, compressed_size = 0; buffer = NULL; size = 0; + } else if (!args->convert && S_ISREG(mode) && + sha1_object_info(sha1, &size) == OBJ_BLOB && + size > big_file_threshold) { + buffer = NULL; + method = 0; + attr2 = S_ISLNK(mode) ? ((mode | 0777) << 16) : + (mode & 0111) ? 
((mode) << 16) : 0; + if (crc32_stream(sha1, &crc) < 0) + return error("failed to calculate crc32 from blob %s, SHA1 %s", + path, sha1_to_hex(sha1)); + out = buffer; + uncompressed_size = size; + compressed_size = size; } else if (S_ISREG(mode) || S_ISLNK(mode)) { enum object_type type; buffer = sha1_file_to_archive(args, path, sha1, mode, &type, &size); @@ -234,7 +271,10 @@ static int write_zip_entry(struct archiver_args *args, write_or_die(1, path, pathlen); zip_offset += pathlen; if (compressed_size > 0) { - write_or_die(1, out, compressed_size); + if (out) + write_or_die(1, out, compressed_size); + else + stream_blob_to_fd(1, sha1, NULL, 0); zip_offset += compressed_size; } diff --git a/t/t1050-large.sh b/t/t1050-large.sh index fe47554..458fdde 100755 --- a/t/t1050-large.sh +++ b/t/t1050-large.sh @@ -138,4 +138,8 @@ test_expect_success 'tar achiving' ' git archive --format=tar HEAD >/dev/null ' +test_expect_success 'zip achiving' ' + git archive --format=zip HEAD >/dev/null +' + test_done -- 1.7.8.36.g69ee2 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html