Junio C Hamano <gitster@xxxxxxxxx> writes: > The next step would be to tweak zlib_post_call(), git_inflate() and > git_deflate() functions to internally loop and call underlying inflate() > and deflate() when the incoming buffers are larger than 4GB, but that > part is not done in this series (yet). And this is such a patch. I had a draft of this one ready when I sent the previous 6-patch series, but it took an embarrassingly long time for me to figure out what went wrong in this patch before I realized that Z_FINISH should not be relayed to the underlying inflate/deflate if we are splitting the request into multiple phases. I consider it still rough, but with this on top of v1.7.5.4, together with the previous 6 patches, it seems to pass trivial "have a large blob, add, commit, pack, verify, index-pack, fsck" tests, without using either of the recent "let's pass large blob to fast-import" (in 'master') or "let's stream a blob out without expanding it in core first" (in 'next') changes. -- test script to be stored in /var/tmp/junk or somewhere -- #!/bin/sh set -x rm -fr .git && git init || exit echo | dd bs=1M seek=4800 of=a.big sleep 2 git add a.big eval $(git ls-files -s a.big | sed -e 's/^[0-7]* \([0-9a-f][0-9a-f]\)\([0-9a-f]*\) .*/h=\1 a=\2/') echo $h $a ls -l .git/objects/$h/$a || exit git commit -m initial || exit git fsck || exit git repack -a -d || exit git fsck || exit f=$(find .git/objects/?? -type f) test -z "$f" || exit p=$(find .git/objects/pack -type f -name '*.pack') && i=${p%.pack}.idx && test -f $p && test -f $i || exit mv $p $i . && pp=$(basename $p) && ii=$(basename $i) && test -f $pp && test -f $ii || exit git verify-pack $pp || exit git unpack-objects <$pp || exit ls -l .git/objects/$h/$a || exit rm -fr .git/objects/?? || exit git index-pack --stdin <$pp || exit test -f "$i" || exit cmp $i $ii || exit git cat-file blob "$h$a" >b.big || exit cmp a.big b.big echo all done. 
exit 0 -- test script ends here -- -- >8 -- Update zlib_post_call() that adjusts the wrapper's notion of avail_in and avail_out to what came back from zlib, so that the callers can feed buffers larger than 4GB to the API. When the underlying inflate/deflate stops processing because we fed a buffer larger than the 4GB limit, detect that case, update the state variables, and let the zlib function work another round. Signed-off-by: Junio C Hamano <gitster@xxxxxxxxx> --- zlib.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++---------------- 1 files changed, 58 insertions(+), 20 deletions(-) diff --git a/zlib.c b/zlib.c index fe537e3..3c63d48 100644 --- a/zlib.c +++ b/zlib.c @@ -27,12 +27,11 @@ static const char *zerr_to_string(int status) * limits the size of the buffer we can use to 4GB when interacting * with zlib in a single call to inflate/deflate. */ -#define ZLIB_BUF_MAX ((uInt)-1) +/* #define ZLIB_BUF_MAX ((uInt)-1) */ +#define ZLIB_BUF_MAX ((uInt) 1024 * 1024 * 1024) /* 1GB */ static inline uInt zlib_buf_cap(unsigned long len) { - if (ZLIB_BUF_MAX < len) - die("working buffer for zlib too large"); - return len; + return (ZLIB_BUF_MAX < len) ? 
ZLIB_BUF_MAX : len; } static void zlib_pre_call(git_zstream *s) @@ -47,12 +46,22 @@ static void zlib_pre_call(git_zstream *s) static void zlib_post_call(git_zstream *s) { + unsigned long bytes_consumed; + unsigned long bytes_produced; + + bytes_consumed = s->z.next_in - s->next_in; + bytes_produced = s->z.next_out - s->next_out; + if (s->z.total_out != s->total_out + bytes_produced) + die("BUG: total_out mismatch"); + if (s->z.total_in != s->total_in + bytes_consumed) + die("BUG: total_in mismatch"); + + s->total_out = s->z.total_out; + s->total_in = s->z.total_in; s->next_in = s->z.next_in; s->next_out = s->z.next_out; - s->total_in = s->z.total_in; - s->total_out = s->z.total_out; - s->avail_in = s->z.avail_in; - s->avail_out = s->z.avail_out; + s->avail_in -= bytes_consumed; + s->avail_out -= bytes_produced; } void git_inflate_init(git_zstream *strm) @@ -103,18 +112,32 @@ int git_inflate(git_zstream *strm, int flush) { int status; - zlib_pre_call(strm); - status = inflate(&strm->z, flush); - zlib_post_call(strm); + for (;;) { + zlib_pre_call(strm); + /* Never say Z_FINISH unless we are feeding everything */ + status = inflate(&strm->z, + (strm->z.avail_in != strm->avail_in) + ? 0 : flush); + if (status == Z_MEM_ERROR) + die("inflate: out of memory"); + zlib_post_call(strm); + + /* + * Let zlib work another round, while we can still + * make progress. 
+ */ + if ((strm->avail_out && !strm->z.avail_out) && + (status == Z_OK || status == Z_BUF_ERROR)) + continue; + break; + } + switch (status) { /* Z_BUF_ERROR: normal, needs more space in the output buffer */ case Z_BUF_ERROR: case Z_OK: case Z_STREAM_END: return status; - - case Z_MEM_ERROR: - die("inflate: out of memory"); default: break; } @@ -192,18 +215,33 @@ int git_deflate(git_zstream *strm, int flush) { int status; - zlib_pre_call(strm); - status = deflate(&strm->z, flush); - zlib_post_call(strm); + for (;;) { + zlib_pre_call(strm); + + /* Never say Z_FINISH unless we are feeding everything */ + status = deflate(&strm->z, + (strm->z.avail_in != strm->avail_in) + ? 0 : flush); + if (status == Z_MEM_ERROR) + die("deflate: out of memory"); + zlib_post_call(strm); + + /* + * Let zlib work another round, while we can still + * make progress. + */ + if ((strm->avail_out && !strm->z.avail_out) && + (status == Z_OK || status == Z_BUF_ERROR)) + continue; + break; + } + switch (status) { /* Z_BUF_ERROR: normal, needs more space in the output buffer */ case Z_BUF_ERROR: case Z_OK: case Z_STREAM_END: return status; - - case Z_MEM_ERROR: - die("deflate: out of memory"); default: break; } -- 1.7.6.rc1.118.ge175b4a -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html