[PATCH] Reduce zlib deflate code duplication

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Most of deflation code is simply "given this buffer, just deflate
it". Make a common routine and reuse it instead. 

Exceptions include index-pack, http-push, fast-import,
write_sha1_file, which either make multiple zlib streams, or compress
and calculate sha1/checksum in parallel.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@xxxxxxxxx>
---
 There is possibly a regression here. remote-curl.c/post_rpc()
 specifies more compression options, although I doubt it affects
 performance. It also reveals places where deflation is expected
 to be always successful.

 archive-zip.c          |   37 ++-----------------------------------
 builtin/pack-objects.c |   24 +++---------------------
 cache.h                |    1 +
 diff.c                 |   29 +++--------------------------
 fast-import.c          |   40 ++++++++++++----------------------------
 remote-curl.c          |   31 ++++---------------------------
 wrapper.c              |   30 ++++++++++++++++++++++++++++++
 7 files changed, 55 insertions(+), 137 deletions(-)

diff --git a/archive-zip.c b/archive-zip.c
index cf28504..64ec465 100644
--- a/archive-zip.c
+++ b/archive-zip.c
@@ -87,39 +87,6 @@ static void copy_le32(unsigned char *dest, unsigned int n)
 	dest[3] = 0xff & (n >> 030);
 }
 
-static void *zlib_deflate(void *data, unsigned long size,
-		int compression_level, unsigned long *compressed_size)
-{
-	z_stream stream;
-	unsigned long maxsize;
-	void *buffer;
-	int result;
-
-	memset(&stream, 0, sizeof(stream));
-	deflateInit(&stream, compression_level);
-	maxsize = deflateBound(&stream, size);
-	buffer = xmalloc(maxsize);
-
-	stream.next_in = data;
-	stream.avail_in = size;
-	stream.next_out = buffer;
-	stream.avail_out = maxsize;
-
-	do {
-		result = deflate(&stream, Z_FINISH);
-	} while (result == Z_OK);
-
-	if (result != Z_STREAM_END) {
-		free(buffer);
-		return NULL;
-	}
-
-	deflateEnd(&stream);
-	*compressed_size = stream.total_out;
-
-	return buffer;
-}
-
 static int write_zip_entry(struct archiver_args *args,
 		const unsigned char *sha1, const char *path, size_t pathlen,
 		unsigned int mode, void *buffer, unsigned long size)
@@ -164,8 +131,8 @@ static int write_zip_entry(struct archiver_args *args,
 	}
 
 	if (method == 8) {
-		deflated = zlib_deflate(buffer, size, args->compression_level,
-				&compressed_size);
+		compressed_size = size;
+		deflated = git_deflate(buffer, &compressed_size, args->compression_level);
 		if (deflated && compressed_size - 6 < size) {
 			/* ZLIB --> raw compressed data (see RFC 1950) */
 			/* CMF and FLG ... */
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 0e81673..20e2b6e 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -130,28 +130,10 @@ static void *get_delta(struct object_entry *entry)
 
 static unsigned long do_compress(void **pptr, unsigned long size)
 {
-	z_stream stream;
-	void *in, *out;
-	unsigned long maxsize;
-
-	memset(&stream, 0, sizeof(stream));
-	deflateInit(&stream, pack_compression_level);
-	maxsize = deflateBound(&stream, size);
-
-	in = *pptr;
-	out = xmalloc(maxsize);
+	void *out = git_deflate(*pptr, &size, pack_compression_level);
+	free(*pptr);
 	*pptr = out;
-
-	stream.next_in = in;
-	stream.avail_in = size;
-	stream.next_out = out;
-	stream.avail_out = maxsize;
-	while (deflate(&stream, Z_FINISH) == Z_OK)
-		; /* nothing */
-	deflateEnd(&stream);
-
-	free(in);
-	return stream.total_out;
+	return size;
 }
 
 /*
diff --git a/cache.h b/cache.h
index eb77e1d..c8ab52d 100644
--- a/cache.h
+++ b/cache.h
@@ -22,6 +22,7 @@
 void git_inflate_init(z_streamp strm);
 void git_inflate_end(z_streamp strm);
 int git_inflate(z_streamp strm, int flush);
+void *git_deflate(void *buf, unsigned long *size, int compression_level);
 
 #if defined(DT_UNKNOWN) && !defined(NO_D_TYPE_IN_DIRENT)
 #define DTYPE(de)	((de)->d_type)
diff --git a/diff.c b/diff.c
index 6fb97d4..18f2f55 100644
--- a/diff.c
+++ b/diff.c
@@ -1674,30 +1674,6 @@ static void checkdiff_consume(void *priv, char *line, unsigned long len)
 	}
 }
 
-static unsigned char *deflate_it(char *data,
-				 unsigned long size,
-				 unsigned long *result_size)
-{
-	int bound;
-	unsigned char *deflated;
-	z_stream stream;
-
-	memset(&stream, 0, sizeof(stream));
-	deflateInit(&stream, zlib_compression_level);
-	bound = deflateBound(&stream, size);
-	deflated = xmalloc(bound);
-	stream.next_out = deflated;
-	stream.avail_out = bound;
-
-	stream.next_in = (unsigned char *)data;
-	stream.avail_in = size;
-	while (deflate(&stream, Z_FINISH) == Z_OK)
-		; /* nothing */
-	deflateEnd(&stream);
-	*result_size = stream.total_out;
-	return deflated;
-}
-
 static void emit_binary_diff_body(FILE *file, mmfile_t *one, mmfile_t *two, char *prefix)
 {
 	void *cp;
@@ -1713,7 +1689,8 @@ static void emit_binary_diff_body(FILE *file, mmfile_t *one, mmfile_t *two, char
 	 * whichever is smaller.
 	 */
 	delta = NULL;
-	deflated = deflate_it(two->ptr, two->size, &deflate_size);
+	deflate_size = two->size;
+	deflated = git_deflate(two->ptr, &deflate_size, zlib_compression_level);
 	if (one->size && two->size) {
 		delta = diff_delta(one->ptr, one->size,
 				   two->ptr, two->size,
@@ -1721,7 +1698,7 @@ static void emit_binary_diff_body(FILE *file, mmfile_t *one, mmfile_t *two, char
 		if (delta) {
 			void *to_free = delta;
 			orig_size = delta_size;
-			delta = deflate_it(delta, delta_size, &delta_size);
+			delta = git_deflate(delta, &delta_size, zlib_compression_level);
 			free(to_free);
 		}
 	}
diff --git a/fast-import.c b/fast-import.c
index dd51ac4..28bac18 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -993,7 +993,7 @@ static int store_object(
 	unsigned char sha1[20];
 	unsigned long hdrlen, deltalen;
 	git_SHA_CTX c;
-	z_stream s;
+	unsigned long compressed_size;
 
 	hdrlen = sprintf((char *)hdr,"%s %lu", typename(type),
 		(unsigned long)dat->len) + 1;
@@ -1025,24 +1025,13 @@ static int store_object(
 	} else
 		delta = NULL;
 
-	memset(&s, 0, sizeof(s));
-	deflateInit(&s, pack_compression_level);
-	if (delta) {
-		s.next_in = delta;
-		s.avail_in = deltalen;
-	} else {
-		s.next_in = (void *)dat->buf;
-		s.avail_in = dat->len;
-	}
-	s.avail_out = deflateBound(&s, s.avail_in);
-	s.next_out = out = xmalloc(s.avail_out);
-	while (deflate(&s, Z_FINISH) == Z_OK)
-		/* nothing */;
-	deflateEnd(&s);
+	compressed_size = delta ? deltalen : dat->len;
+	out = git_deflate(delta ? delta : dat->buf, &compressed_size,
+			  pack_compression_level);
 
 	/* Determine if we should auto-checkpoint. */
-	if ((max_packsize && (pack_size + 60 + s.total_out) > max_packsize)
-		|| (pack_size + 60 + s.total_out) < pack_size) {
+	if ((max_packsize && (pack_size + 60 + compressed_size) > max_packsize)
+		|| (pack_size + 60 + compressed_size) < pack_size) {
 
 		/* This new object needs to *not* have the current pack_id. */
 		e->pack_id = pack_id + 1;
@@ -1053,15 +1042,10 @@ static int store_object(
 			free(delta);
 			delta = NULL;
 
-			memset(&s, 0, sizeof(s));
-			deflateInit(&s, pack_compression_level);
-			s.next_in = (void *)dat->buf;
-			s.avail_in = dat->len;
-			s.avail_out = deflateBound(&s, s.avail_in);
-			s.next_out = out = xrealloc(out, s.avail_out);
-			while (deflate(&s, Z_FINISH) == Z_OK)
-				/* nothing */;
-			deflateEnd(&s);
+			free(out);
+			compressed_size = dat->len;
+			out = git_deflate(dat->buf, &compressed_size,
+					  pack_compression_level);
 		}
 	}
 
@@ -1096,8 +1080,8 @@ static int store_object(
 		pack_size += hdrlen;
 	}
 
-	sha1write(pack_file, out, s.total_out);
-	pack_size += s.total_out;
+	sha1write(pack_file, out, compressed_size);
+	pack_size += compressed_size;
 
 	e->idx.crc32 = crc32_end(pack_file);
 
diff --git a/remote-curl.c b/remote-curl.c
index 04d4813..e0d07fc 100644
--- a/remote-curl.c
+++ b/remote-curl.c
@@ -420,33 +420,10 @@ static int post_rpc(struct rpc_state *rpc)
 		 * we can try to deflate it ourselves, this may save on.
 		 * the transfer time.
 		 */
-		size_t size;
-		z_stream stream;
-		int ret;
-
-		memset(&stream, 0, sizeof(stream));
-		ret = deflateInit2(&stream, Z_BEST_COMPRESSION,
-				Z_DEFLATED, (15 + 16),
-				8, Z_DEFAULT_STRATEGY);
-		if (ret != Z_OK)
-			die("cannot deflate request; zlib init error %d", ret);
-		size = deflateBound(&stream, rpc->len);
-		gzip_body = xmalloc(size);
-
-		stream.next_in = (unsigned char *)rpc->buf;
-		stream.avail_in = rpc->len;
-		stream.next_out = (unsigned char *)gzip_body;
-		stream.avail_out = size;
-
-		ret = deflate(&stream, Z_FINISH);
-		if (ret != Z_STREAM_END)
-			die("cannot deflate request; zlib deflate error %d", ret);
-
-		ret = deflateEnd(&stream);
-		if (ret != Z_OK)
-			die("cannot deflate request; zlib end error %d", ret);
-
-		size = stream.total_out;
+		unsigned long size = rpc->len;
+		gzip_body = git_deflate(rpc->buf, &size, Z_BEST_COMPRESSION);
+		if (!gzip_body)
+			die("cannot deflate request; zlib deflate error");
 
 		headers = curl_slist_append(headers, "Content-Encoding: gzip");
 		curl_easy_setopt(slot->curl, CURLOPT_POSTFIELDS, gzip_body);
diff --git a/wrapper.c b/wrapper.c
index fd8ead3..fda7279 100644
--- a/wrapper.c
+++ b/wrapper.c
@@ -289,6 +289,36 @@ int git_inflate(z_streamp strm, int flush)
 	return ret;
 }
 
+void *git_deflate(void *in, unsigned long *size, int compression_level)
+{
+	z_stream stream;
+	void *out;
+	unsigned long maxsize;
+	int result;
+
+	memset(&stream, 0, sizeof(stream));
+	deflateInit(&stream, compression_level);
+	maxsize = deflateBound(&stream, *size);
+
+	out = xmalloc(maxsize);
+	stream.next_in = in;
+	stream.avail_in = *size;
+	stream.next_out = out;
+	stream.avail_out = maxsize;
+	while ((result = deflate(&stream, Z_FINISH)) == Z_OK)
+		; /* nothing */
+
+	if (result != Z_STREAM_END) {
+		*size = 0;
+		free(out);
+		return NULL;
+	}
+
+	deflateEnd(&stream);
+	*size = stream.total_out;
+	return out;
+}
+
 int odb_mkstemp(char *template, size_t limit, const char *pattern)
 {
 	int fd;
-- 
1.7.1.rc1.69.g24c2f7

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]