[RFD/PATCH] Implement pack.compression and pack-objects --compression=N

Git's object store and packing are optimized for
* expensive repo-to-repo bandwidth; and
* small-ish files.
This justifies extensive use of compression.

In a multi-developer *office* setting, where inter-repository
transfers occur over a 100Mb+ LAN, there is less reason to
compress objects at the cost of slower response times.
Response times suffer even more when large files are involved.
However, *off-line* pack compression may still be desirable
to reduce storage space.

Consequently, for such a usage pattern it is useful to specify
different compression levels for loose objects and packs.
This patch implements a config variable pack.compression,
alongside the existing core.compression, meant to be used for
repacking.  It also adds --compression=N to pack-objects,
meant for push/pull/fetch when a different level is wanted,
possibly on a per-repository basis.
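
Assuming the patch works as intended, usage would look roughly
like this (pack.compression and --compression=N are the names
proposed above; the pack base name is only an example):

  # keep loose objects at core.compression, repack tightly off-line
  $ git config pack.compression 9
  $ git repack -a -d

  # build an uncompressed pack for a fast LAN transfer
  $ git rev-list --objects --all | git pack-objects --compression=0 /tmp/lanpack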

** THIS PATCH IS UNTESTED AND MEANT FOR DISCUSSION. **
git-repack.sh might also need to be modified,
and how to pass --compression=N during push/pull/fetch
has not been investigated.

This applies on top of the git-repack --max-pack-size patchset.

Signed-off-by: Dana L. How <danahow@xxxxxxxxx>
---
 builtin-pack-objects.c |   56 ++++++++++++++++++++++++++++++++++++------------
 1 files changed, 42 insertions(+), 14 deletions(-)

diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index 69fec34..b663c15 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -70,6 +70,7 @@ static uint32_t pack_size_limit;
 static int pack_to_stdout;
 static int num_preferred_base;
 static struct progress progress_state;
+static int pack_compression_level, pack_compression_seen;
 
 /*
  * The object names in objects array are hashed with this hashtable,
@@ -414,6 +415,16 @@ static unsigned long write_object(struct sha1file *f,
 	/* write limit if limited packsize and not first object */
 	unsigned long limit = pack_size_limit && nr_written ?
 				pack_size_limit - write_offset : 0;
+				/* no if no delta */
+	int usable_delta =	!entry->delta ? 0 :
+				/* yes if unlimited packfile */
+				!pack_size_limit ? 1 :
+				/* no if base written to previous pack */
+				entry->delta->offset == (off_t)-1 ? 0 :
+				/* otherwise double-check written to this
+				 * pack,  like we do below
+				 */
+				entry->delta->offset ? 1 : 0;
 
 	if (!pack_to_stdout)
 		crc32_begin(f);
@@ -423,8 +434,7 @@ static unsigned long write_object(struct sha1file *f,
 		to_reuse = 0;	/* can't reuse what we don't have */
 	else if (obj_type == OBJ_REF_DELTA || obj_type == OBJ_OFS_DELTA)
 				/* check_object() decided it for us ... */
-		to_reuse = !pack_size_limit ||
-			(entry->delta->offset && entry->delta->offset != (off_t)-1);
+		to_reuse = usable_delta;
 				/* ... but pack split may override that */
 	else if (obj_type != entry->in_pack_type)
 		to_reuse = 0;	/* pack has delta which is unusable */
@@ -435,6 +445,10 @@ static unsigned long write_object(struct sha1file *f,
 				 * and we do not need to deltify it.
 				 */
 
+	/* differing core & pack compression when loose object -> must recompress */
+	if (!entry->in_pack && pack_compression_level != zlib_compression_level)
+		to_reuse = 0;
+	else
 	if (!entry->in_pack && !entry->delta) {
 		unsigned char *map;
 		unsigned long mapsize;
@@ -462,16 +476,6 @@ static unsigned long write_object(struct sha1file *f,
 		z_stream stream;
 		unsigned long maxsize;
 		void *out;
-					/* no if no delta */
-		int usable_delta =	!entry->delta ? 0 :
-					/* yes if unlimited packfile */
-					!pack_size_limit ? 1 :
-					/* no if base written to previous pack */
-					entry->delta->offset == (off_t)-1 ? 0 :
-					/* otherwise double-check written to this
-					 * pack,  like we do below
-					 */
-					entry->delta->offset ? 1 : 0;
 		buf = read_sha1_file(entry->sha1, &type, &size);
 		if (!buf)
 			die("unable to read %s", sha1_to_hex(entry->sha1));
@@ -493,7 +497,7 @@ static unsigned long write_object(struct sha1file *f,
 		}
 		/* compress the data to store and put compressed length in datalen */
 		memset(&stream, 0, sizeof(stream));
-		deflateInit(&stream, zlib_compression_level);
+		deflateInit(&stream, pack_compression_level);
 		maxsize = deflateBound(&stream, size);
 		out = xmalloc(maxsize);
 		/* Compress it */
@@ -606,7 +610,7 @@ static unsigned long write_object(struct sha1file *f,
 		unuse_pack(&w_curs);
 		reused++;
 	}
-	if (entry->delta)
+	if (usable_delta)
 		written_delta++;
 	written++;
 	if (!pack_to_stdout)
@@ -1622,6 +1626,16 @@ static int git_pack_config(const char *k, const char *v)
 		window = git_config_int(k, v);
 		return 0;
 	}
+	if (!strcmp(k, "pack.compression")) {
+		int level = git_config_int(k, v);
+		if (level == -1)
+			level = Z_DEFAULT_COMPRESSION;
+		else if (level < 0 || level > Z_BEST_COMPRESSION)
+			die("bad pack compression level %d", level);
+		pack_compression_level = level;
+		pack_compression_seen = 1;
+		return 0;
+	}
 	return git_default_config(k, v);
 }
 
@@ -1732,6 +1746,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 	rp_ac = 2;
 
 	git_config(git_pack_config);
+	if (!pack_compression_seen)
+		pack_compression_level = zlib_compression_level;
 
 	progress = isatty(2);
 	for (i = 1; i < argc; i++) {
@@ -1759,6 +1775,18 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 				usage(pack_usage);
 			continue;
 		}
+		if (!prefixcmp(arg, "--compression=")) {
+			char *end;
+			int level = strtoul(arg+14, &end, 0);
+			if (!arg[14] || *end)
+				usage(pack_usage);
+			if (level == -1)
+				level = Z_DEFAULT_COMPRESSION;
+			else if (level < 0 || level > Z_BEST_COMPRESSION)
+				die("bad pack compression level %d", level);
+			pack_compression_level = level;
+			continue;
+		}
 		if (!prefixcmp(arg, "--window=")) {
 			char *end;
 			window = strtoul(arg+9, &end, 0);
-- 
1.5.2.rc0.787.g0014
