[RFC] pack-objects: compression level for non-blobs

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Add config pack.graphcompression, similar to pack.compression.
It applies to non-blob objects and, if unspecified, falls back to pack.compression.

We may identify objects compressed with level 0 by their leading bytes.
Use this to force recompression when the source and target levels mismatch.
Limit its application to when the config pack.graphcompression is set.

Signed-off-by: David Michael Barr <b@xxxxxxxxxxxx>
---
 builtin/pack-objects.c | 49 +++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 45 insertions(+), 4 deletions(-)

 I started working on this just before taking a vacation,
 so it's been a little while coming.

 The intent is to allow selective recompression of pack data.
 For small objects/deltas the overhead of deflate is significant.
 This may improve read performance for the object graph.

 I ran some unscientific experiments with the chromium repository.
 With pack.graphcompression = 0, there was a 2.7% increase in pack size.
 On git log --raw, I saw a 35% improvement with cold caches and 43% with warm caches.

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index f069462..9518daf 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -40,6 +40,7 @@ struct object_entry {
 	unsigned long z_delta_size;	/* delta data size (compressed) */
 	unsigned int hash;	/* name hint hash */
 	enum object_type type;
+	enum object_type actual_type;
 	enum object_type in_pack_type;	/* could be delta */
 	unsigned char in_pack_header_size;
 	unsigned char preferred_base; /* we do not pack this, but is available
@@ -81,6 +82,8 @@ static int num_preferred_base;
 static struct progress *progress_state;
 static int pack_compression_level = Z_DEFAULT_COMPRESSION;
 static int pack_compression_seen;
+static int pack_graph_compression_level = Z_DEFAULT_COMPRESSION;
+static int pack_graph_compression_seen;
 
 static unsigned long delta_cache_size = 0;
 static unsigned long max_delta_cache_size = 256 * 1024 * 1024;
@@ -125,14 +128,14 @@ static void *get_delta(struct object_entry *entry)
 	return delta_buf;
 }
 
-static unsigned long do_compress(void **pptr, unsigned long size)
+static unsigned long do_compress(void **pptr, unsigned long size, int level)
 {
 	git_zstream stream;
 	void *in, *out;
 	unsigned long maxsize;
 
 	memset(&stream, 0, sizeof(stream));
-	git_deflate_init(&stream, pack_compression_level);
+	git_deflate_init(&stream, level);
 	maxsize = git_deflate_bound(&stream, size);
 
 	in = *pptr;
@@ -191,6 +194,18 @@ static unsigned long write_large_blob_data(struct git_istream *st, struct sha1fi
 	return olen;
 }
 
+static int check_pack_compressed(struct packed_git *p,
+		struct pack_window **w_curs,
+		off_t offset)
+{
+	unsigned long avail;
+	int compressed = 0;
+	unsigned char *in = use_pack(p, w_curs, offset, &avail);
+	if (avail >= 3)
+		compressed = !!(in[2] & 0x6);
+	return compressed;
+}
+
 /*
  * we are going to reuse the existing object data as is.  make
  * sure it is not corrupt.
@@ -240,6 +255,8 @@ static void copy_pack_data(struct sha1file *f,
 	}
 }
 
+#define compression_level(type) ((type) && (type) != OBJ_BLOB ? pack_graph_compression_level : pack_compression_level)
+
 /* Return 0 if we will bust the pack-size limit */
 static unsigned long write_no_reuse_object(struct sha1file *f, struct object_entry *entry,
 					   unsigned long limit, int usable_delta)
@@ -286,7 +303,7 @@ static unsigned long write_no_reuse_object(struct sha1file *f, struct object_ent
 	else if (entry->z_delta_size)
 		datalen = entry->z_delta_size;
 	else
-		datalen = do_compress(&buf, size);
+		datalen = do_compress(&buf, size, compression_level(entry->actual_type));
 
 	/*
 	 * The object header is a byte of 'type' followed by zero or
@@ -379,6 +396,13 @@ static unsigned long write_reuse_object(struct sha1file *f, struct object_entry
 	offset += entry->in_pack_header_size;
 	datalen -= entry->in_pack_header_size;
 
+	if (!pack_to_stdout &&
+	    pack_graph_compression_seen &&
+	    check_pack_compressed(p, &w_curs, offset) != !!compression_level(entry->actual_type)) {
+		unuse_pack(&w_curs);
+		return write_no_reuse_object(f, entry, limit, usable_delta);
+	}
+
 	if (!pack_to_stdout && p->index_version == 1 &&
 	    check_pack_inflate(p, &w_curs, offset, datalen, entry->size)) {
 		error("corrupt packed object for %s", sha1_to_hex(entry->idx.sha1));
@@ -955,6 +979,8 @@ static int add_object_entry(const unsigned char *sha1, enum object_type type,
 	memset(entry, 0, sizeof(*entry));
 	hashcpy(entry->idx.sha1, sha1);
 	entry->hash = hash;
+	if (pack_graph_compression_seen)
+		entry->actual_type = sha1_object_info(sha1, NULL);
 	if (type)
 		entry->type = type;
 	if (exclude)
@@ -1758,7 +1784,8 @@ static void find_deltas(struct object_entry **list, unsigned *list_size,
 		 */
 		if (entry->delta_data && !pack_to_stdout) {
 			entry->z_delta_size = do_compress(&entry->delta_data,
-							  entry->delta_size);
+							  entry->delta_size,
+							  compression_level(entry->actual_type));
 			cache_lock();
 			delta_cache_size -= entry->delta_size;
 			delta_cache_size += entry->z_delta_size;
@@ -2159,6 +2186,16 @@ static int git_pack_config(const char *k, const char *v, void *cb)
 			    pack_idx_opts.version);
 		return 0;
 	}
+	if (!strcmp(k, "pack.graphcompression")) {
+		int level = git_config_int(k, v);
+		if (level == -1)
+			level = Z_DEFAULT_COMPRESSION;
+		else if (level < 0 || level > Z_BEST_COMPRESSION)
+			die("bad pack graph compression level %d", level);
+		pack_graph_compression_level = level;
+		pack_graph_compression_seen = 1;
+		return 0;
+	}
 	return git_default_config(k, v, cb);
 }
 
@@ -2519,6 +2556,10 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 	argc = parse_options(argc, argv, prefix, pack_objects_options,
 			     pack_usage, 0);
 
+	/* Fall back after option parsing to catch --compression */
+	if (!pack_graph_compression_seen)
+		pack_graph_compression_level = pack_compression_level;
+
 	if (argc) {
 		base_name = argv[0];
 		argc--;
-- 
1.8.0

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]