Add config pack.graphcompression similar to pack.compression.

It applies to non-blob objects and, if unspecified, falls back to pack.compression.

We may identify objects compressed with level 0 by their leading bytes. Use this to force recompression when the source and target levels mismatch. Limit its application to when the config pack.graphcompression is set.

Signed-off-by: David Michael Barr <b@xxxxxxxxxxxx>
---
 builtin/pack-objects.c | 49 +++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 45 insertions(+), 4 deletions(-)

I started working on this just before taking a vacation, so it's been a little while coming. The intent is to allow selective recompression of pack data. For small objects/deltas the overhead of deflate is significant, so skipping it may improve read performance for the object graph.

I ran some unscientific experiments with the chromium repository. With pack.graphcompression = 0, there was a 2.7% increase in pack size. On git log --raw, I saw a 35% improvement with cold caches and 43% otherwise (warm caches).

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index f069462..9518daf 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -40,6 +40,7 @@ struct object_entry { unsigned long z_delta_size; /* delta data size (compressed) */ unsigned int hash; /* name hint hash */ enum object_type type; + enum object_type actual_type; enum object_type in_pack_type; /* could be delta */ unsigned char in_pack_header_size; unsigned char preferred_base; /* we do not pack this, but is available @@ -81,6 +82,8 @@ static int num_preferred_base; static struct progress *progress_state; static int pack_compression_level = Z_DEFAULT_COMPRESSION; static int pack_compression_seen; +static int pack_graph_compression_level = Z_DEFAULT_COMPRESSION; +static int pack_graph_compression_seen; static unsigned long delta_cache_size = 0; static unsigned long max_delta_cache_size = 256 * 1024 * 1024; @@ -125,14 +128,14 @@ static void *get_delta(struct object_entry *entry) return delta_buf; } -static unsigned long do_compress(void **pptr, unsigned long size) +static unsigned long do_compress(void **pptr, unsigned long size, int level) { git_zstream stream; void *in, *out; unsigned long maxsize; memset(&stream, 0, sizeof(stream)); - git_deflate_init(&stream, pack_compression_level); + git_deflate_init(&stream, level); maxsize = git_deflate_bound(&stream, size); in = *pptr; @@ -191,6 +194,18 @@ static unsigned long write_large_blob_data(struct git_istream *st, struct sha1fi return olen; } +static int check_pack_compressed(struct packed_git *p, + struct pack_window **w_curs, + off_t offset) +{ + unsigned long avail; + int compressed = 0; + unsigned char *in = use_pack(p, w_curs, offset, &avail); + if (avail >= 3) + compressed = !!(in[2] & 0x6); + return compressed; +} + /* * we are going to reuse the existing object data as is. make * sure it is not corrupt. 
@@ -240,6 +255,8 @@ static void copy_pack_data(struct sha1file *f, } } +#define compression_level(type) ((type) && (type) != OBJ_BLOB ? pack_graph_compression_level : pack_compression_level) + /* Return 0 if we will bust the pack-size limit */ static unsigned long write_no_reuse_object(struct sha1file *f, struct object_entry *entry, unsigned long limit, int usable_delta) @@ -286,7 +303,7 @@ static unsigned long write_no_reuse_object(struct sha1file *f, struct object_ent else if (entry->z_delta_size) datalen = entry->z_delta_size; else - datalen = do_compress(&buf, size); + datalen = do_compress(&buf, size, compression_level(entry->actual_type)); /* * The object header is a byte of 'type' followed by zero or @@ -379,6 +396,13 @@ static unsigned long write_reuse_object(struct sha1file *f, struct object_entry offset += entry->in_pack_header_size; datalen -= entry->in_pack_header_size; + if (!pack_to_stdout && + pack_graph_compression_seen && + check_pack_compressed(p, &w_curs, offset) != !!compression_level(entry->actual_type)) { + unuse_pack(&w_curs); + return write_no_reuse_object(f, entry, limit, usable_delta); + } + if (!pack_to_stdout && p->index_version == 1 && check_pack_inflate(p, &w_curs, offset, datalen, entry->size)) { error("corrupt packed object for %s", sha1_to_hex(entry->idx.sha1)); @@ -955,6 +979,8 @@ static int add_object_entry(const unsigned char *sha1, enum object_type type, memset(entry, 0, sizeof(*entry)); hashcpy(entry->idx.sha1, sha1); entry->hash = hash; + if (pack_graph_compression_seen) + entry->actual_type = sha1_object_info(sha1, NULL); if (type) entry->type = type; if (exclude) @@ -1758,7 +1784,8 @@ static void find_deltas(struct object_entry **list, unsigned *list_size, */ if (entry->delta_data && !pack_to_stdout) { entry->z_delta_size = do_compress(&entry->delta_data, - entry->delta_size); + entry->delta_size, + compression_level(entry->actual_type)); cache_lock(); delta_cache_size -= entry->delta_size; delta_cache_size += 
entry->z_delta_size; @@ -2159,6 +2186,16 @@ static int git_pack_config(const char *k, const char *v, void *cb) pack_idx_opts.version); return 0; } + if (!strcmp(k, "pack.graphcompression")) { + int level = git_config_int(k, v); + if (level == -1) + level = Z_DEFAULT_COMPRESSION; + else if (level < 0 || level > Z_BEST_COMPRESSION) + die("bad pack graph compression level %d", level); + pack_graph_compression_level = level; + pack_graph_compression_seen = 1; + return 0; + } return git_default_config(k, v, cb); } @@ -2519,6 +2556,10 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) argc = parse_options(argc, argv, prefix, pack_objects_options, pack_usage, 0); + /* Fall back after option parsing to catch --compression */ + if (!pack_graph_compression_seen) + pack_graph_compression_level = pack_compression_level; + if (argc) { base_name = argv[0]; argc--; -- 1.8.0 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html