[PATCH 2/3] git-pack-objects: cache small deltas between big objects

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Creating deltas between big blobs is a CPU and memory intensive task.
In the writing phase, all (not reused) deltas are redone.

This patch adds support for caching deltas from the deltifing phase, so
that that the writing phase is faster.

The caching is limited to small deltas to avoid increasing memory usage very much.
The implemented limit is (memory needed to create the delta)/1024.

Signed-off-by: Martin Koegler <mkoegler@xxxxxxxxxxxxxxxxx>
---
The patch is an improved version of the last patch. I added a limit
for the cache size. It's only configureable by git-config, as 
builtin-back-objects (git-repack) has already enough options.

The patch applies on top of next.

 Documentation/config.txt |    5 +++
 builtin-pack-objects.c   |   69 ++++++++++++++++++++++++++++++++++++----------
 2 files changed, 59 insertions(+), 15 deletions(-)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index 3d8f03d..83cc4cd 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -567,6 +567,11 @@ pack.compression::
 	slowest.  If not set,  defaults to core.compression.  If that is
 	not set,  defaults to -1.
 
+pack.deltaCacheSize::
+	The maxium memory in bytes used for caching deltas in 
+	gitlink:git-pack-objects[1]. 	
+	A value of 0 means no limit. Defaults to 0.
+
 pull.octopus::
 	The default merge strategy to use when pulling multiple branches
 	at once.
diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index 17627b3..85e08dc 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -36,6 +36,7 @@ struct object_entry {
 	struct object_entry *delta_sibling; /* other deltified objects who
 					     * uses the same base as me
 					     */
+	void *delta_data;	/* cached delta (uncompressed) */
 	unsigned long delta_size;	/* delta data size (uncompressed) */
 	enum object_type type;
 	enum object_type in_pack_type;	/* could be delta */
@@ -76,6 +77,9 @@ static struct progress progress_state;
 static int pack_compression_level = Z_DEFAULT_COMPRESSION;
 static int pack_compression_seen;
 
+static unsigned long delta_cache_size = 0;
+static unsigned long max_delta_cache_size = 0;
+
 /*
  * The object names in objects array are hashed with this hashtable,
  * to help looking up the entry by object name.
@@ -405,24 +409,31 @@ static unsigned long write_object(struct sha1file *f,
 		z_stream stream;
 		unsigned long maxsize;
 		void *out;
-		buf = read_sha1_file(entry->sha1, &type, &size);
-		if (!buf)
-			die("unable to read %s", sha1_to_hex(entry->sha1));
-		if (size != entry->size)
-			die("object %s size inconsistency (%lu vs %lu)",
-			    sha1_to_hex(entry->sha1), size, entry->size);
-		if (usable_delta) {
-			buf = delta_against(buf, size, entry);
+		if (entry->delta_data && usable_delta) {
+			buf = entry->delta_data;
 			size = entry->delta_size;
 			obj_type = (allow_ofs_delta && entry->delta->offset) ?
 				OBJ_OFS_DELTA : OBJ_REF_DELTA;
 		} else {
-			/*
-			 * recover real object type in case
-			 * check_object() wanted to re-use a delta,
-			 * but we couldn't since base was in previous split pack
-			 */
-			obj_type = type;
+			buf = read_sha1_file(entry->sha1, &type, &size);
+			if (!buf)
+				die("unable to read %s", sha1_to_hex(entry->sha1));
+			if (size != entry->size)
+				die("object %s size inconsistency (%lu vs %lu)",
+				    sha1_to_hex(entry->sha1), size, entry->size);
+			if (usable_delta) {
+				buf = delta_against(buf, size, entry);
+				size = entry->delta_size;
+				obj_type = (allow_ofs_delta && entry->delta->offset) ?
+					OBJ_OFS_DELTA : OBJ_REF_DELTA;
+			} else {
+				/*
+				 * recover real object type in case
+				 * check_object() wanted to re-use a delta,
+				 * but we couldn't since base was in previous split pack
+				 */
+				obj_type = type;
+			}
 		}
 		/* compress the data to store and put compressed length in datalen */
 		memset(&stream, 0, sizeof(stream));
@@ -1385,6 +1396,20 @@ struct unpacked {
 	struct delta_index *index;
 };
 
+static int delta_cacheable (struct unpacked *trg, struct unpacked *src,
+			    unsigned long src_size, unsigned long trg_size,
+			    unsigned long delta_size)
+{
+	if (max_delta_cache_size && delta_cache_size + delta_size > max_delta_cache_size)
+		return 0;
+
+	/* cache delta, if objects are large enough compared to delta size */
+	if ((src_size >> 20) + (trg_size >> 21) > (delta_size >> 10))
+		return 1;
+
+	return 0;
+}
+
 /*
  * We search for deltas _backwards_ in a list sorted by type and
  * by size, so that we see progressively smaller and smaller files.
@@ -1466,10 +1491,20 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
 	if (!delta_buf)
 		return 0;
 
+	if (trg_entry->delta_data) {
+		delta_cache_size -= trg_entry->delta_size;
+		free (trg_entry->delta_data);
+	}
+	trg_entry->delta_data = 0;
 	trg_entry->delta = src_entry;
 	trg_entry->delta_size = delta_size;
 	trg_entry->depth = src_entry->depth + 1;
-	free(delta_buf);
+
+	if (delta_cacheable (src, trg, src_size, trg_size, delta_size)) {
+		trg_entry->delta_data = realloc(delta_buf, delta_size);
+		delta_cache_size += trg_entry->delta_size;
+	} else
+		free(delta_buf);
 	return 1;
 }
 
@@ -1615,6 +1650,10 @@ static int git_pack_config(const char *k, const char *v)
 		pack_compression_seen = 1;
 		return 0;
 	}
+	if(!strcmp(k, "pack.deltacachesize")) {
+		max_delta_cache_size = git_config_int(k, v);
+		return 0;
+	}
 	return git_default_config(k, v);
 }
 
-- 
1.5.2.846.g9a144

-
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux