[PATCH] diff-delta: allow reusing of the reference buffer index

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



When a reference buffer is used multiple times then its index can be 
computed only once and reused multiple times.  This patch adds an extra 
pointer to a pointer argument (from_index) to diff_delta() for this.

If from_index is NULL then everything is like before.

If from_index is non NULL and *from_index is NULL then the index is 
created and its location stored to *from_index.  In this case the caller 
has the responsibility to free the memory pointed to by *from_index.

If from_index and *from_index are non NULL then the index is reused as 
is.

This currently saves about 10% of CPU time to repack the git archive.

Signed-off-by: Nicolas Pitre <nico@xxxxxxx>

---

This is partly what Linus suggested recently.  However I only made the 
minimum changes to pack-objects.c to try it out and demonstrate how it 
works.  I however don't feel confortable enough with the changes 
required to implement the full solution which is to turn the window 
processing around i.e. keep the reference buffer stable while different 
targets are tested against this reference.  This is something worth 
doing since that would reduce memory usage significantly.  But Like I 
said, pack-objects as grown more complex with Junio latest additions 
so...

 delta.h           |    3 ++-
 diff-delta.c      |   41 +++++++++++++++++++++++++++--------------
 diffcore-break.c  |    2 +-
 diffcore-rename.c |    2 +-
 pack-objects.c    |   11 ++++++++---
 test-delta.c      |    2 +-
 6 files changed, 40 insertions(+), 21 deletions(-)

f660c44250f5c02a7e773ed991316f7f16950f3e
diff --git a/delta.h b/delta.h
index a15350d..00fef0b 100644
--- a/delta.h
+++ b/delta.h
@@ -4,7 +4,8 @@
 /* handling of delta buffers */
 extern void *diff_delta(void *from_buf, unsigned long from_size,
 			void *to_buf, unsigned long to_size,
-		        unsigned long *delta_size, unsigned long max_size);
+		        unsigned long *delta_size, unsigned long max_size,
+			void **from_index);
 extern void *patch_delta(void *src_buf, unsigned long src_size,
 			 void *delta_buf, unsigned long delta_size,
 			 unsigned long *dst_size);
diff --git a/diff-delta.c b/diff-delta.c
index bb07926..67561cd 100644
--- a/diff-delta.c
+++ b/diff-delta.c
@@ -30,8 +30,7 @@ struct index {
 
 static struct index ** delta_index(const unsigned char *buf,
 				   unsigned long bufsize,
-				   unsigned long trg_bufsize,
-				   unsigned int *hash_shift)
+				   unsigned long trg_bufsize)
 {
 	unsigned long hsize;
 	unsigned int i, hshift, hlimit, *hash_count;
@@ -44,14 +43,17 @@ static struct index ** delta_index(const
 	for (i = 8; (1 << i) < hsize && i < 24; i += 2);
 	hsize = 1 << i;
 	hshift = (i - 8) / 2;
-	*hash_shift = hshift;
 
-	/* allocate lookup index */
-	mem = malloc(hsize * sizeof(*hash) + bufsize * sizeof(*entry));
+	/*
+	 * Allocate lookup index.  Note the first hash pointer
+	 * is used to store the hash shift value.
+	 */
+	mem = malloc((1 + hsize) * sizeof(*hash) + bufsize * sizeof(*entry));
 	if (!mem)
 		return NULL;
 	hash = mem;
-	entry = mem + hsize * sizeof(*hash);
+	*hash++ = (void *)hshift;
+	entry = mem + (1 + hsize) * sizeof(*hash);
 	memset(hash, 0, hsize * sizeof(*hash));
 
 	/* allocate an array to count hash entries */
@@ -107,7 +109,7 @@ static struct index ** delta_index(const
 	}
 	free(hash_count);
 
-	return hash;
+	return hash-1;
 }
 
 /* provide the size of the copy opcode given the block offset and size */
@@ -121,7 +123,8 @@ static struct index ** delta_index(const
 void *diff_delta(void *from_buf, unsigned long from_size,
 		 void *to_buf, unsigned long to_size,
 		 unsigned long *delta_size,
-		 unsigned long max_size)
+		 unsigned long max_size,
+		 void **from_index)
 {
 	unsigned int i, outpos, outsize, inscnt, hash_shift;
 	const unsigned char *ref_data, *ref_top, *data, *top;
@@ -130,9 +133,16 @@ void *diff_delta(void *from_buf, unsigne
 
 	if (!from_size || !to_size)
 		return NULL;
-	hash = delta_index(from_buf, from_size, to_size, &hash_shift);
-	if (!hash)
-		return NULL;
+	if (from_index && *from_index) {
+		hash = *from_index;
+	} else {
+		hash = delta_index(from_buf, from_size, to_size);
+		if (!hash)
+			return NULL;
+		if (from_index)
+			*from_index = hash;
+	}
+	hash_shift = (unsigned int)(*hash++);
 
 	outpos = 0;
 	outsize = 8192;
@@ -140,7 +150,8 @@ void *diff_delta(void *from_buf, unsigne
 		outsize = max_size + MAX_OP_SIZE + 1;
 	out = malloc(outsize);
 	if (!out) {
-		free(hash);
+		if (!from_index)
+			free(hash-1);
 		return NULL;
 	}
 
@@ -241,7 +252,8 @@ void *diff_delta(void *from_buf, unsigne
 				out = realloc(out, outsize);
 			if (!out) {
 				free(tmp);
-				free(hash);
+				if (!from_index)
+					free(hash-1);
 				return NULL;
 			}
 		}
@@ -250,7 +262,8 @@ void *diff_delta(void *from_buf, unsigne
 	if (inscnt)
 		out[outpos - inscnt - 1] = inscnt;
 
-	free(hash);
+	if (!from_index)
+		free(hash-1);
 	*delta_size = outpos;
 	return out;
 }
diff --git a/diffcore-break.c b/diffcore-break.c
index c57513a..6dc22d5 100644
--- a/diffcore-break.c
+++ b/diffcore-break.c
@@ -67,7 +67,7 @@ static int should_break(struct diff_file
 
 	delta = diff_delta(src->data, src->size,
 			   dst->data, dst->size,
-			   &delta_size, 0);
+			   &delta_size, 0, NULL);
 	if (!delta)
 		return 0; /* error but caught downstream */
 
diff --git a/diffcore-rename.c b/diffcore-rename.c
index 39d9126..5b4cde4 100644
--- a/diffcore-rename.c
+++ b/diffcore-rename.c
@@ -168,7 +168,7 @@ static int estimate_similarity(struct di
 	delta_limit = base_size * (MAX_SCORE-minimum_score) / MAX_SCORE;
 	delta = diff_delta(src->data, src->size,
 			   dst->data, dst->size,
-			   &delta_size, delta_limit);
+			   &delta_size, delta_limit, NULL);
 	if (!delta)
 		/* If delta_limit is exceeded, we have too much differences */
 		return 0;
diff --git a/pack-objects.c b/pack-objects.c
index ceb107f..2b2d9a9 100644
--- a/pack-objects.c
+++ b/pack-objects.c
@@ -202,7 +202,7 @@ static void *delta_against(void *buf, un
 	if (!otherbuf)
 		die("unable to read %s", sha1_to_hex(entry->delta->sha1));
         delta_buf = diff_delta(otherbuf, othersize,
-			       buf, size, &delta_size, 0);
+			       buf, size, &delta_size, 0, NULL);
         if (!delta_buf || delta_size != entry->delta_size)
         	die("delta size changed");
         free(buf);
@@ -707,6 +707,7 @@ static int type_size_sort(const struct o
 struct unpacked {
 	struct object_entry *entry;
 	void *data;
+	void **delta_index;
 };
 
 /*
@@ -789,7 +790,8 @@ static int try_delta(struct unpacked *cu
 	if (sizediff >= max_size)
 		return -1;
 	delta_buf = diff_delta(old->data, oldsize,
-			       cur->data, size, &delta_size, max_size);
+			       cur->data, size, &delta_size,
+			       max_size, old->delta_index);
 	if (!delta_buf)
 		return 0;
 	cur_entry->delta = old_entry;
@@ -830,6 +832,7 @@ static void find_deltas(struct object_en
 			 */
 			continue;
 
+		free(n->delta_index);
 		free(n->data);
 		n->entry = entry;
 		n->data = read_sha1_file(entry->sha1, type, &size);
@@ -853,8 +856,10 @@ static void find_deltas(struct object_en
 			idx = 0;
 	}
 
-	for (i = 0; i < window; ++i)
+	for (i = 0; i < window; ++i) {
+		free(array[i].delta_index);
 		free(array[i].data);
+	}
 	free(array);
 }
 
diff --git a/test-delta.c b/test-delta.c
index 1be8ee0..89eb68e 100644
--- a/test-delta.c
+++ b/test-delta.c
@@ -63,7 +63,7 @@ int main(int argc, char *argv[])
 	if (argv[1][1] == 'd')
 		out_buf = diff_delta(from_buf, from_size,
 				     data_buf, data_size,
-				     &out_size, 0);
+				     &out_size, 0, NULL);
 	else
 		out_buf = patch_delta(from_buf, from_size,
 				      data_buf, data_size,
-- 
1.2.3.g8fcf1-dirty

-
: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]