[PATCH 4/5] Allow xdiff machinery to cache hash results for a file

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



When generating diffs against the same file multiple times, it is a
waste of work to regenerate the hash values for each line each time.
Instead, allow a cache pointer to be passed in xpparam_t; set mf1_cache
to the cache for the first mmfile_t to xdl_diff, and mf2_cache for the
second.

This works like:

	xdcache_t cache;
	memset(cache, 0, sizeof(cache));
	/* ... */
	xpp.mf1_cache = &cache;
	xdl_diff(file1, file2, &xpp, &xecfg, &ecb);
	/* ...later... */
	xpp.mf1_cache = &cache;
	xdl_diff(file1, file3, &xpp, &xecfg, &ecb);
	/* The cache for file1 will be reused. */
	xdl_cache_free(&cache);

Note that this isn't compatible with xdi_diff as-is, as getting a
complete cache is incompatible with tail trimming.

Signed-off-by: Brian Downing <bdowning@xxxxxxxxx>
---
 xdiff/xdiff.h    |   11 ++++++++++
 xdiff/xprepare.c |   59 +++++++++++++++++++++++++++++++++++++++++++++++------
 xdiff/xtypes.h   |    1 +
 3 files changed, 64 insertions(+), 7 deletions(-)

diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h
index 281fc0b..6fd922b 100644
--- a/xdiff/xdiff.h
+++ b/xdiff/xdiff.h
@@ -65,8 +65,17 @@ typedef struct s_mmbuffer {
 	long size;
 } mmbuffer_t;
 
+typedef struct s_xdcache_int {
+	long nrec;
+	unsigned long flags;
+	unsigned long *ha;
+	long *size;
+} xdcache_t;
+
 typedef struct s_xpparam {
 	unsigned long flags;
+	xdcache_t *mf1_cache;
+	xdcache_t *mf2_cache;
 } xpparam_t;
 
 typedef struct s_xdemitcb {
@@ -104,6 +113,8 @@ int xdl_merge(mmfile_t *orig, mmfile_t *mf1, const char *name1,
 		mmfile_t *mf2, const char *name2,
 		xpparam_t const *xpp, int level, mmbuffer_t *result);
 
+void xdl_cache_free(xdcache_t *cache);
+
 #ifdef __cplusplus
 }
 #endif /* #ifdef __cplusplus */
diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c
index e87ab57..291caf9 100644
--- a/xdiff/xprepare.c
+++ b/xdiff/xprepare.c
@@ -54,7 +54,8 @@ static void xdl_free_classifier(xdlclassifier_t *cf);
 static int xdl_classify_record(xdlclassifier_t *cf, xrecord_t **rhash, unsigned int hbits,
 			       xrecord_t *rec);
 static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp,
-			   xdlclassifier_t *cf, xdfile_t *xdf);
+			   xdlclassifier_t *cf, xdfile_t *xdf,
+			   xdcache_t *cache);
 static void xdl_free_ctx(xdfile_t *xdf);
 static int xdl_clean_mmatch(char const *dis, long i, long s, long e);
 static int xdl_cleanup_records(xdfile_t *xdf1, xdfile_t *xdf2);
@@ -135,7 +136,8 @@ static int xdl_classify_record(xdlclassifier_t *cf, xrecord_t **rhash, unsigned
 
 
 static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp,
-			   xdlclassifier_t *cf, xdfile_t *xdf) {
+			   xdlclassifier_t *cf, xdfile_t *xdf,
+			   xdcache_t *cache) {
 	unsigned int hbits;
 	long i, nrec, hsize, bsize;
 	unsigned long hav;
@@ -177,7 +179,12 @@ static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp,
 				top = blk + bsize;
 			}
 			prev = cur;
-			hav = xdl_hash_record(&cur, top, xpp->flags);
+			if (cache && cache->ha) {
+				hav = cache->ha[nrec];
+				cur += cache->size[nrec];
+			} else {
+				hav = xdl_hash_record(&cur, top, xpp->flags);
+			}
 			if (nrec >= narec) {
 				narec *= 2;
 				if (!(rrecs = (xrecord_t **) xdl_realloc(recs, narec * sizeof(xrecord_t *)))) {
@@ -199,6 +206,7 @@ static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp,
 			crec->ptr = prev;
 			crec->size = (long) (cur - prev);
 			crec->ha = hav;
+			crec->original_ha = hav;
 			recs[nrec++] = crec;
 
 			if (xdl_classify_record(cf, rhash, hbits, crec) < 0) {
@@ -249,6 +257,23 @@ static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp,
 	xdf->dstart = 0;
 	xdf->dend = nrec - 1;
 
+	if (cache && !cache->ha) {
+		cache->nrec = nrec;
+		cache->ha = xdl_malloc(nrec * sizeof(unsigned long));
+		if (!cache->ha)
+			return 0;
+		cache->size = xdl_malloc(nrec * sizeof(long));
+		if (!cache->size) {
+			xdl_free(cache->ha);
+			cache->ha = NULL;
+			return 0;
+		}
+		for (i = 0; i < nrec; ++i) {
+			cache->ha[i] = recs[i]->original_ha;
+			cache->size[i] = recs[i]->size;
+		}
+	}
+
 	return 0;
 }
 
@@ -268,21 +293,34 @@ int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
 		    xdfenv_t *xe) {
 	long enl1, enl2;
 	xdlclassifier_t cf;
+	xdcache_t *c1 = xpp->mf1_cache;
+	xdcache_t *c2 = xpp->mf2_cache;
 
-	enl1 = xdl_guess_lines(mf1) + 1;
-	enl2 = xdl_guess_lines(mf2) + 1;
+	if (c1) {
+		if (c1->flags != xpp->flags)
+			xdl_cache_free(c1);
+		c1->flags = xpp->flags;
+	}
+	if (c2) {
+		if (c2->flags != xpp->flags)
+			xdl_cache_free(c2);
+		c2->flags = xpp->flags;
+	}
+
+	enl1 = c1 && c1->nrec ? c1->nrec : (xdl_guess_lines(mf1) + 1);
+	enl2 = c2 && c2->nrec ? c2->nrec : (xdl_guess_lines(mf2) + 1);
 
 	if (xdl_init_classifier(&cf, enl1 + enl2 + 1, xpp->flags) < 0) {
 
 		return -1;
 	}
 
-	if (xdl_prepare_ctx(mf1, enl1, xpp, &cf, &xe->xdf1) < 0) {
+	if (xdl_prepare_ctx(mf1, enl1, xpp, &cf, &xe->xdf1, c1) < 0) {
 
 		xdl_free_classifier(&cf);
 		return -1;
 	}
-	if (xdl_prepare_ctx(mf2, enl2, xpp, &cf, &xe->xdf2) < 0) {
+	if (xdl_prepare_ctx(mf2, enl2, xpp, &cf, &xe->xdf2, c2) < 0) {
 
 		xdl_free_ctx(&xe->xdf1);
 		xdl_free_classifier(&cf);
@@ -309,6 +347,13 @@ void xdl_free_env(xdfenv_t *xe) {
 }
 
 
+void xdl_cache_free(xdcache_t *cache) {
+	xdl_free(cache->ha);
+	xdl_free(cache->size);
+	memset(cache, 0, sizeof(xdcache_t));
+}
+
+
 static int xdl_clean_mmatch(char const *dis, long i, long s, long e) {
 	long r, rdis0, rpdis0, rdis1, rpdis1;
 
diff --git a/xdiff/xtypes.h b/xdiff/xtypes.h
index 2511aef..e6f6890 100644
--- a/xdiff/xtypes.h
+++ b/xdiff/xtypes.h
@@ -43,6 +43,7 @@ typedef struct s_xrecord {
 	char const *ptr;
 	long size;
 	unsigned long ha;
+	unsigned long original_ha;
 } xrecord_t;
 
 typedef struct s_xdfile {
-- 
1.5.6.1

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux