Signed-off-by: Steven Grimm <koreth@xxxxxxxxxxxxx>
---
Junio rightly points out that it would be a mistake to discard \r
characters from binary files when computing similarity scores. So now
we only do it if the file contents test as non-binary. The file
attributes aren't available at this level of the code, but they could
be propagated down from the higher levels if we don't trust
buffer_is_binary() to make an adequately accurate decision.

 diffcore-delta.c | 19 +++++++++++++------
 1 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/diffcore-delta.c b/diffcore-delta.c
index 7338a40..52e648f 100644
--- a/diffcore-delta.c
+++ b/diffcore-delta.c
@@ -1,6 +1,7 @@
 #include "cache.h"
 #include "diff.h"
 #include "diffcore.h"
+#include "xdiff-interface.h"
 
 /*
  * Idea here is very simple.
@@ -125,7 +126,8 @@ static struct spanhash_top *add_spanhash(struct spanhash_top *top,
 	}
 }
 
-static struct spanhash_top *hash_chars(unsigned char *buf, unsigned int sz)
+static struct spanhash_top *hash_chars(unsigned char *buf, unsigned int sz,
+				       int is_binary)
 {
 	int i, n;
 	unsigned int accum1, accum2, hashval;
@@ -143,9 +145,12 @@ static struct spanhash_top *hash_chars(unsigned char *buf, unsigned int sz)
 		unsigned int c = *buf++;
 		unsigned int old_1 = accum1;
 		sz--;
-		accum1 = (accum1 << 7) ^ (accum2 >> 25);
-		accum2 = (accum2 << 7) ^ (old_1 >> 25);
-		accum1 += c;
+		/* Ignore \r\n vs. \n when computing text file similarity. */
+		if (c != '\r' && ! is_binary) {
+			accum1 = (accum1 << 7) ^ (accum2 >> 25);
+			accum2 = (accum2 << 7) ^ (old_1 >> 25);
+			accum1 += c;
+		}
 		if (++n < 64 && c != '\n')
 			continue;
 		hashval = (accum1 + accum2 * 0x61) % HASHBASE;
@@ -172,14 +177,16 @@ int diffcore_count_changes(void *src, unsigned long src_size,
 	if (src_count_p)
 		src_count = *src_count_p;
 	if (!src_count) {
-		src_count = hash_chars(src, src_size);
+		int src_is_binary = buffer_is_binary(src, src_size);
+		src_count = hash_chars(src, src_size, src_is_binary);
 		if (src_count_p)
 			*src_count_p = src_count;
 	}
 	if (dst_count_p)
 		dst_count = *dst_count_p;
 	if (!dst_count) {
-		dst_count = hash_chars(dst, dst_size);
+		int dst_is_binary = buffer_is_binary(dst, dst_size);
+		dst_count = hash_chars(dst, dst_size, dst_is_binary);
 		if (dst_count_p)
 			*dst_count_p = dst_count;
 	}
-- 
1.5.2.2.571.ge134

-
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html