This speeds up estimate_similarity by caching the similarity score of
pairs of blob sha1s.

Signed-off-by: Jeff King <peff@xxxxxxxx>
---
Some interesting things to time with this are:

  - "git log --raw -M" on a repo with a lot of paths or a lot of
    renames (I found on git.git, the speedup was not that impressive)

  - "git log --raw -C -C" on any repo (this speeds up a lot in
    git.git).

  - "git show -M" on commits with very large blobs

 cache.h           |  1 +
 diff.c            |  6 ++++++
 diffcore-rename.c | 11 ++++++++++-
 3 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/cache.h b/cache.h
index 23a2f93..7ee1caf 100644
--- a/cache.h
+++ b/cache.h
@@ -1228,6 +1228,7 @@ int add_files_to_cache(const char *prefix, const char **pathspec, int flags);
 
 /* diff.c */
 extern int diff_auto_refresh_index;
+extern int diff_cache_renames;
 
 /* match-trees.c */
 void shift_tree(const unsigned char *, const unsigned char *, unsigned char *, int);
diff --git a/diff.c b/diff.c
index 95706a5..c84e043 100644
--- a/diff.c
+++ b/diff.c
@@ -34,6 +34,7 @@ static int diff_no_prefix;
 static int diff_stat_graph_width;
 static int diff_dirstat_permille_default = 30;
 static struct diff_options default_diff_options;
+int diff_cache_renames;
 
 static char diff_colors[][COLOR_MAXLEN] = {
 	GIT_COLOR_RESET,
@@ -214,6 +215,11 @@ int git_diff_basic_config(const char *var, const char *value, void *cb)
 		return 0;
 	}
 
+	if (!strcmp(var, "diff.cacherenames")) {
+		diff_cache_renames = git_config_bool(var, value);
+		return 0;
+	}
+
 	if (!prefixcmp(var, "submodule."))
 		return parse_submodule_config_option(var, value);
 
diff --git a/diffcore-rename.c b/diffcore-rename.c
index 216a7a4..611e1d3 100644
--- a/diffcore-rename.c
+++ b/diffcore-rename.c
@@ -6,6 +6,7 @@
 #include "diffcore.h"
 #include "hash.h"
 #include "progress.h"
+#include "metadata-cache.h"
 
 /* Table of rename/copy destinations */
 
@@ -137,7 +138,8 @@ static int estimate_similarity(struct diff_filespec *src,
 	 */
 	unsigned long max_size, delta_size, base_size, src_copied, literal_added;
 	unsigned long delta_limit;
-	int score;
+	uint32_t score;
+	struct sha1pair pair;
 
 	/* We deal only with regular files. Symlink renames are handled
 	 * only when they are exact matches --- in other words, no edits
@@ -175,6 +177,11 @@ static int estimate_similarity(struct diff_filespec *src,
 	if (max_size * (MAX_SCORE-minimum_score) < delta_size * MAX_SCORE)
 		return 0;
 
+	hashcpy(pair.one, src->sha1);
+	hashcpy(pair.two, dst->sha1);
+	if (diff_cache_renames && rename_cache_get(&pair, &score))
+		return score;
+
 	if (!src->cnt_data && diff_populate_filespec(src, 0))
 		return 0;
 	if (!dst->cnt_data && diff_populate_filespec(dst, 0))
@@ -195,6 +202,8 @@ static int estimate_similarity(struct diff_filespec *src,
 		score = 0; /* should not happen */
 	else
 		score = (int)(src_copied * MAX_SCORE / max_size);
+	if (diff_cache_renames)
+		rename_cache_set(&pair, score);
 	return score;
 }
 
--
1.7.12.rc1.7.g7a223a6

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html