From: Elijah Newren <newren@xxxxxxxxx>

Make use of the new find_basename_matches() function added in the last
two patches, to find renames more rapidly in cases where we can match up
files based on basenames.

For the testcases mentioned in commit 557ac0350d ("merge-ort: begin
performance work; instrument with trace2_region_* calls", 2020-10-28),
this change improves the performance as follows:

                            Before                  After
    no-renames:       13.815 s ±  0.062 s    13.138 s ±  0.086 s
    mega-renames:   1799.937 s ±  0.493 s   169.488 s ±  0.494 s
    just-one-mega:    51.289 s ±  0.019 s     5.061 s ±  0.017 s

Signed-off-by: Elijah Newren <newren@xxxxxxxxx>
---
 diffcore-rename.c | 42 +++++++++++++++++++++++++++++++++++++-----
 1 file changed, 37 insertions(+), 5 deletions(-)

diff --git a/diffcore-rename.c b/diffcore-rename.c
index b1dda41de9b1..206c0bbdcdfb 100644
--- a/diffcore-rename.c
+++ b/diffcore-rename.c
@@ -367,7 +367,6 @@ static int find_exact_renames(struct diff_options *options)
 	return renames;
 }
 
-MAYBE_UNUSED
 static int find_basename_matches(struct diff_options *options,
 				 int minimum_score,
 				 int num_src)
@@ -718,12 +717,45 @@ void diffcore_rename(struct diff_options *options)
 	if (minimum_score == MAX_SCORE)
 		goto cleanup;
 
+	num_sources = rename_src_nr;
+
+	if (want_copies || break_idx) {
+		/*
+		 * Cull sources:
+		 *   - remove ones corresponding to exact renames
+		 */
+		trace2_region_enter("diff", "cull after exact", options->repo);
+		remove_unneeded_paths_from_src(want_copies);
+		trace2_region_leave("diff", "cull after exact", options->repo);
+	} else {
+		/*
+		 * Cull sources:
+		 *   - remove ones involved in renames (found via exact match)
+		 */
+		trace2_region_enter("diff", "cull exact", options->repo);
+		remove_unneeded_paths_from_src(want_copies);
+		trace2_region_leave("diff", "cull exact", options->repo);
+
+		/* Utilize file basenames to quickly find renames. */
+		trace2_region_enter("diff", "basename matches", options->repo);
+		rename_count += find_basename_matches(options, minimum_score,
+						      rename_src_nr);
+		trace2_region_leave("diff", "basename matches", options->repo);
+
+		/*
+		 * Cull sources, again:
+		 *   - remove ones involved in renames (found via basenames)
+		 */
+		trace2_region_enter("diff", "cull basename", options->repo);
+		remove_unneeded_paths_from_src(want_copies);
+		trace2_region_leave("diff", "cull basename", options->repo);
+	}
+
 	/*
-	 * Calculate how many renames are left
+	 * Calculate how many rename destinations are left
 	 */
 	num_destinations = (rename_dst_nr - rename_count);
-	remove_unneeded_paths_from_src(want_copies);
-	num_sources = rename_src_nr;
+	num_sources = rename_src_nr; /* rename_src_nr reflects lower number */
 
 	/* All done? */
 	if (!num_destinations || !num_sources)
@@ -755,7 +787,7 @@ void diffcore_rename(struct diff_options *options)
 			struct diff_score *m;
 
 			if (rename_dst[i].is_rename)
-				continue; /* dealt with exact match already. */
+				continue; /* exact or basename match already handled */
 
 			m = &mx[dst_cnt * NUM_CANDIDATE_PER_DST];
 			for (j = 0; j < NUM_CANDIDATE_PER_DST; j++)
-- 
gitgitgadget