[PATCH 3/7] diff: add --rename-file

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Git's heuristics to detect renames or copies work most of the time.
This option can be used to correct the result when they go wrong.
Matching pairs get the maximum rename score and override even exact
rename detection.

Note that --rename-file does not try to break existing diff pairs. So
if you have "abc => def" in your file, but they are already paired up
(e.g. "abc => abc" and "def => def") and not broken down by -B, then
nothing happens.

This patch assumes that the rename file contains only a couple of
rename pairs, not thousands of them. Looping through all rename
sources and destinations for each rename line will then not affect
performance, and we can keep the code simple.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@xxxxxxxxx>
---
 Documentation/diff-options.txt |  7 +++++
 diff.c                         | 10 +++++++
 diff.h                         |  1 +
 diffcore-rename.c              | 64 ++++++++++++++++++++++++++++++++++++++++--
 t/t4001-diff-rename.sh         | 33 ++++++++++++++++++++++
 5 files changed, 113 insertions(+), 2 deletions(-)

diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
index 306b7e3..7ae04a0 100644
--- a/Documentation/diff-options.txt
+++ b/Documentation/diff-options.txt
@@ -380,6 +380,13 @@ endif::git-log[]
 	projects, so use it with caution.  Giving more than one
 	`-C` option has the same effect.
 
+--rename-file=<path>::
+	The given file contains explicit rename pairs that override
+	automatically detected renames. Each line contains a rename pair
+	in the following format:
++
+<source path> <space> "=>" <space> <destination path>
+
 -D::
 --irreversible-delete::
 	Omit the preimage for deletes, i.e. print only the header but not
diff --git a/diff.c b/diff.c
index 8d38fe8..36cf08b 100644
--- a/diff.c
+++ b/diff.c
@@ -3773,6 +3773,16 @@ int diff_opt_parse(struct diff_options *options,
 		DIFF_OPT_SET(options, RENAME_EMPTY);
 	else if (!strcmp(arg, "--no-rename-empty"))
 		DIFF_OPT_CLR(options, RENAME_EMPTY);
+	else if (skip_prefix(arg, "--rename-file=", &arg)) {
+		struct strbuf sb = STRBUF_INIT;
+		const char *path = arg;
+
+		if (prefix)
+			path = prefix_filename(prefix, strlen(prefix), path);
+		if (strbuf_read_file(&sb, path, 0) == -1)
+			die(_("unable to read %s"), path);
+		options->manual_renames = strbuf_detach(&sb, NULL); /* leak */
+	}
 	else if (!strcmp(arg, "--relative"))
 		DIFF_OPT_SET(options, RELATIVE_NAME);
 	else if (skip_prefix(arg, "--relative=", &arg)) {
diff --git a/diff.h b/diff.h
index 76b5536..37179ba 100644
--- a/diff.h
+++ b/diff.h
@@ -176,6 +176,7 @@ struct diff_options {
 	diff_prefix_fn_t output_prefix;
 	int output_prefix_length;
 	void *output_prefix_data;
+	const char *manual_renames;
 
 	int diff_path_counter;
 };
diff --git a/diffcore-rename.c b/diffcore-rename.c
index af1fe08..79beec8 100644
--- a/diffcore-rename.c
+++ b/diffcore-rename.c
@@ -346,8 +346,11 @@ static int find_exact_renames(struct diff_options *options)
 		insert_file_table(&file_table, i, rename_src[i].p->one);
 
 	/* Walk the destinations and find best source match */
-	for (i = 0; i < rename_dst_nr; i++)
+	for (i = 0; i < rename_dst_nr; i++) {
+		if (rename_dst[i].pair)
+			continue; /* dealt with exact match already. */
 		renames += find_identical_files(&file_table, i, options);
+	}
 
 	/* Free the hash data structure and entries */
 	hashmap_free(&file_table, 1);
@@ -355,6 +358,61 @@ static int find_exact_renames(struct diff_options *options)
 	return renames;
 }
 
+static int manual_rename(const char *src, int srclen,
+			 const char *dst, int dstlen)
+{
+	int src_index, dst_index;
+
+	for (src_index = 0; src_index < rename_src_nr; src_index++) {
+		const char *path = rename_src[src_index].p->one->path;
+		if (strlen(path) == srclen && !strncmp(path, src, srclen))
+			break;
+	}
+	if (src_index == rename_src_nr)
+		return 0;
+
+	for (dst_index = 0; dst_index < rename_dst_nr; dst_index++) {
+		const char *path = rename_dst[dst_index].two->path;
+		if (strlen(path) == dstlen && !strncmp(path, dst, dstlen))
+			break;
+	}
+	if (dst_index == rename_dst_nr)
+		return 0;
+
+	record_rename_pair(dst_index, src_index, MAX_SCORE);
+	return 1;
+}
+
+static int find_manual_renames(struct diff_options *options)
+{
+	int renames = 0;
+	const char *p, *end;
+
+	if (!options->manual_renames)
+		return 0;
+
+	p = options->manual_renames;
+	end = p + strlen(p);
+	while (p < end) {
+		const char *line_end = strchr(p, '\n');
+		const char *arrow = strstr(p, " => ");
+		const char *src = p, *dst;
+
+		if (!line_end)
+			line_end = end;
+		p = line_end + 1;
+
+		if (!arrow || arrow >= line_end)
+			continue;
+
+		dst = arrow + strlen(" => ");
+		renames += manual_rename(src, arrow - src,
+					 dst, line_end - dst);
+	}
+
+	return renames;
+}
+
 #define NUM_CANDIDATE_PER_DST 4
 static void record_if_better(struct diff_score m[], struct diff_score *o)
 {
@@ -500,11 +558,13 @@ void diffcore_rename(struct diff_options *options)
 	if (rename_dst_nr == 0 || rename_src_nr == 0)
 		goto cleanup; /* nothing to do */
 
+	rename_count = find_manual_renames(options);
+
 	/*
 	 * We really want to cull the candidates list early
 	 * with cheap tests in order to avoid doing deltas.
 	 */
-	rename_count = find_exact_renames(options);
+	rename_count += find_exact_renames(options);
 
 	/* Did we only want exact renames? */
 	if (minimum_score == MAX_SCORE)
diff --git a/t/t4001-diff-rename.sh b/t/t4001-diff-rename.sh
index 2f327b7..ab9a666 100755
--- a/t/t4001-diff-rename.sh
+++ b/t/t4001-diff-rename.sh
@@ -156,4 +156,37 @@ test_expect_success 'rename pretty print common prefix and suffix overlap' '
 	test_i18ngrep " d/f/{ => f}/e " output
 '
 
+test_expect_success 'manual rename correction' '
+	test_create_repo correct-rename &&
+	(
+		cd correct-rename &&
+		echo one > old-one &&
+		echo two > old-two &&
+		git add old-one old-two &&
+		git commit -m old &&
+		git rm old-one old-two &&
+		echo one > new-one &&
+		echo two > new-two &&
+		git add new-one new-two &&
+		git commit -m new &&
+		git diff -M --summary HEAD^ | grep rename >actual &&
+		cat >expected <<-\EOF &&
+		 rename old-one => new-one (100%)
+		 rename old-two => new-two (100%)
+		EOF
+		test_cmp expected actual &&
+
+		cat >correction <<-\EOF &&
+		old-one => new-two
+		old-two => new-one
+		EOF
+		git diff -M --rename-file=correction --summary HEAD^ | grep rename >actual &&
+		cat >expected <<-\EOF &&
+		 rename old-two => new-one (100%)
+		 rename old-one => new-two (100%)
+		EOF
+		test_cmp expected actual
+	)
+'
+
 test_done
-- 
2.7.0.125.g9eec362

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]