Re: [PATCH 2/2] xdiff: implement empty line chunk heuristic

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Jacob Keller <jacob.keller@xxxxxxxxx> writes:

> I think we're going to make use of xdl_blankline instead of this or
> our own "is_emptyline"

OK, so perhaps either of you two can do a final version people can
start having fun with?

By the way, I really do not want to see something this low-level to
be end-user tweakable with "one bit enable/disable"; the end users
shouldn't have to bother [1].  I left it in but renamed after "what"
it enables/disables, not "how" the enabled thing works, to clarify
that we have this only as a developers' aid.

*1* I am fine with --compaction-heuristic=(shortest|blank|...) that
allows a choice among many as a developers' aid, but I do not think
this topic is there yet.

 Documentation/diff-config.txt  |  9 ++++-----
 Documentation/diff-options.txt | 10 +++++-----
 diff.c                         | 18 +++++++++---------
 xdiff/xdiff.h                  |  2 +-
 xdiff/xdiffi.c                 | 22 ++++++++++------------
 5 files changed, 29 insertions(+), 32 deletions(-)

diff --git a/Documentation/diff-config.txt b/Documentation/diff-config.txt
index c62745b..9bf3e92 100644
--- a/Documentation/diff-config.txt
+++ b/Documentation/diff-config.txt
@@ -166,11 +166,10 @@ diff.tool::
 
 include::mergetools-diff.txt[]
 
-diff.shortestLineHeuristic::
-	Set this option to true to enable the shortest line chunk heuristic when
-	producing diff output. This heuristic will attempt to shift hunks such
-	that the last shortest common line occurs below the hunk with the rest of
-	the context above it.
+diff.compactionHeuristic::
+	Set this option to enable an experimental heuristic that
+	shifts the hunk boundary in an attempt to make the resulting
+	patch easier to read.
 
 diff.algorithm::
 	Choose a diff algorithm.  The variants are as follows:
diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
index 238f39c..b513023 100644
--- a/Documentation/diff-options.txt
+++ b/Documentation/diff-options.txt
@@ -63,11 +63,11 @@ ifndef::git-format-patch[]
 	Synonym for `-p --raw`.
 endif::git-format-patch[]
 
---shortest-line-heuristic::
---no-shortest-line-heuristic::
-	When possible, shift common shortest line in diff hunks below the hunk
-	such that the last common shortest line for each hunk is below, with the
-	rest of the context above the hunk.
+--compaction-heuristic::
+--no-compaction-heuristic::
+	These are to help debugging and tuning an experimental
+	heuristic that shifts the hunk boundary in an attempt to
+	make the resulting patch easier to read.
 
 --minimal::
 	Spend extra time to make sure the smallest possible
diff --git a/diff.c b/diff.c
index 276174c..02c75c3 100644
--- a/diff.c
+++ b/diff.c
@@ -25,7 +25,7 @@
 #endif
 
 static int diff_detect_rename_default;
-static int diff_shortest_line_heuristic = 0;
+static int diff_compaction_heuristic = 1;
 static int diff_rename_limit_default = 400;
 static int diff_suppress_blank_empty;
 static int diff_use_color_default = -1;
@@ -184,8 +184,8 @@ int git_diff_ui_config(const char *var, const char *value, void *cb)
 		diff_detect_rename_default = git_config_rename(var, value);
 		return 0;
 	}
-	if (!strcmp(var, "diff.shortestlineheuristic")) {
-		diff_shortest_line_heuristic = git_config_bool(var, value);
+	if (!strcmp(var, "diff.compactionheuristic")) {
+		diff_compaction_heuristic = git_config_bool(var, value);
 		return 0;
 	}
 	if (!strcmp(var, "diff.autorefreshindex")) {
@@ -3240,8 +3240,8 @@ void diff_setup(struct diff_options *options)
 	options->use_color = diff_use_color_default;
 	options->detect_rename = diff_detect_rename_default;
 	options->xdl_opts |= diff_algorithm;
-	if (diff_shortest_line_heuristic)
-		DIFF_XDL_SET(options, SHORTEST_LINE_HEURISTIC);
+	if (diff_compaction_heuristic)
+		DIFF_XDL_SET(options, COMPACTION_HEURISTIC);
 
 	options->orderfile = diff_order_file_cfg;
 
@@ -3719,10 +3719,10 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac)
 		DIFF_XDL_SET(options, IGNORE_WHITESPACE_AT_EOL);
 	else if (!strcmp(arg, "--ignore-blank-lines"))
 		DIFF_XDL_SET(options, IGNORE_BLANK_LINES);
-	else if (!strcmp(arg, "--shortest-line-heuristic"))
-		DIFF_XDL_SET(options, SHORTEST_LINE_HEURISTIC);
-	else if (!strcmp(arg, "--no-shortest-line-heuristic"))
-		DIFF_XDL_CLR(options, SHORTEST_LINE_HEURISTIC);
+	else if (!strcmp(arg, "--compaction-heuristic"))
+		DIFF_XDL_SET(options, COMPACTION_HEURISTIC);
+	else if (!strcmp(arg, "--no-compaction-heuristic"))
+		DIFF_XDL_CLR(options, COMPACTION_HEURISTIC);
 	else if (!strcmp(arg, "--patience"))
 		options->xdl_opts = DIFF_WITH_ALG(options, PATIENCE_DIFF);
 	else if (!strcmp(arg, "--histogram"))
diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h
index 968ac62..d1dbb27 100644
--- a/xdiff/xdiff.h
+++ b/xdiff/xdiff.h
@@ -41,7 +41,7 @@ extern "C" {
 
 #define XDF_IGNORE_BLANK_LINES (1 << 7)
 
-#define XDF_SHORTEST_LINE_HEURISTIC (1 << 8)
+#define XDF_COMPACTION_HEURISTIC (1 << 8)
 
 #define XDL_EMIT_FUNCNAMES (1 << 0)
 #define XDL_EMIT_COMMON (1 << 1)
diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c
index 7d15b26..1ec46e0 100644
--- a/xdiff/xdiffi.c
+++ b/xdiff/xdiffi.c
@@ -400,10 +400,9 @@ static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1,
 }
 
 
-static int line_length(const char *recs)
+static int is_blank_line(xrecord_t **recs, long ix, long flags)
 {
-	char *s = strchr(recs, '\n');
-	return s ? s - recs : strlen(recs);
+	return xdl_blankline(recs[ix]->ptr, recs[ix]->size, flags);
 }
 
 static int recs_match(xrecord_t **recs, long ixs, long ix, long flags)
@@ -417,7 +416,7 @@ static int recs_match(xrecord_t **recs, long ixs, long ix, long flags)
 int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) {
 	long ix, ixo, ixs, ixref, grpsiz, nrec = xdf->nrec;
 	char *rchg = xdf->rchg, *rchgo = xdfo->rchg;
-	unsigned int shortest_line;
+	unsigned int blank_lines;
 	xrecord_t **recs = xdf->recs;
 
 	/*
@@ -451,7 +450,7 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) {
 
 		do {
 			grpsiz = ix - ixs;
-			shortest_line = UINT_MAX;
+			blank_lines = 0;
 
 			/*
 			 * If the line before the current change group, is equal to
@@ -486,9 +485,8 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) {
 			 * the group.
 			 */
 			while (ix < nrec && recs_match(recs, ixs, ix, flags)) {
-				int l = line_length(recs[ix]->ptr);
-				if (l < shortest_line)
-					shortest_line = l;
+
+				blank_lines += is_blank_line(recs, ix, flags);
 
 				rchg[ixs++] = 0;
 				rchg[ix++] = 1;
@@ -519,15 +517,15 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) {
 
 		/*
 		 * If a group can be moved back and forth, see if there is an
-		 * empty line in the moving space. If there is an empty line,
-		 * make sure the last empty line is the end of the group.
+		 * blank line in the moving space. If there is a blank line,
+		 * make sure the last blank line is the end of the group.
 		 *
 		 * As we shifted the group forward as far as possible, we only
 		 * need to shift it back if at all.
 		 */
-		if ((flags & XDF_SHORTEST_LINE_HEURISTIC)) {
+		if ((flags & XDF_COMPACTION_HEURISTIC)) {
 			while (ixs > 0 &&
-			       line_length(recs[ix - 1]->ptr) > shortest_line &&
+			       !is_blank_line(recs, ix - 1, flags) &&
 			       recs_match(recs, ixs - 1, ix - 1, flags)) {
 				rchg[--ixs] = 1;
 				rchg[--ix] = 0;
-- 
2.8.1-399-g96b3b3a

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]