[PATCH 3/3] War on nbsp: Add "nbsp" whitespace breakage class

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Not even "less" shows nbsp as anything special or unusual, so eyeballing
the output of "git log -p" would not help after applying a patch that
accidentally had an nbsp where a regular SP should be, breaking compilation.

This only handles "diff", not "apply" yet.

Signed-off-by: Junio C Hamano <gitster@xxxxxxxxx>
---
 * This is the moral equivalent of the earlier patch, but based on the two
   clean-ups.  If we do not consider nbsp an error, at least we should
   count it just like an ordinary SP that takes one display column, and if
   we do not like indenting with non-TAB or trailing whitespaces, we
   should complain loudly, which was the topic of 2/3.

 cache.h                 |   18 ++++++-----
 t/t4019-diff-wserror.sh |    8 ++--
 ws.c                    |   72 ++++++++++++++++++++++++++++++++++++++++++----
 3 files changed, 79 insertions(+), 19 deletions(-)

diff --git a/cache.h b/cache.h
index dd34fed..4f58587 100644
--- a/cache.h
+++ b/cache.h
@@ -1122,17 +1122,19 @@ void shift_tree_by(const unsigned char *, const unsigned char *, unsigned char *
 /*
  * whitespace rules.
  * used by both diff and apply
- * last two digits are tab width
+ * last 6 bits are tab width
  */
-#define WS_BLANK_AT_EOL         0100
-#define WS_SPACE_BEFORE_TAB     0200
-#define WS_INDENT_WITH_NON_TAB  0400
-#define WS_CR_AT_EOL           01000
-#define WS_BLANK_AT_EOF        02000
-#define WS_TAB_IN_INDENT       04000
+#define WS_TAB_WIDTH_MASK       077
+#define WS_BLANK_AT_EOL         (1<< 6)
+#define WS_SPACE_BEFORE_TAB     (1<< 7)
+#define WS_INDENT_WITH_NON_TAB  (1<< 8)
+#define WS_CR_AT_EOL            (1<< 9)
+#define WS_BLANK_AT_EOF         (1<<10)
+#define WS_TAB_IN_INDENT        (1<<11)
+#define WS_NBSP                 (1<<12)
 #define WS_TRAILING_SPACE      (WS_BLANK_AT_EOL|WS_BLANK_AT_EOF)
 #define WS_DEFAULT_RULE (WS_TRAILING_SPACE|WS_SPACE_BEFORE_TAB|8)
-#define WS_TAB_WIDTH_MASK        077
+
 extern unsigned whitespace_rule_cfg;
 extern unsigned whitespace_rule(const char *);
 extern unsigned parse_whitespace_rule(const char *);
diff --git a/t/t4019-diff-wserror.sh b/t/t4019-diff-wserror.sh
index 665f693..8c7fea2 100755
--- a/t/t4019-diff-wserror.sh
+++ b/t/t4019-diff-wserror.sh
@@ -56,7 +56,7 @@ test_expect_success default '
 	grep End error >/dev/null
 '
 
-test_expect_success 'default (attribute)' '
+test_expect_success 'default (attribute) -- must check all available rule' '
 
 	test_might_fail git config --unset core.whitespace &&
 	echo "F whitespace" >.gitattributes &&
@@ -68,7 +68,7 @@ test_expect_success 'default (attribute)' '
 	grep Return error >/dev/null &&
 	grep No normal >/dev/null &&
 	grep Enough error >/dev/null &&
-	grep Bit normal >/dev/null &&
+	grep Bit error >/dev/null &&
 	grep End error >/dev/null
 '
 
@@ -83,8 +83,8 @@ test_expect_success 'default, tabwidth=10 (attribute)' '
 	grep With error >/dev/null &&
 	grep Return error >/dev/null &&
 	grep No normal >/dev/null &&
-	grep Enough normal >/dev/null &&
-	grep Bit normal >/dev/null &&
+	grep Enough error >/dev/null &&
+	grep Bit error >/dev/null &&
 	grep End error >/dev/null
 '
 
diff --git a/ws.c b/ws.c
index 68c7599..53e263d 100644
--- a/ws.c
+++ b/ws.c
@@ -20,6 +20,7 @@ static struct whitespace_rule {
 	{ "blank-at-eol", WS_BLANK_AT_EOL, 0 },
 	{ "blank-at-eof", WS_BLANK_AT_EOF, 0 },
 	{ "tab-in-indent", WS_TAB_IN_INDENT, 0, 1 },
+	{ "nbsp", WS_NBSP, 0, 0 },
 };
 
 unsigned parse_whitespace_rule(const char *string)
@@ -141,6 +142,8 @@ char *whitespace_error_string(unsigned ws)
 		add_err_item(&err, "indent with spaces");
 	if (ws & WS_TAB_IN_INDENT)
 		add_err_item(&err, "tab in indent");
+	if (ws & WS_NBSP)
+		add_err_item(&err, "&nbsp; in source");
 	return strbuf_detach(&err, NULL);
 }
 
@@ -150,6 +153,45 @@ static int is_nbsp(const char *at_)
 	return at[0] == 0xc2 && at[1] == 0xa0;
 }
 
+/*
+ * Show line while highlighting nbsp (U+00A0, UTF-8 bytes c2 a0) if ws is set
+ */
+static void emit_with_nbsp_hilite(FILE *stream,
+				  const char *set, const char *reset,
+				  const char *ws,
+				  const char *line, int len)
+{
+	if (!len)
+		return;
+	while (len) {
+		/* number of bytes in the leading segment w/o nbsp error */
+		int ok;
+		if (!ws) {
+			ok = len;
+		} else {
+			for (ok = 0; ok < len; ok++) {
+				if (ok < len - 1 && is_nbsp(line + ok))
+					break;
+			}
+		}
+		if (ok) {
+			fputs(set, stream);
+			fwrite(line, ok, 1, stream);
+			fputs(reset, stream);
+		}
+		line += ok;
+		len -= ok;
+		if (len) {
+			/* do not bother bundling consecutive ones */
+			fputs(ws, stream);
+			fwrite(line, 2, 1, stream);
+			fputs(reset, stream);
+			line += 2;
+			len -= 2;
+		}
+	}
+}
+
 /* If stream is non-NULL, emits the line after checking. */
 static unsigned ws_check_emit_1(const char *line, int len, unsigned ws_rule,
 				FILE *stream, const char *set,
@@ -173,6 +215,24 @@ static unsigned ws_check_emit_1(const char *line, int len, unsigned ws_rule,
 		len--;
 	}
 
+	/* Check for nbsp in UTF-8 (c2a0) */
+	if (ws_rule & WS_NBSP) {
+		for (i = 1; i < len; i++) {
+			switch (line[i] & 0xff) {
+			case 0xc2:
+				break;
+			case 0xa0:
+				if ((line[i-1] & 0xff) == 0xc2) {
+					result |= WS_NBSP;
+					continue;
+				}
+				/* fallthru */
+			default:
+				i++;
+			}
+		}
+	}
+
 	/* Check for trailing whitespace. */
 	if (ws_rule & WS_BLANK_AT_EOL) {
 		for (i = len - 1; i >= 0; i--) {
@@ -245,13 +305,11 @@ static unsigned ws_check_emit_1(const char *line, int len, unsigned ws_rule,
 		 * The non-highlighted part ends at "trailing_whitespace".
 		 */
 
-		/* Emit non-highlighted (middle) segment. */
-		if (trailing_whitespace - written > 0) {
-			fputs(set, stream);
-			fwrite(line + written,
-			    trailing_whitespace - written, 1, stream);
-			fputs(reset, stream);
-		}
+		/* Emit middle segment, highlighting nbsp as needed */
+		emit_with_nbsp_hilite(stream, set, reset,
+				      (result & WS_NBSP) ? ws : NULL,
+				      line + written,
+				      trailing_whitespace - written);
 
 		/* Highlight errors in trailing whitespace. */
 		if (trailing_whitespace != len) {
-- 
1.7.5.3.503.g893a4

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]