[PATCH 2/3] War on nbsp: a bit of retreat

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Before introducing a new whitespace breakage class "nbsp" to catch
compiler-breaking uses of nbsp (U+00A0, UTF-8 bytes c2 a0) in place of SP,
update the current code to treat an nbsp just like an SP, i.e. as a
whitespace character that takes one display column.

Indenting with 6 nbsp is not an error under the indent-with-non-tab rule,
as it only consumes 6 display columns even though it may occupy 12 bytes.

Signed-off-by: Junio C Hamano <gitster@xxxxxxxxx>
---
 t/t4019-diff-wserror.sh |   93 ++++++++++++++++++++++++++++++++---------------
 ws.c                    |   38 ++++++++++++++++----
 2 files changed, 94 insertions(+), 37 deletions(-)

diff --git a/t/t4019-diff-wserror.sh b/t/t4019-diff-wserror.sh
index a501975..665f693 100755
--- a/t/t4019-diff-wserror.sh
+++ b/t/t4019-diff-wserror.sh
@@ -14,6 +14,9 @@ test_expect_success setup '
 	echo "With trailing SP " >>F &&
 	echo "Carriage ReturnQ" | tr Q "\015" >>F &&
 	echo "No problem" >>F &&
+	echo "        Enough NBSP and Space" >>F &&
+	echo "      Bit of NBSP and Space" >>F &&
+	echo "NBSP At End  " >>F &&
 	echo >>F
 
 '
@@ -47,8 +50,10 @@ test_expect_success default '
 	grep HT error >/dev/null &&
 	grep With error >/dev/null &&
 	grep Return error >/dev/null &&
-	grep No normal >/dev/null
-
+	grep Enough normal >/dev/null &&
+	grep No normal >/dev/null &&
+	grep Bit normal >/dev/null &&
+	grep End error >/dev/null
 '
 
 test_expect_success 'default (attribute)' '
@@ -61,8 +66,10 @@ test_expect_success 'default (attribute)' '
 	grep HT error >/dev/null &&
 	grep With error >/dev/null &&
 	grep Return error >/dev/null &&
-	grep No normal >/dev/null
-
+	grep No normal >/dev/null &&
+	grep Enough error >/dev/null &&
+	grep Bit normal >/dev/null &&
+	grep End error >/dev/null
 '
 
 test_expect_success 'default, tabwidth=10 (attribute)' '
@@ -75,8 +82,10 @@ test_expect_success 'default, tabwidth=10 (attribute)' '
 	grep HT error >/dev/null &&
 	grep With error >/dev/null &&
 	grep Return error >/dev/null &&
-	grep No normal >/dev/null
-
+	grep No normal >/dev/null &&
+	grep Enough normal >/dev/null &&
+	grep Bit normal >/dev/null &&
+	grep End error >/dev/null
 '
 
 test_expect_success 'no check (attribute)' '
@@ -89,8 +98,10 @@ test_expect_success 'no check (attribute)' '
 	grep HT normal >/dev/null &&
 	grep With normal >/dev/null &&
 	grep Return normal >/dev/null &&
-	grep No normal >/dev/null
-
+	grep No normal >/dev/null &&
+	grep Enough normal >/dev/null &&
+	grep Bit normal >/dev/null &&
+	grep End normal >/dev/null
 '
 
 test_expect_success 'no check, tabwidth=10 (attribute), must be irrelevant' '
@@ -103,8 +114,10 @@ test_expect_success 'no check, tabwidth=10 (attribute), must be irrelevant' '
 	grep HT normal >/dev/null &&
 	grep With normal >/dev/null &&
 	grep Return normal >/dev/null &&
-	grep No normal >/dev/null
-
+	grep No normal >/dev/null &&
+	grep Enough normal >/dev/null &&
+	grep Bit normal >/dev/null &&
+	grep End normal >/dev/null
 '
 
 test_expect_success 'without -trail' '
@@ -117,8 +130,10 @@ test_expect_success 'without -trail' '
 	grep HT error >/dev/null &&
 	grep With normal >/dev/null &&
 	grep Return normal >/dev/null &&
-	grep No normal >/dev/null
-
+	grep No normal >/dev/null &&
+	grep Enough normal >/dev/null &&
+	grep Bit normal >/dev/null &&
+	grep End normal >/dev/null
 '
 
 test_expect_success 'without -trail (attribute)' '
@@ -131,8 +146,10 @@ test_expect_success 'without -trail (attribute)' '
 	grep HT error >/dev/null &&
 	grep With normal >/dev/null &&
 	grep Return normal >/dev/null &&
-	grep No normal >/dev/null
-
+	grep No normal >/dev/null &&
+	grep Enough normal >/dev/null &&
+	grep Bit normal >/dev/null &&
+	grep End normal >/dev/null
 '
 
 test_expect_success 'without -space' '
@@ -145,8 +162,10 @@ test_expect_success 'without -space' '
 	grep HT normal >/dev/null &&
 	grep With error >/dev/null &&
 	grep Return error >/dev/null &&
-	grep No normal >/dev/null
-
+	grep No normal >/dev/null &&
+	grep Enough normal >/dev/null &&
+	grep Bit normal >/dev/null &&
+	grep End error >/dev/null
 '
 
 test_expect_success 'without -space (attribute)' '
@@ -159,8 +178,10 @@ test_expect_success 'without -space (attribute)' '
 	grep HT normal >/dev/null &&
 	grep With error >/dev/null &&
 	grep Return error >/dev/null &&
-	grep No normal >/dev/null
-
+	grep No normal >/dev/null &&
+	grep Enough normal >/dev/null &&
+	grep Bit normal >/dev/null &&
+	grep End error >/dev/null
 '
 
 test_expect_success 'with indent-non-tab only' '
@@ -173,8 +194,10 @@ test_expect_success 'with indent-non-tab only' '
 	grep HT normal >/dev/null &&
 	grep With normal >/dev/null &&
 	grep Return normal >/dev/null &&
-	grep No normal >/dev/null
-
+	grep No normal >/dev/null &&
+	grep Enough error >/dev/null &&
+	grep Bit normal >/dev/null &&
+	grep End normal >/dev/null
 '
 
 test_expect_success 'with indent-non-tab only (attribute)' '
@@ -187,8 +210,10 @@ test_expect_success 'with indent-non-tab only (attribute)' '
 	grep HT normal >/dev/null &&
 	grep With normal >/dev/null &&
 	grep Return normal >/dev/null &&
-	grep No normal >/dev/null
-
+	grep No normal >/dev/null &&
+	grep Enough error >/dev/null &&
+	grep Bit normal >/dev/null &&
+	grep End normal >/dev/null
 '
 
 test_expect_success 'with indent-non-tab only, tabwidth=10' '
@@ -201,8 +226,10 @@ test_expect_success 'with indent-non-tab only, tabwidth=10' '
 	grep HT normal >/dev/null &&
 	grep With normal >/dev/null &&
 	grep Return normal >/dev/null &&
-	grep No normal >/dev/null
-
+	grep No normal >/dev/null &&
+	grep Enough normal >/dev/null &&
+	grep Bit normal >/dev/null &&
+	grep End normal >/dev/null
 '
 
 test_expect_success 'with indent-non-tab only, tabwidth=10 (attribute)' '
@@ -215,8 +242,10 @@ test_expect_success 'with indent-non-tab only, tabwidth=10 (attribute)' '
 	grep HT normal >/dev/null &&
 	grep With normal >/dev/null &&
 	grep Return normal >/dev/null &&
-	grep No normal >/dev/null
-
+	grep No normal >/dev/null &&
+	grep Enough normal >/dev/null &&
+	grep Bit normal >/dev/null &&
+	grep End normal >/dev/null
 '
 
 test_expect_success 'with cr-at-eol' '
@@ -229,8 +258,10 @@ test_expect_success 'with cr-at-eol' '
 	grep HT error >/dev/null &&
 	grep With error >/dev/null &&
 	grep Return normal >/dev/null &&
-	grep No normal >/dev/null
-
+	grep No normal >/dev/null &&
+	grep Enough normal >/dev/null &&
+	grep Bit normal >/dev/null &&
+	grep End error >/dev/null
 '
 
 test_expect_success 'with cr-at-eol (attribute)' '
@@ -243,8 +274,10 @@ test_expect_success 'with cr-at-eol (attribute)' '
 	grep HT error >/dev/null &&
 	grep With error >/dev/null &&
 	grep Return normal >/dev/null &&
-	grep No normal >/dev/null
-
+	grep No normal >/dev/null &&
+	grep Enough normal >/dev/null &&
+	grep Bit normal >/dev/null &&
+	grep End error >/dev/null
 '
 
 test_expect_success 'trailing empty lines (1)' '
diff --git a/ws.c b/ws.c
index 3058be4..68c7599 100644
--- a/ws.c
+++ b/ws.c
@@ -144,6 +144,12 @@ char *whitespace_error_string(unsigned ws)
 	return strbuf_detach(&err, NULL);
 }
 
+static int is_nbsp(const char *at_)
+{
+	unsigned const char *at = (unsigned const char *)at_;
+	return at[0] == 0xc2 && at[1] == 0xa0;
+}
+
 /* If stream is non-NULL, emits the line after checking. */
 static unsigned ws_check_emit_1(const char *line, int len, unsigned ws_rule,
 				FILE *stream, const char *set,
@@ -154,7 +160,7 @@ static unsigned ws_check_emit_1(const char *line, int len, unsigned ws_rule,
 	int trailing_whitespace = -1;
 	int trailing_newline = 0;
 	int trailing_carriage_return = 0;
-	int i;
+	int i, col_offset;
 
 	/* Logic is simpler if we temporarily ignore the trailing newline. */
 	if (len > 0 && line[len - 1] == '\n') {
@@ -170,11 +176,16 @@ static unsigned ws_check_emit_1(const char *line, int len, unsigned ws_rule,
 	/* Check for trailing whitespace. */
 	if (ws_rule & WS_BLANK_AT_EOL) {
 		for (i = len - 1; i >= 0; i--) {
-			if (isspace(line[i])) {
+			int is_space = isspace(line[i]);
+
+			if (!is_space && i && is_nbsp(&line[i-1])) {
+				is_space = 1;
+				i--;
+			}
+			if (is_space) {
 				trailing_whitespace = i;
 				result |= WS_BLANK_AT_EOL;
-			}
-			else
+			} else
 				break;
 		}
 	}
@@ -183,9 +194,14 @@ static unsigned ws_check_emit_1(const char *line, int len, unsigned ws_rule,
 		trailing_whitespace = len;
 
 	/* Check indentation */
-	for (i = 0; i < trailing_whitespace; i++) {
+	for (i = col_offset = 0; i < trailing_whitespace; i++) {
 		if (line[i] == ' ')
 			continue;
+		if (i + 1 < trailing_whitespace && is_nbsp(&line[i])) {
+			i++;
+			col_offset++;
+			continue;
+		}
 		if (line[i] != '\t')
 			break;
 		if ((ws_rule & WS_SPACE_BEFORE_TAB) && written < i) {
@@ -208,10 +224,12 @@ static unsigned ws_check_emit_1(const char *line, int len, unsigned ws_rule,
 			fwrite(line + written, i - written + 1, 1, stream);
 		}
 		written = i + 1;
+		col_offset = 0;
 	}
 
 	/* Check for indent using non-tab. */
-	if ((ws_rule & WS_INDENT_WITH_NON_TAB) && i - written >= ws_tab_width(ws_rule)) {
+	if ((ws_rule & WS_INDENT_WITH_NON_TAB) &&
+	    i - written - col_offset >= ws_tab_width(ws_rule)) {
 		result |= WS_INDENT_WITH_NON_TAB;
 		if (stream) {
 			fputs(ws, stream);
@@ -270,7 +288,13 @@ int ws_blank_line(const char *line, int len, unsigned ws_rule)
 	 * for now we just use this stupid definition.
 	 */
 	while (len-- > 0) {
-		if (!isspace(*line))
+		if (isspace(*line))
+			;
+		else if (len && is_nbsp(line)) {
+			line++;
+			len--;
+		}
+		else
 			return 0;
 		line++;
 	}
-- 
1.7.5.3.503.g893a4

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]