Re: Whitespace and '&nbsp'

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Junio C Hamano <gitster@xxxxxxxxx> writes:

> Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx> writes:
>
>> Now, very arguably this is not a git issue at all. Having the odd nbsp
>> be more visible in my other tools would have been fine - either 'less'
>> showing it (the way it shows other control characters) or my terminal
>> making some visual distinction. That said, I think it's something that
>> git could perhaps protect against a bit.
>
> Yeah. I would have expected that "git log -p" would at least show it after
> the fact, relying on "less" being aware of it, but that does not seem
> to be the case.  Setting my pager to "cat -e" is the only workaround I can
> think of right now (it shows "M-BM- " there), but of course that wouldn't be
> practical at all. We want to notice this during patch application.

The "diff" side would look like this.  I am tempted to change all the
"char *line" to "unsigned char *line", before updating "apply", though...

 .gitattributes |    2 +-
 ws.c           |   75 ++++++++++++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 68 insertions(+), 9 deletions(-)

diff --git a/.gitattributes b/.gitattributes
index 5e98806..b4459b0 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,3 +1,3 @@
 * whitespace=!indent,trail,space
-*.[ch] whitespace=indent,trail,space
+*.[ch] whitespace=indent,trail,space,nbsp
 *.sh whitespace=indent,trail,space
diff --git a/ws.c b/ws.c
index 01762cc..4766d6f 100644
--- a/ws.c
+++ b/ws.c
@@ -20,7 +20,7 @@ static struct whitespace_rule {
 	{ "blank-at-eol", WS_BLANK_AT_EOL, 0 },
 	{ "blank-at-eof", WS_BLANK_AT_EOF, 0 },
 	{ "tab-in-indent", WS_TAB_IN_INDENT, 0, 1 },
-	{ "nbsp", WS_NSBP, 0, 0 },
+	{ "nbsp", WS_NBSP, 0, 0 },
 };
 
 unsigned parse_whitespace_rule(const char *string)
@@ -147,6 +147,49 @@ char *whitespace_error_string(unsigned ws)
 	return strbuf_detach(&err, NULL);
 }
 
+static inline int is_nbsp(const char *at_)
+{
+	const unsigned char *at = (const unsigned char *)at_;
+	return at[0] == 0xc2 && at[1] == 0xa0;
+}
+
+/*
+ * Show line while highlighting nbsp (UTF-8 bytes c2 a0) if ws is set
+ */
+static void emit_with_nbsp_hilite(FILE *stream,
+				  const char *set, const char *reset,
+				  const char *ws,
+				  const char *line, int len)
+{
+	if (!len)
+		return;
+	while (len) {
+		int ok; /* leading segment without problematic nbsp */
+		if (!ws) {
+			ok = len;
+		} else {
+			for (ok = 0; ok < len; ok++) {
+				if (ok < len - 1 && is_nbsp(line + ok))
+					break;
+			}
+		}
+		if (ok) {
+			fputs(set, stream);
+			fwrite(line, ok, 1, stream);
+			fputs(reset, stream);
+		}
+		line += ok;
+		len -= ok;
+		if (len) {
+			fputs(ws, stream);
+			fwrite(line, 2, 1, stream);
+			fputs(reset, stream);
+			line += 2;
+			len -= 2;
+		}
+	}
+}
+
 /* If stream is non-NULL, emits the line after checking. */
 static unsigned ws_check_emit_1(const char *line, int len, unsigned ws_rule,
 				FILE *stream, const char *set,
@@ -170,6 +213,24 @@ static unsigned ws_check_emit_1(const char *line, int len, unsigned ws_rule,
 		len--;
 	}
 
+	/* Check for nbsp in UTF-8 (c2a0) */
+	if (ws_rule & WS_NBSP) {
+		for (i = 1; i < len; i++) {
+			switch (line[i] & 0xff) {
+			case 0xc2:
+				break;
+			case 0xa0:
+				if ((line[i-1] & 0xff) == 0xc2) {
+					result |= WS_NBSP;
+					continue;
+				}
+				/* fallthru */
+			default:
+				i++;
+			}
+		}
+	}
+
 	/* Check for trailing whitespace. */
 	if (ws_rule & WS_BLANK_AT_EOL) {
 		for (i = len - 1; i >= 0; i--) {
@@ -230,13 +291,11 @@ static unsigned ws_check_emit_1(const char *line, int len, unsigned ws_rule,
 		 * The non-highlighted part ends at "trailing_whitespace".
 		 */
 
-		/* Emit non-highlighted (middle) segment. */
-		if (trailing_whitespace - written > 0) {
-			fputs(set, stream);
-			fwrite(line + written,
-			    trailing_whitespace - written, 1, stream);
-			fputs(reset, stream);
-		}
+		/* Emit middle segment, highlighting nbsp as needed */
+		emit_with_nbsp_hilite(stream, set, reset,
+				      (result & WS_NBSP) ? ws : NULL,
+				      line + written,
+				      trailing_whitespace - written);
 
 		/* Highlight errors in trailing whitespace. */
 		if (trailing_whitespace != len) {
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]