Junio C Hamano <gitster@xxxxxxxxx> writes: > Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx> writes: > >> Now, very arguably this is not a git issue at all. Having the odd nbsp >> be more visible in my other tools would have been fine - either 'less' >> showing it (the way it shows other control characters) or my terminal >> making some visual distinction. That said, I think it's something that >> git could perhaps protect against a bit. > > Yeah. I would have expected that "git log -p" would at least show after > the fact, relying on "less" being aware about it, but that does not seem > to be the case. Setting my pager to "cat -e" is the only workaround I can > think of right now (shows "M-BM- " there), but of course that wouldn't be > practical at all. We want to notice this during patch application. The "diff" side would look like this. I am tempted to change all the "char *line" to "unsigned char *line", before updating "apply", though... .gitattributes | 2 +- ws.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 68 insertions(+), 9 deletions(-) diff --git a/.gitattributes b/.gitattributes index 5e98806..b4459b0 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,3 +1,3 @@ * whitespace=!indent,trail,space -*.[ch] whitespace=indent,trail,space +*.[ch] whitespace=indent,trail,space,nbsp *.sh whitespace=indent,trail,space diff --git a/ws.c b/ws.c index 01762cc..4766d6f 100644 --- a/ws.c +++ b/ws.c @@ -20,7 +20,7 @@ static struct whitespace_rule { { "blank-at-eol", WS_BLANK_AT_EOL, 0 }, { "blank-at-eof", WS_BLANK_AT_EOF, 0 }, { "tab-in-indent", WS_TAB_IN_INDENT, 0, 1 }, - { "nbsp", WS_NSBP, 0, 0 }, + { "nbsp", WS_NBSP, 0, 0 }, }; unsigned parse_whitespace_rule(const char *string) @@ -147,6 +147,49 @@ char *whitespace_error_string(unsigned ws) return strbuf_detach(&err, NULL); } +static inline int is_nbsp(const char *at_) +{ + const unsigned char *at = (const unsigned char *)at_; + return at[0] == 0xc2 && at[1] == 0xa0; +} + +/* + * 
Show line while highlighting nbsp "\xc2\xa0" (c2a0) if ws is set + */ +static void emit_with_nbsp_hilite(FILE *stream, + const char *set, const char *reset, + const char *ws, + const char *line, int len) +{ + if (!len) + return; + while (len) { + int ok; /* leading segment without problematic nbsp */ + if (!ws) { + ok = len; + } else { + for (ok = 0; ok < len; ok++) { + if (ok < len - 1 && is_nbsp(line + ok)) + break; + } + } + if (ok) { + fputs(set, stream); + fwrite(line, ok, 1, stream); + fputs(reset, stream); + } + line += ok; + len -= ok; + if (len) { + fputs(ws, stream); + fwrite(line, 2, 1, stream); + fputs(reset, stream); + line += 2; + len -= 2; + } + } +} + /* If stream is non-NULL, emits the line after checking. */ static unsigned ws_check_emit_1(const char *line, int len, unsigned ws_rule, FILE *stream, const char *set, @@ -170,6 +213,24 @@ static unsigned ws_check_emit_1(const char *line, int len, unsigned ws_rule, len--; } + /* Check for nbsp in UTF-8 (c2a0) */ + if (ws_rule & WS_NBSP) { + for (i = 1; i < len; i++) { + switch (line[i] & 0xff) { + case 0xc2: + break; + case 0xa0: + if ((line[i-1] & 0xff) == 0xc2) { + result |= WS_NBSP; + continue; + } + /* fallthru */ + default: + i++; + } + } + } + /* Check for trailing whitespace. */ if (ws_rule & WS_BLANK_AT_EOL) { for (i = len - 1; i >= 0; i--) { @@ -230,13 +291,11 @@ static unsigned ws_check_emit_1(const char *line, int len, unsigned ws_rule, * The non-highlighted part ends at "trailing_whitespace". */ - /* Emit non-highlighted (middle) segment. */ - if (trailing_whitespace - written > 0) { - fputs(set, stream); - fwrite(line + written, - trailing_whitespace - written, 1, stream); - fputs(reset, stream); - } + /* Emit middle segment, highlighting nbsp as needed */ + emit_with_nbsp_hilite(stream, set, reset, + (result & WS_NBSP) ? ws : NULL, + line + written, + trailing_whitespace - written); /* Highlight errors in trailing whitespace. 
*/ if (trailing_whitespace != len) { -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html