To discover filenames from the --- and +++ lines in a traditional unified diff, currently ‘git apply’ scans forward for a whitespace character on each line and stops there. It can’t use the whole line because ‘diff -u’ likes to include timestamps, like so: --- foo 2000-07-12 16:56:50.020000414 -0500 +++ bar 2010-07-12 16:56:50.020000414 -0500 The whitespace-seeking heuristic works great, even when the tab has been converted to spaces by some email + copy-and-paste related corruption. Except for one problem: if the filename itself contains whitespace, the inferred filename will be too short. When Giuseppe ran into this problem, it was for a file creation patch (filename ‘debian/licenses/LICENSE.global BSD-style Chromium’). So one can’t use the list of files present in the index to deduce an appropriate filename (not to mention that way lies madness; see v0.99~402, 2005-05-31). Instead, look for a timestamp and use that if present to mark the end of the filename. If no timestamp is present, the old heuristic is used, with one exception: the space character \040 is not considered terminating whitespace any more unless it is followed by a timestamp. Reported-by: Giuseppe Iuculano <iuculano@xxxxxxxxxx> Signed-off-by: Jonathan Nieder <jrnieder@xxxxxxxxx> Acked-by: Guido Günther <agx@xxxxxxxxxxx> --- Guido, I have carried over your ack from <http://bugs.debian.org/578752>. I hope that is okay. And to both Guido and Giuseppe, sorry to have taken so long on this. That is the end of the series. I hope it was not too unpleasant a read. As always, thoughts, improvements, bugs welcome. Regards, Jonathan builtin/apply.c | 193 +++++++++++++++++++++++++++++++++++--- t/t4135-apply-weird-filenames.sh | 4 +- 2 files changed, 181 insertions(+), 16 deletions(-) diff --git a/builtin/apply.c b/builtin/apply.c index efc109e..b975c99 100644 --- a/builtin/apply.c +++ b/builtin/apply.c @@ -449,23 +449,157 @@ static char *find_name_gnu(const char *line, char *def, int p_value) return squash_slash(strbuf_detach(&name, NULL)); } -static char *find_name(const char *line, char *def, int p_value, int terminate) +static size_t tz_len(const char *line, size_t len) +{ + const char *tz, *p; + + if (len < strlen(" +0500") || line[len-strlen(" +0500")] != ' ') + return 0; + tz = line + len - strlen(" +0500"); + + if (tz[1] != '+' && tz[1] != '-') + return 0; + + for (p = tz + 2; p != line + len; p++) + if (!isdigit(*p)) + return 0; + + return line + len - tz; +} + +static size_t date_len(const char *line, size_t len) +{ + const char *date, *p; + + if (len < strlen("72-02-05") || line[len-strlen("-05")] != '-') + return 0; + p = date = line + len - strlen("72-02-05"); + + if (!isdigit(*p++) || !isdigit(*p++) || *p++ != '-' || + !isdigit(*p++) || !isdigit(*p++) || *p++ != '-' || + !isdigit(*p++) || !isdigit(*p++)) /* Not a date. */ + return 0; + + if (date - line >= strlen("19") && + isdigit(date[-1]) && isdigit(date[-2])) /* 4-digit year */ + date -= strlen("19"); + + return line + len - date; +} + +static size_t short_time_len(const char *line, size_t len) +{ + const char *time, *p; + + if (len < strlen(" 07:01:32") || line[len-strlen(":32")] != ':') + return 0; + p = time = line + len - strlen(" 07:01:32"); + + /* Permit 1-digit hours? */ + if (*p++ != ' ' || + !isdigit(*p++) || !isdigit(*p++) || *p++ != ':' || + !isdigit(*p++) || !isdigit(*p++) || *p++ != ':' || + !isdigit(*p++) || !isdigit(*p++)) /* Not a time. */ + return 0; + + return line + len - time; +} + +static size_t fractional_time_len(const char *line, size_t len) +{ + const char *p; + size_t n; + + /* Expected format: 19:41:17.620000023 */ + if (!len || !isdigit(line[len - 1])) + return 0; + p = line + len - 1; + + /* Fractional seconds. */ + while (p > line && isdigit(*p)) + p--; + if (*p != '.') + return 0; + + /* Hours, minutes, and whole seconds. */ + n = short_time_len(line, p - line); + if (!n) + return 0; + + return line + len - p + n; +} + +static size_t trailing_spaces_len(const char *line, size_t len) +{ + const char *p; + + /* Expected format: ' ' x (1 or more) */ + if (!len || line[len - 1] != ' ') + return 0; + + p = line + len; + while (p != line) { + p--; + if (*p != ' ') + return line + len - (p + 1); + } + + /* All spaces! */ + return len; +} + +static size_t diff_timestamp_len(const char *line, size_t len) +{ + const char *end = line + len; + size_t n; + + /* + * Posix: 2010-07-05 19:41:17 + * GNU: 2010-07-05 19:41:17.620000023 -0500 + */ + + if (!isdigit(end[-1])) + return 0; + + n = tz_len(line, end - line); + end -= n; + + n = short_time_len(line, end - line); + if (!n) + n = fractional_time_len(line, end - line); + end -= n; + + n = date_len(line, end - line); + if (!n) /* No date. Too bad. */ + return 0; + end -= n; + + if (end == line) /* No space before date. */ + return 0; + if (end[-1] == '\t') { /* Success! */ + end--; + return line + len - end; + } + if (end[-1] != ' ') /* No space before date. */ + return 0; + + /* Whitespace damage. */ + end -= trailing_spaces_len(line, end - line); + return line + len - end; +} + +static char *find_name_common(const char *line, char *def, int p_value, + const char *end, int terminate) { int len; const char *start = NULL; - if (*line == '"') { - char *name = find_name_gnu(line, def, p_value); - if (name) - return name; - } - if (p_value == 0) start = line; - for (;;) { + while (line != end) { char c = *line; - if (isspace(c)) { + if (!end && isspace(c)) { if (c == '\n') break; if (name_terminate(start, line-start, c, terminate)) @@ -505,6 +639,37 @@ static char *find_name(const char *line, char *def, int p_value, int terminate) return squash_slash(xmemdupz(start, len)); } +static char *find_name(const char *line, char *def, int p_value, int terminate) +{ + if (*line == '"') { + char *name = find_name_gnu(line, def, p_value); + if (name) + return name; + } + + return find_name_common(line, def, p_value, NULL, terminate); +} + +static char *find_name_traditional(const char *line, char *def, int p_value) +{ + size_t len = strlen(line); + size_t date_len; + + if (*line == '"') { + char *name = find_name_gnu(line, def, p_value); + if (name) + return name; + } + + len = strchrnul(line, '\n') - line; + date_len = diff_timestamp_len(line, len); + if (!date_len) + return find_name(line, def, p_value, TERM_TAB); + len -= date_len; + + return find_name_common(line, def, p_value, line + len, 0); +} + static int count_slashes(const char *cp) { int cnt = 0; @@ -527,7 +692,7 @@ static int guess_p_value(const char *nameline) if (is_dev_null(nameline)) return -1; - name = find_name(nameline, NULL, 0, TERM_SPACE | TERM_TAB); + name = find_name_traditional(nameline, NULL, 0); if (!name) return -1; cp = strchr(name, '/'); @@ -646,16 +811,16 @@ static void parse_traditional_patch(const char *first, const char *second, struc if (is_dev_null(first)) { patch->is_new = 1; patch->is_delete = 0; - name = find_name(second, NULL, p_value, TERM_SPACE | TERM_TAB); + name = find_name_traditional(second, NULL, p_value); patch->new_name = name; } else if (is_dev_null(second)) { patch->is_new = 0; patch->is_delete = 1; - name = find_name(first, NULL, p_value, TERM_SPACE | TERM_TAB); + name = find_name_traditional(first, NULL, p_value); patch->old_name = name; } else { - name = find_name(first, NULL, p_value, TERM_SPACE | TERM_TAB); - name = find_name(second, name, p_value, TERM_SPACE | TERM_TAB); + name = find_name_traditional(first, NULL, p_value); + name = find_name_traditional(second, name, p_value); if (has_epoch_timestamp(first)) { patch->is_new = 1; patch->is_delete = 0; diff --git a/t/t4135-apply-weird-filenames.sh b/t/t4135-apply-weird-filenames.sh index 2dcb040..52f9f5b 100755 --- a/t/t4135-apply-weird-filenames.sh +++ b/t/t4135-apply-weird-filenames.sh @@ -94,8 +94,8 @@ try_filename() { } try_filename 'plain' 'postimage.txt' -try_filename 'with spaces' 'post image.txt' success failure failure -try_filename 'with tab' 'post image.txt' success failure failure +try_filename 'with spaces' 'post image.txt' +try_filename 'with tab' 'post image.txt' try_filename 'with backslash' 'post\image.txt' try_filename 'with quote' '"postimage".txt' success failure success -- 1.7.2.rc3 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html