> fast-import has subtle differences in how it parses file paths between each > occurrence of <path> in the grammar. Many errors are suppressed or not checked, > which could lead to silent data corruption. A particularly bad case is when a > front-end sent escapes that Git doesn't recognize (e.g., hex escapes are not > supported), it would be treated as literal bytes instead of a quoted string. > > Bring path parsing into line with the documented behavior and improve > documentation to fill in missing details. Updated to address review comments. Thanks, Patrick! Changes since v2: * Fix NUL overrun by replacing `strchr(p, ' ')` with `strchrnul(p, ' ')` in patch 1/8 * Fix "Missing dest" error condition in patch 1/8 * Test missing space after unquoted path * Substitute shell parameters in test_expect_success call, instead of with string splicing * Reformat (-subshells * Rewrap long lines in `parse_path` and `parse_path_space` Hopefully, this series sends without any rewrapped lines. I use Proton Mail via Proton Mail Bridge and Apple Mail. I have no idea how to control this, or if I even can, and see no relevant-looking settings in any of the three. In v2 and now v3, I only manually modified the cover letter after using format-patch, not any of the others. Thalia Thalia Archibald (8): fast-import: tighten path unquoting fast-import: directly use strbufs for paths fast-import: allow unquoted empty path for root fast-import: remove dead strbuf fast-import: improve documentation for unquoted paths fast-import: document C-style escapes for paths fast-import: forbid escaped NUL in paths fast-import: make comments more precise Documentation/git-fast-import.txt | 30 +- builtin/fast-import.c | 158 ++++---- t/t9300-fast-import.sh | 624 +++++++++++++++++++++--------- 3 files changed, 550 insertions(+), 262 deletions(-) Range-diff against v2: 1: e790bdf714 ! 1: d9ab0c6a75 fast-import: tighten path unquoting @@ builtin/fast-import.c: static uintmax_t parse_mark_ref_space(const char **p) + * or unquoted without escape sequences. When unquoted, it may only contain a + * space if `include_spaces` is nonzero. + */ -+static void parse_path(struct strbuf *sb, const char *p, const char **endp, int include_spaces, const char *field) ++static void parse_path(struct strbuf *sb, const char *p, const char **endp, ++ int include_spaces, const char *field) +{ + if (*p == '"') { + if (unquote_c_style(sb, p, endp)) @@ builtin/fast-import.c: static uintmax_t parse_mark_ref_space(const char **p) + if (include_spaces) + *endp = p + strlen(p); + else -+ *endp = strchr(p, ' '); ++ *endp = strchrnul(p, ' '); + strbuf_add(sb, p, *endp - p); + } +} @@ builtin/fast-import.c: static uintmax_t parse_mark_ref_space(const char **p) + * It may not contain spaces when unquoted. Update *endp to point to the first + * character after the space. + */ -+static void parse_path_space(struct strbuf *sb, const char *p, const char **endp, const char *field) ++static void parse_path_space(struct strbuf *sb, const char *p, ++ const char **endp, const char *field) +{ + parse_path(sb, p, endp, 0, field); + if (**endp != ' ') @@ builtin/fast-import.c: static void file_change_m(const char *p, struct branch *b - endp++; - if (!*endp) -+ if (!p) ++ if (!*p) die("Missing dest: %s", command_buf.buf); - - d = endp; @@ t/t9300-fast-import.sh: test_expect_success 'S: ls with garbage after sha1 must + commit filemodify + COMMIT + from :301 -+ M 100644 :402 '"$path"' ++ M 100644 :402 $path + + commit refs/heads/S-path-eol + mark :303 @@ t/t9300-fast-import.sh: test_expect_success 'S: ls with garbage after sha1 must + commit filedelete + COMMIT + from :302 -+ D '"$path"' ++ D $path + + commit refs/heads/S-path-eol + mark :304 @@ t/t9300-fast-import.sh: test_expect_success 'S: ls with garbage after sha1 must + commit filecopy dest + COMMIT + from :301 -+ C hello.c '"$path"' ++ C hello.c $path + + commit refs/heads/S-path-eol + mark :305 @@ t/t9300-fast-import.sh: test_expect_success 'S: ls with garbage after sha1 must + commit filerename dest + COMMIT + from :301 -+ R hello.c '"$path"' ++ R hello.c $path + -+ ls :305 '"$path"' ++ ls :305 $path + EOF + + commit_m=$(grep :302 marks.out | cut -d\ -f2) && @@ t/t9300-fast-import.sh: test_expect_success 'S: ls with garbage after sha1 must + blob1=$(grep :401 marks.out | cut -d\ -f2) && + blob2=$(grep :402 marks.out | cut -d\ -f2) && + -+ ( printf "100644 blob $blob2\t'"$unquoted_path"'\n" && -+ printf "100644 blob $blob1\thello.c\n" ) | sort >tree_m.exp && ++ ( ++ printf "100644 blob $blob2\t$unquoted_path\n" && ++ printf "100644 blob $blob1\thello.c\n" ++ ) | sort >tree_m.exp && + git ls-tree $commit_m | sort >tree_m.out && + test_cmp tree_m.exp tree_m.out && + @@ t/t9300-fast-import.sh: test_expect_success 'S: ls with garbage after sha1 must + git ls-tree $commit_d >tree_d.out && + test_cmp tree_d.exp tree_d.out && + -+ ( printf "100644 blob $blob1\t'"$unquoted_path"'\n" && -+ printf "100644 blob $blob1\thello.c\n" ) | sort >tree_c.exp && ++ ( ++ printf "100644 blob $blob1\t$unquoted_path\n" && ++ printf "100644 blob $blob1\thello.c\n" ++ ) | sort >tree_c.exp && + git ls-tree $commit_c | sort >tree_c.out && + test_cmp tree_c.exp tree_c.out && + -+ printf "100644 blob $blob1\t'"$unquoted_path"'\n" >tree_r.exp && ++ printf "100644 blob $blob1\t$unquoted_path\n" >tree_r.exp && + git ls-tree $commit_r >tree_r.out && + test_cmp tree_r.exp tree_r.out && + @@ t/t9300-fast-import.sh: test_expect_success 'S: ls with garbage after sha1 must + data <<COMMIT + initial commit + COMMIT -+ M 100644 :401 '"$path"' ++ M 100644 :401 $path + + commit refs/heads/S-path-space + mark :302 @@ t/t9300-fast-import.sh: test_expect_success 'S: ls with garbage after sha1 must + commit filecopy source + COMMIT + from :301 -+ C '"$path"' hello2.c ++ C $path hello2.c + + commit refs/heads/S-path-space + mark :303 @@ t/t9300-fast-import.sh: test_expect_success 'S: ls with garbage after sha1 must + commit filerename source + COMMIT + from :301 -+ R '"$path"' hello2.c ++ R $path hello2.c + + EOF + @@ t/t9300-fast-import.sh: test_expect_success 'S: ls with garbage after sha1 must + commit_r=$(grep :303 marks.out | cut -d\ -f2) && + blob=$(grep :401 marks.out | cut -d\ -f2) && + -+ ( printf "100644 blob $blob\t'"$unquoted_path"'\n" && -+ printf "100644 blob $blob\thello2.c\n" ) | sort >tree_c.exp && ++ ( ++ printf "100644 blob $blob\t$unquoted_path\n" && ++ printf "100644 blob $blob\thello2.c\n" ++ ) | sort >tree_c.exp && + git ls-tree $commit_c | sort >tree_c.out && + test_cmp tree_c.exp tree_c.out && + @@ t/t9300-fast-import.sh: test_expect_success 'S: ls with garbage after sha1 must + commit with bad path + COMMIT + from :2 -+ '"$prefix$path$suffix"' ++ $prefix$path$suffix + EOF + -+ test_grep '"'$err_grep'"' err ++ test_grep "$err_grep" err + ' +} + @@ t/t9300-fast-import.sh: test_expect_success 'S: ls with garbage after sha1 must + test_path_fail "$change" "invalid escape in quoted $field" "$prefix" '"hello\xff"' "$suffix" "Invalid $field" +} +test_path_eol_quoted_fail () { -+ local change="$1" prefix="$2" field="$3" suffix="$4" -+ test_path_base_fail "$change" "$prefix" "$field" "$suffix" -+ test_path_fail "$change" "garbage after quoted $field" "$prefix" '"hello.c"x' "$suffix" "Garbage after $field" -+ test_path_fail "$change" "space after quoted $field" "$prefix" '"hello.c" ' "$suffix" "Garbage after $field" ++ local change="$1" prefix="$2" field="$3" ++ test_path_base_fail "$change" "$prefix" "$field" '' ++ test_path_fail "$change" "garbage after quoted $field" "$prefix" '"hello.c"' 'x' "Garbage after $field" ++ test_path_fail "$change" "space after quoted $field" "$prefix" '"hello.c"' ' ' "Garbage after $field" +} +test_path_eol_fail () { -+ local change="$1" prefix="$2" field="$3" suffix="$4" -+ test_path_eol_quoted_fail "$change" "$prefix" "$field" "$suffix" ++ local change="$1" prefix="$2" field="$3" ++ test_path_eol_quoted_fail "$change" "$prefix" "$field" +} +test_path_space_fail () { -+ local change="$1" prefix="$2" field="$3" suffix="$4" -+ test_path_base_fail "$change" "$prefix" "$field" "$suffix" -+ test_path_fail "$change" "missing space after quoted $field" "$prefix" '"hello.c"x' "$suffix" "Missing space after $field" ++ local change="$1" prefix="$2" field="$3" ++ test_path_base_fail "$change" "$prefix" "$field" ' world.c' ++ test_path_fail "$change" "missing space after quoted $field" "$prefix" '"hello.c"' 'x world.c' "Missing space after $field" ++ test_path_fail "$change" "missing space after unquoted $field" "$prefix" 'hello.c' '' "Missing space after $field" +} + -+test_path_eol_fail filemodify 'M 100644 :1 ' path '' -+test_path_eol_fail filedelete 'D ' path '' -+test_path_space_fail filecopy 'C ' source ' world.c' -+test_path_eol_fail filecopy 'C hello.c ' dest '' -+test_path_space_fail filerename 'R ' source ' world.c' -+test_path_eol_fail filerename 'R hello.c ' dest '' -+test_path_eol_fail 'ls (in commit)' 'ls :2 ' path '' ++test_path_eol_fail filemodify 'M 100644 :1 ' path ++test_path_eol_fail filedelete 'D ' path ++test_path_space_fail filecopy 'C ' source ++test_path_eol_fail filecopy 'C hello.c ' dest ++test_path_space_fail filerename 'R ' source ++test_path_eol_fail filerename 'R hello.c ' dest ++test_path_eol_fail 'ls (in commit)' 'ls :2 ' path + +# When 'ls' has no <dataref>, the <path> must be quoted. -+test_path_eol_quoted_fail 'ls (without dataref in commit)' 'ls ' path '' ++test_path_eol_quoted_fail 'ls (without dataref in commit)' 'ls ' path + ### ### series T (ls) 2: 82a6f53c13 ! 2: 696ca27bb7 fast-import: directly use strbufs for paths @@ Commit message Signed-off-by: Thalia Archibald <thalia@xxxxxxxxxxxxx> ## builtin/fast-import.c ## -@@ builtin/fast-import.c: static void parse_path_space(struct strbuf *sb, const char *p, const char **endp +@@ builtin/fast-import.c: static void parse_path_space(struct strbuf *sb, const char *p, static void file_change_m(const char *p, struct branch *b) { @@ builtin/fast-import.c: static void file_change_m(const char *p, struct branch *b + strbuf_reset(&source); + parse_path_space(&source, p, &p, "source"); - if (!p) + if (!*p) die("Missing dest: %s", command_buf.buf); - strbuf_reset(&d_uq); - parse_path_eol(&d_uq, p, "dest"); 3: 893bbf5e73 ! 3: 39879d0a66 fast-import: allow unquoted empty path for root @@ Commit message ## builtin/fast-import.c ## @@ builtin/fast-import.c: static void file_change_cr(const char *p, struct branch *b, int rename) - struct tree_entry leaf; strbuf_reset(&source); -- parse_path_space(&source, p, &p, "source"); + parse_path_space(&source, p, &p, "source"); - -- if (!p) +- if (!*p) - die("Missing dest: %s", command_buf.buf); strbuf_reset(&dest); -+ parse_path_space(&source, p, &p, "source"); parse_path_eol(&dest, p, "dest"); - memset(&leaf, 0, sizeof(leaf)); ## t/t9300-fast-import.sh ## @@ t/t9300-fast-import.sh: test_expect_success 'M: rename subdirectory to new subdirectory' ' @@ t/t9300-fast-import.sh: test_expect_success 'M: rename subdirectory to new subdi - from refs/heads/M2^0 - R "" sub + from refs/heads/M2^0 -+ R '"$root"' sub ++ R $root sub - INPUT_END + INPUT_END @@ t/t9300-fast-import.sh: test_expect_success PIPE 'N: empty directory reads as mi - compare_diff_raw expect actual -' + from refs/heads/branch^0 -+ M 040000 $root_tree '"$root"' ++ M 040000 $root_tree $root + INPUT_END + git fast-import <input && + git diff-tree -C --find-copies-harder -r N4 N6 >actual && @@ t/t9300-fast-import.sh: test_expect_success PIPE 'N: empty directory reads as mi - compare_diff_raw expect actual -' + from refs/heads/branch^0 -+ C '"$root"' oldroot ++ C $root oldroot + INPUT_END + git fast-import <input && + git diff-tree -C --find-copies-harder -r branch N-copy-root-path >actual && @@ t/t9300-fast-import.sh: test_expect_success 'N: reject foo/ syntax in ls argumen - test_cmp expect.foo actual.foo && - test_cmp expect.bar actual.bar -' -+ M 040000 $tree '"$root"' ++ M 040000 $tree $root + M 644 inline foo/foo + data <<EOF + hello, world @@ t/t9300-fast-import.sh: test_expect_success 'N: reject foo/ syntax in ls argumen - git fast-import <input && - git diff --exit-code branch:newdir N9 -' -+ M 040000 $branch '"$root"' -+ C "newdir" '"$root"' ++ M 040000 $branch $root ++ C "newdir" $root + INPUT_END + git fast-import <input && + git diff --exit-code branch:newdir N9 @@ t/t9300-fast-import.sh: test_expect_success 'N: reject foo/ syntax in ls argumen - test_cmp expect.baz actual.baz && - test_cmp expect.qux actual.qux && - test_cmp expect.qux actual.quux' -+ M 040000 $tree '"$root"' ++ M 040000 $tree $root + M 100644 inline foo/bar/qux + data <<EOF + hello, world + EOF -+ R "foo" '"$root"' ++ R "foo" $root + C "bar/qux" "bar/quux" + INPUT_END + git show N11:bar/baz >actual.baz && @@ t/t9300-fast-import.sh: test_expect_success 'S: ls with garbage after sha1 must # # -@@ t/t9300-fast-import.sh: test_path_eol_quoted_fail 'ls (without dataref in commit)' 'ls ' path '' +@@ t/t9300-fast-import.sh: test_path_eol_quoted_fail 'ls (without dataref in commit)' 'ls ' path ### # Setup is carried over from series S. @@ t/t9300-fast-import.sh: test_expect_success 'U: validate directory delete result + must succeed + COMMIT + from refs/heads/U^0 -+ D '"$root"' ++ D $root - INPUT_END + INPUT_END 4: cb05a184e6 = 4: 1cef05e59a fast-import: remove dead strbuf 5: 1f34b632d7 = 5: 2e78690023 fast-import: improve documentation for unquoted paths 6: 82a4da68af = 6: 1b07ddffe0 fast-import: document C-style escapes for paths 7: c087c6a860 ! 7: dc67464b6a fast-import: forbid escaped NUL in paths @@ Documentation/git-fast-import.txt: and must be in canonical form. That is it mus `filedelete` ## builtin/fast-import.c ## -@@ builtin/fast-import.c: static void parse_path(struct strbuf *sb, const char *p, const char **endp, int +@@ builtin/fast-import.c: static void parse_path(struct strbuf *sb, const char *p, const char **endp, if (*p == '"') { if (unquote_c_style(sb, p, endp)) die("Invalid %s: %s", field, command_buf.buf); @@ t/t9300-fast-import.sh: test_path_base_fail () { + test_path_fail "$change" "escaped NUL in quoted $field" "$prefix" '"hello\000"' "$suffix" "NUL in $field" } test_path_eol_quoted_fail () { - local change="$1" prefix="$2" field="$3" suffix="$4" + local change="$1" prefix="$2" field="$3" 8: a503c55b83 = 8: 5e02d887bc fast-import: make comments more precise -- 2.44.0