> fast-import has subtle differences in how it parses file paths between each > occurrence of <path> in the grammar. Many errors are suppressed or not checked, > which could lead to silent data corruption. A particularly bad case is when a > front-end sent escapes that Git doesn't recognize (e.g., hex escapes are not > supported), it would be treated as literal bytes instead of a quoted string. > > Bring path parsing into line with the documented behavior and improve > documentation to fill in missing details. Changes since v4: * Refine C comments and parameter name. Thalia Thalia Archibald (8): fast-import: tighten path unquoting fast-import: directly use strbufs for paths fast-import: allow unquoted empty path for root fast-import: remove dead strbuf fast-import: improve documentation for path quoting fast-import: document C-style escapes for paths fast-import: forbid escaped NUL in paths fast-import: make comments more precise Documentation/git-fast-import.txt | 31 +- builtin/fast-import.c | 162 ++++---- t/t9300-fast-import.sh | 624 +++++++++++++++++++++--------- 3 files changed, 555 insertions(+), 262 deletions(-) Range-diff against v4: 1: d6ea8aca46 ! 1: 2c18fe5fe9 fast-import: tighten path unquoting @@ builtin/fast-import.c: static uintmax_t parse_mark_ref_space(const char **p) } +/* -+ * Parse the path string into the strbuf. It may be quoted with escape sequences -+ * or unquoted without escape sequences. When unquoted, it may only contain a -+ * space if `include_spaces` is nonzero. ++ * Parse the path string into the strbuf. The path can either be quoted with ++ * escape sequences or unquoted without escape sequences. Unquoted strings may ++ * contain spaces only if `is_last_field` is nonzero; otherwise, it stops ++ * parsing at the first space. + */ +static void parse_path(struct strbuf *sb, const char *p, const char **endp, -+ int include_spaces, const char *field) ++ int is_last_field, const char *field) +{ + if (*p == '"') { + if (unquote_c_style(sb, p, endp)) + die("Invalid %s: %s", field, command_buf.buf); + } else { -+ if (include_spaces) -+ *endp = p + strlen(p); -+ else -+ *endp = strchrnul(p, ' '); ++ /* ++ * Unless we are parsing the last field of a line, ++ * SP is the end of this field. ++ */ ++ *endp = is_last_field ++ ? p + strlen(p) ++ : strchrnul(p, ' '); + strbuf_add(sb, p, *endp - p); + } +} + +/* + * Parse the path string into the strbuf, and complain if this is not the end of -+ * the string. It may contain spaces even when unquoted. ++ * the string. Unquoted strings may contain spaces. + */ +static void parse_path_eol(struct strbuf *sb, const char *p, const char *field) +{ @@ builtin/fast-import.c: static uintmax_t parse_mark_ref_space(const char **p) + +/* + * Parse the path string into the strbuf, and ensure it is followed by a space. -+ * It may not contain spaces when unquoted. Update *endp to point to the first ++ * Unquoted strings may not contain spaces. Update *endp to point to the first + * character after the space. + */ +static void parse_path_space(struct strbuf *sb, const char *p, 2: 9499f34aae = 2: 4e9f3aa52c fast-import: directly use strbufs for paths 3: 9b1e6b80f5 = 3: cae5764cec fast-import: allow unquoted empty path for root 4: 1a2b0dc616 = 4: 96ff70895a fast-import: remove dead strbuf 5: fb0d870d53 = 5: e1a1b0395d fast-import: improve documentation for path quoting 6: 4b6017ded8 = 6: 08e6fb37be fast-import: document C-style escapes for paths 7: 5b464f4b01 = 7: a01d0a1b25 fast-import: forbid escaped NUL in paths 8: 6eb66fce45 = 8: 65d7896e39 fast-import: make comments more precise -- 2.44.0