From: Luke Shumaker <lukeshu@xxxxxxxxxxx>

fast-export's helper function find_encoding() takes a `const char *`, but
modifies that memory despite the `const`. Ultimately, this memory came
from get_commit_buffer(), and you're not supposed to modify the memory
that you get from get_commit_buffer().

So, get rid of find_encoding() in favor of commit.h:find_commit_header(),
which gives back a string length, rather than mutating the memory to
insert a '\0' terminator.

Because find_commit_header() detects the "\n\n" string that separates the
headers and the commit message, move the call to be above the
`message = strstr(..., "\n\n")` call. This helps readability, and allows
for the value of `encoding` to be used for a better value of "..." so that
the same memory doesn't need to be checked twice. Introduce a
`commit_buffer_cursor` variable to avoid writing an awkward
`encoding ? encoding + encoding_len : committer_end` expression.

Signed-off-by: Luke Shumaker <lukeshu@xxxxxxxxxxx>
---
Notes:
    v4: This commit is new in v4.

 builtin/fast-export.c | 65 ++++++++++++++++++++++++-------------------
 1 file changed, 37 insertions(+), 28 deletions(-)

diff --git a/builtin/fast-export.c b/builtin/fast-export.c
index d1cb8a3183..81f3fb1f05 100644
--- a/builtin/fast-export.c
+++ b/builtin/fast-export.c
@@ -499,21 +499,6 @@ static void show_filemodify(struct diff_queue_struct *q,
 	}
 }
 
-static const char *find_encoding(const char *begin, const char *end)
-{
-	const char *needle = "\nencoding ";
-	char *bol, *eol;
-
-	bol = memmem(begin, end ? end - begin : strlen(begin),
-		     needle, strlen(needle));
-	if (!bol)
-		return NULL;
-	bol += strlen(needle);
-	eol = strchrnul(bol, '\n');
-	*eol = '\0';
-	return bol;
-}
-
 static char *anonymize_ref_component(void *data)
 {
 	static int counter;
@@ -615,13 +600,26 @@ static void anonymize_ident_line(const char **beg, const char **end)
 	*end = out->buf + out->len;
 }
 
+static char *reencode_message(const char *in_msg,
+			      const char *in_encoding, size_t in_encoding_len)
+{
+	static struct strbuf in_encoding_buf = STRBUF_INIT;
+
+	strbuf_reset(&in_encoding_buf);
+	strbuf_add(&in_encoding_buf, in_encoding, in_encoding_len);
+
+	return reencode_string(in_msg, "UTF-8", in_encoding_buf.buf);
+}
+
 static void handle_commit(struct commit *commit, struct rev_info *rev,
 			  struct string_list *paths_of_changed_objects)
 {
 	int saved_output_format = rev->diffopt.output_format;
-	const char *commit_buffer;
+	const char *commit_buffer, *commit_buffer_cursor;
 	const char *author, *author_end, *committer, *committer_end;
-	const char *encoding, *message;
+	const char *encoding;
+	size_t encoding_len;
+	const char *message;
 	char *reencoded = NULL;
 	struct commit_list *p;
 	const char *refname;
@@ -630,21 +628,31 @@ static void handle_commit(struct commit *commit, struct rev_info *rev,
 	rev->diffopt.output_format = DIFF_FORMAT_CALLBACK;
 
 	parse_commit_or_die(commit);
-	commit_buffer = get_commit_buffer(commit, NULL);
-	author = strstr(commit_buffer, "\nauthor ");
+	commit_buffer_cursor = commit_buffer = get_commit_buffer(commit, NULL);
+
+	author = strstr(commit_buffer_cursor, "\nauthor ");
 	if (!author)
 		die("could not find author in commit %s",
 		    oid_to_hex(&commit->object.oid));
 	author++;
-	author_end = strchrnul(author, '\n');
-	committer = strstr(author_end, "\ncommitter ");
+	commit_buffer_cursor = author_end = strchrnul(author, '\n');
+
+	committer = strstr(commit_buffer_cursor, "\ncommitter ");
 	if (!committer)
 		die("could not find committer in commit %s",
 		    oid_to_hex(&commit->object.oid));
 	committer++;
-	committer_end = strchrnul(committer, '\n');
-	message = strstr(committer_end, "\n\n");
-	encoding = find_encoding(committer_end, message);
+	commit_buffer_cursor = committer_end = strchrnul(committer, '\n');
+
+	/* find_commit_header() gets a `+ 1` because
+	 * commit_buffer_cursor points at the trailing "\n" at the end
+	 * of the previous line, but find_commit_header() wants a
+	 * pointer to the beginning of the next line. */
+	encoding = find_commit_header(commit_buffer_cursor + 1, "encoding", &encoding_len);
+	if (encoding)
+		commit_buffer_cursor = encoding + encoding_len;
+
+	message = strstr(commit_buffer_cursor, "\n\n");
 	if (message)
 		message += 2;
 
@@ -685,14 +693,15 @@ static void handle_commit(struct commit *commit, struct rev_info *rev,
 	} else if (encoding) {
 		switch(reencode_mode) {
 		case REENCODE_YES:
-			reencoded = reencode_string(message, "UTF-8", encoding);
+			reencoded = reencode_message(message, encoding, encoding_len);
 			break;
 		case REENCODE_NO:
 			break;
 		case REENCODE_ABORT:
-			die("Encountered commit-specific encoding %s in commit "
+			die("Encountered commit-specific encoding %.*s in commit "
 			    "%s; use --reencode=[yes|no] to handle it",
-			    encoding, oid_to_hex(&commit->object.oid));
+			    (int)encoding_len, encoding,
+			    oid_to_hex(&commit->object.oid));
 		}
 	}
 	if (!commit->parents)
@@ -704,7 +713,7 @@ static void handle_commit(struct commit *commit, struct rev_info *rev,
 	       (int)(author_end - author), author,
 	       (int)(committer_end - committer), committer);
 	if (!reencoded && encoding)
-		printf("encoding %s\n", encoding);
+		printf("encoding %.*s\n", (int)encoding_len, encoding);
 	printf("data %u\n%s",
 	       (unsigned)(reencoded
 			  ? strlen(reencoded) : message
-- 
2.31.1