While stress testing `git filter-repo`, I noticed an issue with encoding;
further digging led to the fixes and features in this series. See the
individual commit messages for details.

Changes since v1 (full range-diff below):
  * Applied style fixes Eric pointed out in his review (thanks!)
  * Rebased on latest master (83232e38, "The seventh batch"), resolving a
    trivial merge conflict. Now merges cleanly with next and pu as well.

I'm a bit under the weather so I may be slow to respond...

Elijah Newren (5):
  t9350: fix encoding test to actually test reencoding
  fast-import: support 'encoding' commit header
  fast-export: avoid stripping encoding header if we cannot reencode
  fast-export: differentiate between explicitly utf-8 and implicitly utf-8
  fast-export: do automatic reencoding of commit messages only if requested

 Documentation/git-fast-import.txt |  7 ++++
 builtin/fast-export.c             | 44 +++++++++++++++++++++----
 fast-import.c                     | 11 +++++--
 t/t9300-fast-import.sh            | 20 ++++++++++++
 t/t9350-fast-export.sh            | 53 +++++++++++++++++++++++++------
 5 files changed, 118 insertions(+), 17 deletions(-)

Range-diff:
1: d6efd05142 ! 1: 9cc04242bd t9350: fix encoding test to actually test reencoding @@ -26,8 +26,7 @@ - # use author and committer name in ISO-8859-1 to match it. - . "$TEST_DIRECTORY"/t3901/8859-1.txt && + test_when_finished "git reset --hard HEAD~1" && -+ test_when_finished "git config --unset i18n.commitencoding" && -+ git config i18n.commitencoding iso-8859-7 && ++ test_config i18n.commitencoding iso-8859-7 && test_tick && echo rosten >file && - git commit -s -m den file && 2: 02f48c7559 !
2: 0cd023ac7a fast-import: support 'encoding' commit header @@ -51,9 +51,8 @@ } if (!committer) die("Expected committer but didn't get one"); -+ if (skip_prefix(command_buf.buf, "encoding ", &encoding)) { ++ if (skip_prefix(command_buf.buf, "encoding ", &encoding)) + read_next_command(); -+ } parse_data(&msg, 0, NULL); read_next_command(); parse_from(b); @@ -69,7 +68,7 @@ + strbuf_addf(&new_data, + "encoding %s\n", + encoding); -+ strbuf_addf(&new_data, "\n"); ++ strbuf_addch(&new_data, '\n'); strbuf_addbuf(&new_data, &msg); free(author); free(committer); @@ -78,14 +77,14 @@ --- a/t/t9300-fast-import.sh +++ b/t/t9300-fast-import.sh @@ - background_import_still_running + sed -e s/LFs/LLL/ W-input | tr L "\n" | test_must_fail git fast-import ' +### -+### series W (other new features) ++### series X (other new features) +### + -+test_expect_success 'W: handling encoding' ' ++test_expect_success 'X: handling encoding' ' + test_tick && + cat >input <<-INPUT_END && + commit refs/heads/encoding 3: 86c348402d ! 3: 1fddf51402 fast-export: avoid stripping encoding header if we cannot reencode @@ -41,8 +41,7 @@ +test_expect_success 'encoding preserved if reencoding fails' ' + + test_when_finished "git reset --hard HEAD~1" && -+ test_when_finished "git config --unset i18n.commitencoding" && -+ git config i18n.commitencoding iso-8859-7 && ++ test_config i18n.commitencoding iso-8859-7 && + echo rosten >file && + git commit -s -m "$(printf "Pi: \360; Invalid: \377")" file && + git fast-export wer^..wer >iso-8859-7.fi && 4: c09b23bc59 = 4: 4a2e04b3ae fast-export: differentiate between explicitly utf-8 and implicitly utf-8 5: 24b69a0db9 ! 
5: 44aacb1a0b fast-export: do automatic reencoding of commit messages only if requested @@ -92,8 +92,7 @@ +test_expect_success 'reencoding iso-8859-7' ' test_when_finished "git reset --hard HEAD~1" && - test_when_finished "git config --unset i18n.commitencoding" && -@@ + test_config i18n.commitencoding iso-8859-7 && test_tick && echo rosten >file && git commit -s -m "$(printf "Pi: \360")" file && @@ -109,8 +108,7 @@ +test_expect_success 'aborting on iso-8859-7' ' + + test_when_finished "git reset --hard HEAD~1" && -+ test_when_finished "git config --unset i18n.commitencoding" && -+ git config i18n.commitencoding iso-8859-7 && ++ test_config i18n.commitencoding iso-8859-7 && + echo rosten >file && + git commit -s -m "$(printf "Pi: \360")" file && + test_must_fail git fast-export --reencode=abort wer^..wer >iso-8859-7.fi @@ -119,8 +117,7 @@ +test_expect_success 'preserving iso-8859-7' ' + + test_when_finished "git reset --hard HEAD~1" && -+ test_when_finished "git config --unset i18n.commitencoding" && -+ git config i18n.commitencoding iso-8859-7 && ++ test_config i18n.commitencoding iso-8859-7 && + echo rosten >file && + git commit -s -m "$(printf "Pi: \360")" file && + git fast-export --reencode=no wer^..wer >iso-8859-7.fi && @@ -134,8 +131,7 @@ test_expect_success 'encoding preserved if reencoding fails' ' test_when_finished "git reset --hard HEAD~1" && -@@ - git config i18n.commitencoding iso-8859-7 && + test_config i18n.commitencoding iso-8859-7 && echo rosten >file && git commit -s -m "$(printf "Pi: \360; Invalid: \377")" file && - git fast-export wer^..wer >iso-8859-7.fi && -- 2.21.0.782.g44aacb1a0b