From: Torsten Bögershausen <tboegi@xxxxxx> When a text file had been commited with CRLF and the file is commited again, the CRLF are kept if .gitattributs has "text=auto". This is done by analyzing the content of the blob stored in the index: If a '\r' is found, Git assumes that the blob was commited with CRLF. The simple search for a '\r' does not always work as expected: A file is encoded in UTF-16 with CRLF and commited. Git treats it as binary. Now the content is converted into UTF-8. At the next commit Git treats the file as text, the CRLF should be converted into LF, but isn't. Solution: Replace has_cr_in_index() with has_crlf_in_index(). When no '\r' is found, 0 is returned directly, this is the most common case. If a '\r' is found, the content is analyzed more deeply. Reported-By: Ashish Negi <ashishnegi33@xxxxxxxxx> Signed-off-by: Torsten Bögershausen <tboegi@xxxxxx> --- convert.c | 19 +++++++++---- t/t0027-auto-crlf.sh | 76 ++++++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 85 insertions(+), 10 deletions(-) diff --git a/convert.c b/convert.c index 20d7ab67bd..63ef799239 100644 --- a/convert.c +++ b/convert.c @@ -220,18 +220,27 @@ static void check_safe_crlf(const char *path, enum crlf_action crlf_action, } } -static int has_cr_in_index(const struct index_state *istate, const char *path) +static int has_crlf_in_index(const struct index_state *istate, const char *path) { unsigned long sz; void *data; - int has_cr; + const char *crp; + int has_crlf = 0; data = read_blob_data_from_index(istate, path, &sz); if (!data) return 0; - has_cr = memchr(data, '\r', sz) != NULL; + + crp = memchr(data, '\r', sz); + if (crp && (crp[1] == '\n')) { + unsigned int ret_stats; + ret_stats = gather_convert_stats(data, sz); + if (!(ret_stats & CONVERT_STAT_BITS_BIN) && + (ret_stats & CONVERT_STAT_BITS_TXT_CRLF)) + has_crlf = 1; + } free(data); - return has_cr; + return has_crlf; } static int will_convert_lf_to_crlf(size_t len, struct text_stat *stats, @@ -290,7 +299,7 @@ static int crlf_to_git(const struct index_state *istate, * cherry-pick. */ if ((checksafe != SAFE_CRLF_RENORMALIZE) && - has_cr_in_index(istate, path)) + has_crlf_in_index(istate, path)) convert_crlf_into_lf = 0; } if ((checksafe == SAFE_CRLF_WARN || diff --git a/t/t0027-auto-crlf.sh b/t/t0027-auto-crlf.sh index 68108d956a..0af35cfb1f 100755 --- a/t/t0027-auto-crlf.sh +++ b/t/t0027-auto-crlf.sh @@ -43,19 +43,31 @@ create_gitattributes () { } >.gitattributes } -create_NNO_files () { +# Create 2 sets of files: +# The NNO files are "Not NOrmalized in the repo. We use CRLF_mix_LF and store +# it under different names for the different test cases, see ${pfx} +# Depending on .gitattributes they are normalized at the next commit (or not) +# The MIX files have different contents in the repo. +# Depending on its contents, the "new safer autocrlf" may kick in. +create_NNO_MIX_files () { for crlf in false true input do for attr in "" auto text -text do for aeol in "" lf crlf do - pfx=NNO_attr_${attr}_aeol_${aeol}_${crlf} + pfx=NNO_attr_${attr}_aeol_${aeol}_${crlf} && cp CRLF_mix_LF ${pfx}_LF.txt && cp CRLF_mix_LF ${pfx}_CRLF.txt && cp CRLF_mix_LF ${pfx}_CRLF_mix_LF.txt && cp CRLF_mix_LF ${pfx}_LF_mix_CR.txt && - cp CRLF_mix_LF ${pfx}_CRLF_nul.txt + cp CRLF_mix_LF ${pfx}_CRLF_nul.txt && + pfx=MIX_attr_${attr}_aeol_${aeol}_${crlf} && + cp LF ${pfx}_LF.txt && + cp CRLF ${pfx}_CRLF.txt && + cp CRLF_mix_LF ${pfx}_CRLF_mix_LF.txt && + cp LF_mix_CR ${pfx}_LF_mix_CR.txt && + cp CRLF_nul ${pfx}_CRLF_nul.txt done done done @@ -136,6 +148,49 @@ commit_chk_wrnNNO () { ' } +# Commit a file with mixed line endings on top of different files +# in the index. Check for warnings +commit_MIX_chkwrn () { + attr=$1 ; shift + aeol=$1 ; shift + crlf=$1 ; shift + lfwarn=$1 ; shift + crlfwarn=$1 ; shift + lfmixcrlf=$1 ; shift + lfmixcr=$1 ; shift + crlfnul=$1 ; shift + pfx=MIX_attr_${attr}_aeol_${aeol}_${crlf} + #Commit file with CLRF_mix_LF on top of existing file + create_gitattributes "$attr" $aeol && + for f in LF CRLF CRLF_mix_LF LF_mix_CR CRLF_nul + do + fname=${pfx}_$f.txt && + cp CRLF_mix_LF $fname && + printf Z >>"$fname" && + git -c core.autocrlf=$crlf add $fname 2>"${pfx}_$f.err" + done + + test_expect_success "commit file with mixed EOL crlf=$crlf attr=$attr LF" ' + check_warning "$lfwarn" ${pfx}_LF.err + ' + test_expect_success "commit file with mixed EOL attr=$attr aeol=$aeol crlf=$crlf CRLF" ' + check_warning "$crlfwarn" ${pfx}_CRLF.err + ' + + test_expect_success "commit file with mixed EOL attr=$attr aeol=$aeol crlf=$crlf CRLF_mix_LF" ' + check_warning "$lfmixcrlf" ${pfx}_CRLF_mix_LF.err + ' + + test_expect_success "commit file with mixed EOL attr=$attr aeol=$aeol crlf=$crlf LF_mix_cr" ' + check_warning "$lfmixcr" ${pfx}_LF_mix_CR.err + ' + + test_expect_success "commit file with mixed EOL attr=$attr aeol=$aeol crlf=$crlf CRLF_nul" ' + check_warning "$crlfnul" ${pfx}_CRLF_nul.err + ' +} + + stats_ascii () { case "$1" in LF) @@ -323,8 +378,8 @@ test_expect_success 'setup master' ' printf "\$Id: 0000000000000000000000000000000000000000 \$\r\nLINEONE\r\nLINETWO\rLINETHREE" >CRLF_mix_CR && printf "\$Id: 0000000000000000000000000000000000000000 \$\r\nLINEONEQ\r\nLINETWO\r\nLINETHREE" | q_to_nul >CRLF_nul && printf "\$Id: 0000000000000000000000000000000000000000 \$\nLINEONEQ\nLINETWO\nLINETHREE" | q_to_nul >LF_nul && - create_NNO_files CRLF_mix_LF CRLF_mix_LF CRLF_mix_LF CRLF_mix_LF CRLF_mix_LF && - git -c core.autocrlf=false add NNO_*.txt && + create_NNO_MIX_files CRLF_mix_LF CRLF_mix_LF CRLF_mix_LF CRLF_mix_LF CRLF_mix_LF && + git -c core.autocrlf=false add NNO_*.txt MIX_*.txt && git commit -m "mixed line endings" && test_tick ' @@ -385,6 +440,17 @@ test_expect_success 'commit files attr=crlf' ' commit_check_warn input "crlf" "LF_CRLF" "" "LF_CRLF" "LF_CRLF" "" ' +# Commit "CRLFmixLF" on top of these files already in the repo: +# LF, CRLF, CRLFmixLF LF_mix_CR CRLFNULL +# attr LF CRLF CRLFmixLF LF_mix_CR CRLFNUL +commit_MIX_chkwrn "" "" false "" "" "" "" "" +commit_MIX_chkwrn "" "" true "LF_CRLF" "" "" "LF_CRLF" "LF_CRLF" +commit_MIX_chkwrn "" "" input "CRLF_LF" "" "" "CRLF_LF" "CRLF_LF" + +commit_MIX_chkwrn "auto" "" false "CRLF_LF" "" "" "CRLF_LF" "CRLF_LF" +commit_MIX_chkwrn "auto" "" true "LF_CRLF" "" "" "LF_CRLF" "LF_CRLF" +commit_MIX_chkwrn "auto" "" input "CRLF_LF" "" "" "CRLF_LF" "CRLF_LF" + # attr LF CRLF CRLFmixLF LF_mix_CR CRLFNUL commit_chk_wrnNNO "" "" false "" "" "" "" "" commit_chk_wrnNNO "" "" true LF_CRLF "" "" "" "" -- 2.15.0.278.gbd97f72a1b.dirty