[PATCH v2 1/1] convert: tighten the safe autocrlf handling

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Torsten Bögershausen <tboegi@xxxxxx>

When a text file had been commited with CRLF and the file is commited
again, the CRLF are kept if .gitattributs has "text=auto".
This is done by analyzing the content of the blob stored in the index:
If a '\r' is found, Git assumes that the blob was commited with CRLF.

The simple search for a '\r' does not always work as expected:
A file is encoded in UTF-16 with CRLF and commited. Git treats it as binary.
Now the content is converted into UTF-8. At the next commit Git treats the
file as text, the CRLF should be converted into LF, but isn't.

Replace has_cr_in_index() with has_crlf_in_index(). When no '\r' is found,
0 is returned directly, this is the most common case.
If a '\r' is found, the content is analyzed more deeply.

Reported-By: Ashish Negi <ashishnegi33@xxxxxxxxx>
Signed-off-by: Torsten Bögershausen <tboegi@xxxxxx>
---

Changes against v1:
- change "if (crp && (crp[1] == '\n'))" to "if (crp)"
  (Thanks Eric. The new patch is more straightforward, and no risk to
   read out of data)
- Remove the "Solution:" in the commit message

Note:
  The original function has_cr_in_index() is from this commit:
    c4805393d73 (Finn Arne Gangstad   2010-05-12 00:37:57 +0200  225)
  And has this info:
    >Change autocrlf to not do any conversions to files that in the
    >repository already contain a CR. git with autocrlf set will never
    >create such a file, or change a LF only file to contain CRs, so the
    >(new) assumption is that if a file contains a CR, it is intentional,
    >and autocrlf should not change that.
  So the original assumption was slightly optimistic (but did work in 7 years)



convert.c            | 19 +++++++++----
 t/t0027-auto-crlf.sh | 76 ++++++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 85 insertions(+), 10 deletions(-)

diff --git a/convert.c b/convert.c
index 20d7ab67bd..1a41a48e15 100644
--- a/convert.c
+++ b/convert.c
@@ -220,18 +220,27 @@ static void check_safe_crlf(const char *path, enum crlf_action crlf_action,
 	}
 }
 
-static int has_cr_in_index(const struct index_state *istate, const char *path)
+static int has_crlf_in_index(const struct index_state *istate, const char *path)
 {
 	unsigned long sz;
 	void *data;
-	int has_cr;
+	const char *crp;
+	int has_crlf = 0;
 
 	data = read_blob_data_from_index(istate, path, &sz);
 	if (!data)
 		return 0;
-	has_cr = memchr(data, '\r', sz) != NULL;
+
+	crp = memchr(data, '\r', sz);
+	if (crp) {
+		unsigned int ret_stats;
+		ret_stats = gather_convert_stats(data, sz);
+		if (!(ret_stats & CONVERT_STAT_BITS_BIN) &&
+		    (ret_stats & CONVERT_STAT_BITS_TXT_CRLF))
+			has_crlf = 1;
+	}
 	free(data);
-	return has_cr;
+	return has_crlf;
 }
 
 static int will_convert_lf_to_crlf(size_t len, struct text_stat *stats,
@@ -290,7 +299,7 @@ static int crlf_to_git(const struct index_state *istate,
 		 * cherry-pick.
 		 */
 		if ((checksafe != SAFE_CRLF_RENORMALIZE) &&
-		    has_cr_in_index(istate, path))
+		    has_crlf_in_index(istate, path))
 			convert_crlf_into_lf = 0;
 	}
 	if ((checksafe == SAFE_CRLF_WARN ||
diff --git a/t/t0027-auto-crlf.sh b/t/t0027-auto-crlf.sh
index 68108d956a..0af35cfb1f 100755
--- a/t/t0027-auto-crlf.sh
+++ b/t/t0027-auto-crlf.sh
@@ -43,19 +43,31 @@ create_gitattributes () {
 	} >.gitattributes
 }
 
-create_NNO_files () {
+# Create 2 sets of files:
+# The NNO files are "Not NOrmalized in the repo. We use CRLF_mix_LF and store
+#   it under different names for the different test cases, see ${pfx}
+#   Depending on .gitattributes they are normalized at the next commit (or not)
+# The MIX files have different contents in the repo.
+#   Depending on its contents, the "new safer autocrlf" may kick in.
+create_NNO_MIX_files () {
 	for crlf in false true input
 	do
 		for attr in "" auto text -text
 		do
 			for aeol in "" lf crlf
 			do
-				pfx=NNO_attr_${attr}_aeol_${aeol}_${crlf}
+				pfx=NNO_attr_${attr}_aeol_${aeol}_${crlf} &&
 				cp CRLF_mix_LF ${pfx}_LF.txt &&
 				cp CRLF_mix_LF ${pfx}_CRLF.txt &&
 				cp CRLF_mix_LF ${pfx}_CRLF_mix_LF.txt &&
 				cp CRLF_mix_LF ${pfx}_LF_mix_CR.txt &&
-				cp CRLF_mix_LF ${pfx}_CRLF_nul.txt
+				cp CRLF_mix_LF ${pfx}_CRLF_nul.txt &&
+				pfx=MIX_attr_${attr}_aeol_${aeol}_${crlf} &&
+				cp LF          ${pfx}_LF.txt &&
+				cp CRLF        ${pfx}_CRLF.txt &&
+				cp CRLF_mix_LF ${pfx}_CRLF_mix_LF.txt &&
+				cp LF_mix_CR   ${pfx}_LF_mix_CR.txt &&
+				cp CRLF_nul    ${pfx}_CRLF_nul.txt
 			done
 		done
 	done
@@ -136,6 +148,49 @@ commit_chk_wrnNNO () {
 	'
 }
 
+# Commit a file with mixed line endings on top of different files
+# in the index. Check for warnings
+commit_MIX_chkwrn () {
+	attr=$1 ; shift
+	aeol=$1 ; shift
+	crlf=$1 ; shift
+	lfwarn=$1 ; shift
+	crlfwarn=$1 ; shift
+	lfmixcrlf=$1 ; shift
+	lfmixcr=$1 ; shift
+	crlfnul=$1 ; shift
+	pfx=MIX_attr_${attr}_aeol_${aeol}_${crlf}
+	#Commit file with CLRF_mix_LF on top of existing file
+	create_gitattributes "$attr" $aeol &&
+	for f in LF CRLF CRLF_mix_LF LF_mix_CR CRLF_nul
+	do
+		fname=${pfx}_$f.txt &&
+		cp CRLF_mix_LF $fname &&
+		printf Z >>"$fname" &&
+		git -c core.autocrlf=$crlf add $fname 2>"${pfx}_$f.err"
+	done
+
+	test_expect_success "commit file with mixed EOL crlf=$crlf attr=$attr LF" '
+		check_warning "$lfwarn" ${pfx}_LF.err
+	'
+	test_expect_success "commit file with mixed EOL attr=$attr aeol=$aeol crlf=$crlf CRLF" '
+		check_warning "$crlfwarn" ${pfx}_CRLF.err
+	'
+
+	test_expect_success "commit file with mixed EOL attr=$attr aeol=$aeol crlf=$crlf CRLF_mix_LF" '
+		check_warning "$lfmixcrlf" ${pfx}_CRLF_mix_LF.err
+	'
+
+	test_expect_success "commit file with mixed EOL attr=$attr aeol=$aeol crlf=$crlf LF_mix_cr" '
+		check_warning "$lfmixcr" ${pfx}_LF_mix_CR.err
+	'
+
+	test_expect_success "commit file with mixed EOL attr=$attr aeol=$aeol crlf=$crlf CRLF_nul" '
+		check_warning "$crlfnul" ${pfx}_CRLF_nul.err
+	'
+}
+
+
 stats_ascii () {
 	case "$1" in
 	LF)
@@ -323,8 +378,8 @@ test_expect_success 'setup master' '
 	printf "\$Id: 0000000000000000000000000000000000000000 \$\r\nLINEONE\r\nLINETWO\rLINETHREE"   >CRLF_mix_CR &&
 	printf "\$Id: 0000000000000000000000000000000000000000 \$\r\nLINEONEQ\r\nLINETWO\r\nLINETHREE" | q_to_nul >CRLF_nul &&
 	printf "\$Id: 0000000000000000000000000000000000000000 \$\nLINEONEQ\nLINETWO\nLINETHREE" | q_to_nul >LF_nul &&
-	create_NNO_files CRLF_mix_LF CRLF_mix_LF CRLF_mix_LF CRLF_mix_LF CRLF_mix_LF &&
-	git -c core.autocrlf=false add NNO_*.txt &&
+	create_NNO_MIX_files CRLF_mix_LF CRLF_mix_LF CRLF_mix_LF CRLF_mix_LF CRLF_mix_LF &&
+	git -c core.autocrlf=false add NNO_*.txt MIX_*.txt &&
 	git commit -m "mixed line endings" &&
 	test_tick
 '
@@ -385,6 +440,17 @@ test_expect_success 'commit files attr=crlf' '
 	commit_check_warn input "crlf" "LF_CRLF" ""        "LF_CRLF" "LF_CRLF" ""
 '
 
+# Commit "CRLFmixLF" on top of these files already in the repo:
+# LF, CRLF, CRLFmixLF LF_mix_CR CRLFNULL
+#                 attr                    LF        CRLF      CRLFmixLF   LF_mix_CR   CRLFNUL
+commit_MIX_chkwrn ""      ""      false   ""        ""        ""          ""          ""
+commit_MIX_chkwrn ""      ""      true    "LF_CRLF" ""        ""          "LF_CRLF"   "LF_CRLF"
+commit_MIX_chkwrn ""      ""      input   "CRLF_LF" ""        ""          "CRLF_LF"   "CRLF_LF"
+
+commit_MIX_chkwrn "auto"  ""      false   "CRLF_LF" ""        ""          "CRLF_LF"   "CRLF_LF"
+commit_MIX_chkwrn "auto"  ""      true    "LF_CRLF" ""        ""          "LF_CRLF"   "LF_CRLF"
+commit_MIX_chkwrn "auto"  ""      input   "CRLF_LF" ""        ""          "CRLF_LF"   "CRLF_LF"
+
 #                 attr                    LF        CRLF      CRLFmixLF   LF_mix_CR   CRLFNUL
 commit_chk_wrnNNO ""      ""      false   ""        ""        ""          ""          ""
 commit_chk_wrnNNO ""      ""      true    LF_CRLF   ""        ""          ""          ""
-- 
2.15.0.278.gbd97f72a1b.dirty




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux