Re: [PATCH 1/1] convert: tighten the safe autocrlf handling

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



tboegi@xxxxxx writes:

> From: Torsten Bögershausen <tboegi@xxxxxx>
>
> When a text file had been commited with CRLF and the file is commited
> again, the CRLF are kept if .gitattributs has "text=auto".
> This is done by analyzing the content of the blob stored in the index:
> If a '\r' is found, Git assumes that the blob was commited with CRLF.
>
> The simple search for a '\r' does not always work as expected:
> A file is encoded in UTF-16 with CRLF and commited. Git treats it as binary.
> Now the content is converted into UTF-8. At the next commit Git treats the
> file as text, the CRLF should be converted into LF, but isn't.
>
> Solution:

Remove this line.

> Replace has_cr_in_index() with has_crlf_in_index(). When no '\r' is found,
> 0 is returned directly, this is the most common case.
> If a '\r' is found, the content is analyzed more deeply.

I may be recalling things incorrectly, but didn't an old version of
the code check CRLF explicitly, unlike the current implementation
that only check CRs?

In any case, I think we have accumulated enough cruft only to work
around the issues caused by "safe" crlf.  I moderately strongly
wonder if we should go back and think if that "feature" is adding
much value, and remove it if it is not.

In the meantime, let's queue this fix on top of the "safe crlf
workaround" pile.

Thanks.

>
> Reported-By: Ashish Negi <ashishnegi33@xxxxxxxxx>
> Signed-off-by: Torsten Bögershausen <tboegi@xxxxxx>
> ---
>  convert.c            | 19 +++++++++----
>  t/t0027-auto-crlf.sh | 76 ++++++++++++++++++++++++++++++++++++++++++++++++----
>  2 files changed, 85 insertions(+), 10 deletions(-)
>
> diff --git a/convert.c b/convert.c
> index 20d7ab67bd..63ef799239 100644
> --- a/convert.c
> +++ b/convert.c
> @@ -220,18 +220,27 @@ static void check_safe_crlf(const char *path, enum crlf_action crlf_action,
>  	}
>  }
>  
> -static int has_cr_in_index(const struct index_state *istate, const char *path)
> +static int has_crlf_in_index(const struct index_state *istate, const char *path)
>  {
>  	unsigned long sz;
>  	void *data;
> -	int has_cr;
> +	const char *crp;
> +	int has_crlf = 0;
>  
>  	data = read_blob_data_from_index(istate, path, &sz);
>  	if (!data)
>  		return 0;
> -	has_cr = memchr(data, '\r', sz) != NULL;
> +
> +	crp = memchr(data, '\r', sz);
> +	if (crp && (crp[1] == '\n')) {
> +		unsigned int ret_stats;
> +		ret_stats = gather_convert_stats(data, sz);
> +		if (!(ret_stats & CONVERT_STAT_BITS_BIN) &&
> +		    (ret_stats & CONVERT_STAT_BITS_TXT_CRLF))
> +			has_crlf = 1;
> +	}
>  	free(data);
> -	return has_cr;
> +	return has_crlf;
>  }
>  
>  static int will_convert_lf_to_crlf(size_t len, struct text_stat *stats,
> @@ -290,7 +299,7 @@ static int crlf_to_git(const struct index_state *istate,
>  		 * cherry-pick.
>  		 */
>  		if ((checksafe != SAFE_CRLF_RENORMALIZE) &&
> -		    has_cr_in_index(istate, path))
> +		    has_crlf_in_index(istate, path))
>  			convert_crlf_into_lf = 0;
>  	}
>  	if ((checksafe == SAFE_CRLF_WARN ||
> diff --git a/t/t0027-auto-crlf.sh b/t/t0027-auto-crlf.sh
> index 68108d956a..0af35cfb1f 100755
> --- a/t/t0027-auto-crlf.sh
> +++ b/t/t0027-auto-crlf.sh
> @@ -43,19 +43,31 @@ create_gitattributes () {
>  	} >.gitattributes
>  }
>  
> -create_NNO_files () {
> +# Create 2 sets of files:
> +# The NNO files are "Not NOrmalized in the repo. We use CRLF_mix_LF and store
> +#   it under different names for the different test cases, see ${pfx}
> +#   Depending on .gitattributes they are normalized at the next commit (or not)
> +# The MIX files have different contents in the repo.
> +#   Depending on its contents, the "new safer autocrlf" may kick in.
> +create_NNO_MIX_files () {
>  	for crlf in false true input
>  	do
>  		for attr in "" auto text -text
>  		do
>  			for aeol in "" lf crlf
>  			do
> -				pfx=NNO_attr_${attr}_aeol_${aeol}_${crlf}
> +				pfx=NNO_attr_${attr}_aeol_${aeol}_${crlf} &&
>  				cp CRLF_mix_LF ${pfx}_LF.txt &&
>  				cp CRLF_mix_LF ${pfx}_CRLF.txt &&
>  				cp CRLF_mix_LF ${pfx}_CRLF_mix_LF.txt &&
>  				cp CRLF_mix_LF ${pfx}_LF_mix_CR.txt &&
> -				cp CRLF_mix_LF ${pfx}_CRLF_nul.txt
> +				cp CRLF_mix_LF ${pfx}_CRLF_nul.txt &&
> +				pfx=MIX_attr_${attr}_aeol_${aeol}_${crlf} &&
> +				cp LF          ${pfx}_LF.txt &&
> +				cp CRLF        ${pfx}_CRLF.txt &&
> +				cp CRLF_mix_LF ${pfx}_CRLF_mix_LF.txt &&
> +				cp LF_mix_CR   ${pfx}_LF_mix_CR.txt &&
> +				cp CRLF_nul    ${pfx}_CRLF_nul.txt
>  			done
>  		done
>  	done
> @@ -136,6 +148,49 @@ commit_chk_wrnNNO () {
>  	'
>  }
>  
> +# Commit a file with mixed line endings on top of different files
> +# in the index. Check for warnings
> +commit_MIX_chkwrn () {
> +	attr=$1 ; shift
> +	aeol=$1 ; shift
> +	crlf=$1 ; shift
> +	lfwarn=$1 ; shift
> +	crlfwarn=$1 ; shift
> +	lfmixcrlf=$1 ; shift
> +	lfmixcr=$1 ; shift
> +	crlfnul=$1 ; shift
> +	pfx=MIX_attr_${attr}_aeol_${aeol}_${crlf}
> +	#Commit file with CLRF_mix_LF on top of existing file
> +	create_gitattributes "$attr" $aeol &&
> +	for f in LF CRLF CRLF_mix_LF LF_mix_CR CRLF_nul
> +	do
> +		fname=${pfx}_$f.txt &&
> +		cp CRLF_mix_LF $fname &&
> +		printf Z >>"$fname" &&
> +		git -c core.autocrlf=$crlf add $fname 2>"${pfx}_$f.err"
> +	done
> +
> +	test_expect_success "commit file with mixed EOL crlf=$crlf attr=$attr LF" '
> +		check_warning "$lfwarn" ${pfx}_LF.err
> +	'
> +	test_expect_success "commit file with mixed EOL attr=$attr aeol=$aeol crlf=$crlf CRLF" '
> +		check_warning "$crlfwarn" ${pfx}_CRLF.err
> +	'
> +
> +	test_expect_success "commit file with mixed EOL attr=$attr aeol=$aeol crlf=$crlf CRLF_mix_LF" '
> +		check_warning "$lfmixcrlf" ${pfx}_CRLF_mix_LF.err
> +	'
> +
> +	test_expect_success "commit file with mixed EOL attr=$attr aeol=$aeol crlf=$crlf LF_mix_cr" '
> +		check_warning "$lfmixcr" ${pfx}_LF_mix_CR.err
> +	'
> +
> +	test_expect_success "commit file with mixed EOL attr=$attr aeol=$aeol crlf=$crlf CRLF_nul" '
> +		check_warning "$crlfnul" ${pfx}_CRLF_nul.err
> +	'
> +}
> +
> +
>  stats_ascii () {
>  	case "$1" in
>  	LF)
> @@ -323,8 +378,8 @@ test_expect_success 'setup master' '
>  	printf "\$Id: 0000000000000000000000000000000000000000 \$\r\nLINEONE\r\nLINETWO\rLINETHREE"   >CRLF_mix_CR &&
>  	printf "\$Id: 0000000000000000000000000000000000000000 \$\r\nLINEONEQ\r\nLINETWO\r\nLINETHREE" | q_to_nul >CRLF_nul &&
>  	printf "\$Id: 0000000000000000000000000000000000000000 \$\nLINEONEQ\nLINETWO\nLINETHREE" | q_to_nul >LF_nul &&
> -	create_NNO_files CRLF_mix_LF CRLF_mix_LF CRLF_mix_LF CRLF_mix_LF CRLF_mix_LF &&
> -	git -c core.autocrlf=false add NNO_*.txt &&
> +	create_NNO_MIX_files CRLF_mix_LF CRLF_mix_LF CRLF_mix_LF CRLF_mix_LF CRLF_mix_LF &&
> +	git -c core.autocrlf=false add NNO_*.txt MIX_*.txt &&
>  	git commit -m "mixed line endings" &&
>  	test_tick
>  '
> @@ -385,6 +440,17 @@ test_expect_success 'commit files attr=crlf' '
>  	commit_check_warn input "crlf" "LF_CRLF" ""        "LF_CRLF" "LF_CRLF" ""
>  '
>  
> +# Commit "CRLFmixLF" on top of these files already in the repo:
> +# LF, CRLF, CRLFmixLF LF_mix_CR CRLFNULL
> +#                 attr                    LF        CRLF      CRLFmixLF   LF_mix_CR   CRLFNUL
> +commit_MIX_chkwrn ""      ""      false   ""        ""        ""          ""          ""
> +commit_MIX_chkwrn ""      ""      true    "LF_CRLF" ""        ""          "LF_CRLF"   "LF_CRLF"
> +commit_MIX_chkwrn ""      ""      input   "CRLF_LF" ""        ""          "CRLF_LF"   "CRLF_LF"
> +
> +commit_MIX_chkwrn "auto"  ""      false   "CRLF_LF" ""        ""          "CRLF_LF"   "CRLF_LF"
> +commit_MIX_chkwrn "auto"  ""      true    "LF_CRLF" ""        ""          "LF_CRLF"   "LF_CRLF"
> +commit_MIX_chkwrn "auto"  ""      input   "CRLF_LF" ""        ""          "CRLF_LF"   "CRLF_LF"
> +
>  #                 attr                    LF        CRLF      CRLFmixLF   LF_mix_CR   CRLFNUL
>  commit_chk_wrnNNO ""      ""      false   ""        ""        ""          ""          ""
>  commit_chk_wrnNNO ""      ""      true    LF_CRLF   ""        ""          ""          ""





[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux