[PATCH/RFC v1 1/1] git restore -p . and precomposed unicode

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Torsten Bögershausen <tboegi@xxxxxx>

The following sequence leads to a "BUG" assertion when run under macOS:

  #!/bin/sh
  DIR=git-test-restore-p
  Adiarnfd=$(printf 'A\314\210')
  DIRNAME=xx${Adiarnfd}yy
  mkdir $DIR &&
  cd $DIR &&
  git init &&
  mkdir $DIRNAME &&
  cd $DIRNAME &&
  echo "Initial" >file &&
  git add file &&
  echo "One more line"  >>file &&
  echo y | git restore -p . &&
  echo "OK"

 Initialized empty Git repository in /tmp/git-test-restore-p/.git/
 BUG: pathspec.c:495: error initializing pathspec_item
 Cannot close git diff-index --cached --numstat
 [snip]


The command `git restore` is run from a directory inside a Git repo.
Git needs to split $CWD into two parts:
the path to the repo and "the rest", if any.
"The rest" becomes a "prefix" that is later used inside the pathspec code.

As an example, for a $CWD of "/path/to/repo/dir-inside-repå" Git would
determine "/path/to/repo" as the root of the repo, the place where the
configuration file .git/config is found.

The rest becomes the prefix ("dir-inside-repå"), from where the pathspec
machinery expands the "."; more about this later.
If the prefix is in decomposed form (written here as "dir-inside-rep°a" to
make the decomposition visible), it doesn't match the precomposed
"dir-inside-repå".

The solution is to read the config variable "core.precomposeunicode" early.
Then, if configured, precompose "prefix" (and argv) and hand the prefix
over to the pathspec code for expanding "." into a list of path names
tracked by Git.

[1] git-bugreport-2021-01-06-1209.txt (git can't deal with special characters)
[2] https://lore.kernel.org/git/A102844A-9501-4A86-854D-E3B387D378AA@xxxxxxxxxx/

Reported-by: Daniel Troger <random_n0body@xxxxxxxxxx>
Helped-by: Philippe Blain <levraiphilippeblain@xxxxxxxxx>
Signed-off-by: Torsten Bögershausen <tboegi@xxxxxx>
---
 This may need some refinements, but we need to start somewhere...
 Are there any good ideas on how to improve the commit message?
 Should the code in git.c be "hidden" in a function somewhere else?
 Other comments are appreciated.


compat/precompose_utf8.c     | 24 ++++++++++++++++++++++++
 compat/precompose_utf8.h     |  2 ++
 git-compat-util.h            |  8 ++++++++
 git.c                        |  9 +++++++++
 t/t3910-mac-os-precompose.sh | 15 +++++++++++++++
 5 files changed, 58 insertions(+)

diff --git a/compat/precompose_utf8.c b/compat/precompose_utf8.c
index 136250fbf6..06e371660f 100644
--- a/compat/precompose_utf8.c
+++ b/compat/precompose_utf8.c
@@ -36,6 +36,11 @@ static size_t has_non_ascii(const char *s, size_t maxlen, size_t *strlen_c)
 	return ret;
 }

+int precompose_read_config_gently(void)
+{
+	git_config_get_bool("core.precomposeunicode", &precomposed_unicode);
+	return precomposed_unicode == 1;
+}

 void probe_utf8_pathname_composition(void)
 {
@@ -60,6 +65,25 @@ void probe_utf8_pathname_composition(void)
 	strbuf_release(&path);
 }

+char *precompose_string_if_needed(const char *in)
+{
+	size_t inlen = strlen(in);
+	size_t outlen;
+	char *out = NULL;
+	if ((has_non_ascii(in, inlen, NULL)) && (precomposed_unicode == 1)) {
+		int saved_errno = errno;
+		out = reencode_string_len(in, inlen,
+					  repo_encoding, path_encoding,
+					  &outlen);
+		if (out && outlen == inlen && !memcmp(in, out, outlen)) {
+			/* strings are identical: no need to return a new one */
+			free(out);
+			out = NULL;
+		}
+		errno = saved_errno;
+	}
+	return out;
+}

 void precompose_argv(int argc, const char **argv)
 {
diff --git a/compat/precompose_utf8.h b/compat/precompose_utf8.h
index 6f843d3e1a..ce82857d73 100644
--- a/compat/precompose_utf8.h
+++ b/compat/precompose_utf8.h
@@ -28,6 +28,8 @@ typedef struct {
 	struct dirent_prec_psx *dirent_nfc;
 } PREC_DIR;

+int precompose_read_config_gently(void);
+char *precompose_string_if_needed(const char *in);
 void precompose_argv(int argc, const char **argv);
 void probe_utf8_pathname_composition(void);

diff --git a/git-compat-util.h b/git-compat-util.h
index 104993b975..f34854b66f 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -252,6 +252,14 @@ typedef unsigned long uintptr_t;
 #ifdef PRECOMPOSE_UNICODE
 #include "compat/precompose_utf8.h"
 #else
+static inline int precompose_read_config_gently(void)
+{
+	return 0;
+}
+static inline char *precompose_string_if_needed(const char *in)
+{
+	return NULL; /* no need to precompose a string */
+}
 static inline void precompose_argv(int argc, const char **argv)
 {
 	; /* nothing */
diff --git a/git.c b/git.c
index a00a0a4d94..f09e14f733 100644
--- a/git.c
+++ b/git.c
@@ -421,6 +421,15 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv)
 			prefix = setup_git_directory_gently(&nongit_ok);
 		}

+		if (precompose_read_config_gently()) {
+			precompose_argv(argc, argv);
+			if (prefix) {
+				const char *prec_pfx;
+					prec_pfx = precompose_string_if_needed(prefix);
+				if (prec_pfx)
+					prefix = prec_pfx; /* memory lost */
+			}
+		}
 		if (use_pager == -1 && p->option & (RUN_SETUP | RUN_SETUP_GENTLY) &&
 		    !(p->option & DELAY_PAGER_CONFIG))
 			use_pager = check_pager_config(p->cmd);
diff --git a/t/t3910-mac-os-precompose.sh b/t/t3910-mac-os-precompose.sh
index 54ce19e353..bbbc50da93 100755
--- a/t/t3910-mac-os-precompose.sh
+++ b/t/t3910-mac-os-precompose.sh
@@ -191,6 +191,21 @@ test_expect_failure 'handle existing decomposed filenames' '
 	test_must_be_empty untracked
 '

+test_expect_success "unicode decomposed: git restore -p . " '
+	DIRNAMEPWD=dir.Odiarnfc &&
+	DIRNAMEINREPO=dir.$Adiarnfc &&
+	export DIRNAMEPWD DIRNAMEINREPO &&
+	git init $DIRNAMEPWD &&
+	( cd $DIRNAMEPWD &&
+		mkdir $DIRNAMEINREPO &&
+		cd $DIRNAMEINREPO &&
+		echo "Initial" >file &&
+		git add file &&
+		echo "More stuff" >>file &&
+		echo y | git restore -p .
+	)
+'
+
 # Test if the global core.precomposeunicode stops autosensing
 # Must be the last test case
 test_expect_success "respect git config --global core.precomposeunicode" '
--
2.30.0.155.g66e871b664





[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux