From: Torsten Bögershausen <tboegi@xxxxxx> The following sequence leads to a "BUG" assertion when run under macOS: #!/bin/sh DIR=git-test-restore-p Adiarnfd=$(printf 'A\314\210') DIRNAME=xx${Adiarnfd}yy mkdir $DIR && cd $DIR && git init && mkdir $DIRNAME && cd $DIRNAME && echo "Initial" >file && git add file && echo "One more line" >>file && echo y | git restore -p . && echo "OK" Initialized empty Git repository in /tmp/git-test-restore-p/.git/ BUG: pathspec.c:495: error initializing pathspec_item Cannot close git diff-index --cached --numstat [snip] The command `git restore` is run from a directory inside a Git repo. Git needs to split $CWD into two parts: the path to the repo and "the rest", if any. "The rest" becomes a "prefix" that is later used inside the pathspec code. As an example, "/path/to/repo/dir-inside-repå" would determine "/path/to/repo" as the root of the repo, the place where the configuration file .git/config is found. The rest becomes the prefix ("dir-inside-repå"), from which the pathspec machinery expands "." (more about this later). If the prefix is in decomposed form (written here as "dir-inside-rep°a" to make the decomposition visible), it doesn't match the precomposed "dir-inside-repå". The solution is to read the config variable "core.precomposeunicode" early. Then, if configured, precompose "prefix" (and argv) and hand the prefix over to the pathspec code for expanding "." into a list of path names tracked by Git. [1] git-bugreport-2021-01-06-1209.txt (git can't deal with special characters) [2] https://lore.kernel.org/git/A102844A-9501-4A86-854D-E3B387D378AA@xxxxxxxxxx/ Reported-by: Daniel Troger <random_n0body@xxxxxxxxxx> Helped-by: Philippe Blain <levraiphilippeblain@xxxxxxxxx> Signed-off-by: Torsten Bögershausen <tboegi@xxxxxx> --- This may need some refinements, but we need to start somewhere... Are there any good ideas on how to improve the commit message? Should the code in git.c be "hidden" in a function somewhere else? Other comments are appreciated.
compat/precompose_utf8.c | 24 ++++++++++++++++++++++++ compat/precompose_utf8.h | 2 ++ git-compat-util.h | 8 ++++++++ git.c | 9 +++++++++ t/t3910-mac-os-precompose.sh | 15 +++++++++++++++ 5 files changed, 58 insertions(+) diff --git a/compat/precompose_utf8.c b/compat/precompose_utf8.c index 136250fbf6..06e371660f 100644 --- a/compat/precompose_utf8.c +++ b/compat/precompose_utf8.c @@ -36,6 +36,11 @@ static size_t has_non_ascii(const char *s, size_t maxlen, size_t *strlen_c) return ret; } +int precompose_read_config_gently(void) +{ + git_config_get_bool("core.precomposeunicode", &precomposed_unicode); + return precomposed_unicode == 1; +} void probe_utf8_pathname_composition(void) { @@ -60,6 +65,25 @@ void probe_utf8_pathname_composition(void) strbuf_release(&path); } +char *precompose_string_if_needed(const char *in) +{ + size_t inlen = strlen(in); + size_t outlen; + char *out = NULL; + if ((has_non_ascii(in, inlen, NULL)) && (precomposed_unicode == 1)) { + int saved_errno = errno; + out = reencode_string_len(in, inlen, + repo_encoding, path_encoding, + &outlen); + if (out && outlen == inlen && !memcmp(in, out, outlen)) { + /* strings are identical: no need to return a new one */ + free(out); + out = NULL; + } + errno = saved_errno; + } + return out; +} void precompose_argv(int argc, const char **argv) { diff --git a/compat/precompose_utf8.h b/compat/precompose_utf8.h index 6f843d3e1a..ce82857d73 100644 --- a/compat/precompose_utf8.h +++ b/compat/precompose_utf8.h @@ -28,6 +28,8 @@ typedef struct { struct dirent_prec_psx *dirent_nfc; } PREC_DIR; +int precompose_read_config_gently(void); +char *precompose_string_if_needed(const char *in); void precompose_argv(int argc, const char **argv); void probe_utf8_pathname_composition(void); diff --git a/git-compat-util.h b/git-compat-util.h index 104993b975..f34854b66f 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -252,6 +252,14 @@ typedef unsigned long uintptr_t; #ifdef PRECOMPOSE_UNICODE #include 
"compat/precompose_utf8.h" #else +static inline int precompose_read_config_gently(void) +{ + return 0; +} +static inline char *precompose_string_if_needed(const char *in) +{ + return NULL; /* no need to precompose a string */ +} static inline void precompose_argv(int argc, const char **argv) { ; /* nothing */ diff --git a/git.c b/git.c index a00a0a4d94..f09e14f733 100644 --- a/git.c +++ b/git.c @@ -421,6 +421,15 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv) prefix = setup_git_directory_gently(&nongit_ok); } + if (precompose_read_config_gently()) { + precompose_argv(argc, argv); + if (prefix) { + const char *prec_pfx; + prec_pfx = precompose_string_if_needed(prefix); + if (prec_pfx) + prefix = prec_pfx; /* memory lost */ + } + } if (use_pager == -1 && p->option & (RUN_SETUP | RUN_SETUP_GENTLY) && !(p->option & DELAY_PAGER_CONFIG)) use_pager = check_pager_config(p->cmd); diff --git a/t/t3910-mac-os-precompose.sh b/t/t3910-mac-os-precompose.sh index 54ce19e353..bbbc50da93 100755 --- a/t/t3910-mac-os-precompose.sh +++ b/t/t3910-mac-os-precompose.sh @@ -191,6 +191,21 @@ test_expect_failure 'handle existing decomposed filenames' ' test_must_be_empty untracked ' +test_expect_success "unicode decomposed: git restore -p . " ' + DIRNAMEPWD=dir.Odiarnfc && + DIRNAMEINREPO=dir.$Adiarnfc && + export DIRNAMEPWD DIRNAMEINREPO && + git init $DIRNAMEPWD && + ( cd $DIRNAMEPWD && + mkdir $DIRNAMEINREPO && + cd $DIRNAMEINREPO && + echo "Initial" >file && + git add file && + echo "More stuff" >>file && + echo y | git restore -p . + ) +' + # Test if the global core.precomposeunicode stops autosensing # Must be the last test case test_expect_success "respect git config --global core.precomposeunicode" ' -- 2.30.0.155.g66e871b664