Torsten Bögershausen skrev 2012-09-01 08.11:> Allow path names to be encoded in UTF-8 in the repository > and checkout out as e.g. ISO-8859-1 in the working tree. Ack for attempting this. Did it myself in 2007, but times weren't ripe then, I guess. > +i18n.pathEncoding:: > + This option is only used by some implementations of git. > + When "git init" sets core.supportspathencoding to true, > + i18n.pathEncoding can be set to re-encode path names when > + a working tree is checked out. > + Path names may be e.g. encoded in ISO-8859-1 and are stored as > + UTF-8 encoded in the repository. > + When not set, the encoding of path names is the same in working tree > + and the repository. "If set, then core.precomposeunicode is ignored on Mac OS X." > diff --git a/compat/reencode_pathname.c b/compat/reencode_pathname.c > new file mode 100644 > index 0000000..3bdc776 > --- /dev/null > +++ b/compat/reencode_pathname.c > @@ -0,0 +1,441 @@ > +/* > + * Converts pathnames from one encoding into another. > + * The pathnames are stored as UTF-8 in the repository, > + * and might be checkout out as e.g. ISO-8859-1 in the working tree > + * > + * On MacOS X decomposed unicode is converted into precomposed unicode. , ignoring the setting of core.precomposeunicode. [...] 
> + */ > + > +#define REENCODE_PATHNAME_C > +#include "cache.h" > +#include "utf8.h" > +#include "reencode_pathname.h" > + > +#if defined(OLD_ICONV) || (defined(__sun__) && !defined(_XPG6)) > + typedef const char *iconv_ibp; > +#else > + typedef char *iconv_ibp; > +#endif > + > +const static char *repo_path_encoding = "UTF-8"; > + > +static iconv_t iconv_open_or_die(const char *tocode, const char *fromcode) > +{ > + iconv_t my_iconv; > + my_iconv = iconv_open(tocode, fromcode); join these two lines > + if (my_iconv == (iconv_t) -1) > + die_errno(_("iconv_open(%s,%s) failed"), tocode, fromcode); > + return my_iconv; > +} > + > +static size_t has_non_ascii(const char *s, size_t maxlen, size_t *strlen_c) > +{ > + const uint8_t *ptr = (const uint8_t *)s; > + size_t strlen_chars = 0; > + size_t ret = 0; > + > + if (!ptr || !*ptr) > + return 0; > + > + while (*ptr && maxlen) { > + if (*ptr & 0x80) > + ret++; > + strlen_chars++; > + ptr++; > + maxlen--; > + } > + if (strlen_c) > + *strlen_c = strlen_chars; > + > + return ret; > +} > + > +#ifdef PRECOMPOSE_UNICODE > +void probe_utf8_pathname_composition(char *path, int len) > +{ > + static const char *auml_nfc = "\xc3\xa4"; > + static const char *auml_nfd = "\x61\xcc\x88"; > + int output_fd; > + if (precomposed_unicode != -1) > + return; /* We found it defined in the global config, respect it */ a blank line here would be nice > + strcpy(path + len, auml_nfc); > + output_fd = open(path, O_CREAT|O_EXCL|O_RDWR, 0600); > + if (output_fd >= 0) { > + close(output_fd); > + strcpy(path + len, auml_nfd); > + /* Indicate to the user, that we can configure it to true */ > + if (!access(path, R_OK)) > + git_config_set("core.precomposeunicode", "false"); > + /* To be backward compatible, set precomposed_unicode to 0 */ > + precomposed_unicode = 0; > + strcpy(path + len, auml_nfc); > + if (unlink(path)) > + die_errno(_("failed to unlink '%s'"), path); > + } > +} > +#endif [...] 
> +struct dirent_psx *renc_pn_readdir(RENC_FN_DIR *renc_pn_dir) > +{ > + struct dirent *res; > + res = readdir(renc_pn_dir->dirp); > + if (res) { > + size_t namelenz = strlen(res->d_name) + 1; /* \0 */ > + size_t new_len_needed = 0; > + int ret_errno = errno; > + > + renc_pn_dir->dirent_utf8->d_ino = res->d_ino; > + renc_pn_dir->dirent_utf8->d_type = res->d_type; > + do { > + if (new_len_needed > renc_pn_dir->dirent_utf8->max_name_len) { indent [...] > diff --git a/environment.c b/environment.c > index 85edd7f..ba81575 100644 > --- a/environment.c > +++ b/environment.c > @@ -59,6 +59,7 @@ int grafts_replace_parents = 1; > int core_apply_sparse_checkout; > int merge_log_config = -1; > int precomposed_unicode = -1; /* see probe_utf8_pathname_composition() */ > +const char *wt_path_encoding = NULL; indent > struct startup_info *startup_info; > unsigned long pack_size_limit_cfg; > > diff --git a/git-compat-util.h b/git-compat-util.h > index 35b095e..877b060 100644 > --- a/git-compat-util.h > +++ b/git-compat-util.h > @@ -153,13 +153,21 @@ > #endif > #endif > > -/* used on Mac OS X */ > -#ifdef PRECOMPOSE_UNICODE > -#include "compat/precompose_utf8.h" > +#if defined(PATH_ENCODING) || defined(PRECOMPOSE_UNICODE) > +#include "compat/reencode_pathname.h" > #else > -#define precompose_str(in,i_nfd2nfc) > -#define precompose_argv(c,v) > -#define probe_utf8_pathname_composition(a,b) > +#define reencode_argv(c,v) > +#endif > + > +/* needed for Mac OS X */ > +#ifndef PRECOMPOSE_UNICODE > +#define probe_utf8_pathname_composition(a,b); > +#endif > + > +#ifndef PATH_ENCODING > +#define str_worktree2repolen(in, insz) (NULL) > +#define str_repo2worktree(in) (NULL) > +#define str_worktree2repo(in) (NULL) > #endif > > #ifndef NO_LIBGEN_H > diff --git a/parse-options.c b/parse-options.c > index c1c66bd..5840c18 100644 > --- a/parse-options.c > +++ b/parse-options.c > @@ -476,7 +476,7 @@ int parse_options(int argc, const char **argv, const char *prefix, > usage_with_options(usagestr, 
options); > } > > - precompose_argv(argc, argv); > + reencode_argv(argc, argv); > return parse_options_end(&ctx); > } > > diff --git a/t/t3911-i18n-filename-8859.sh b/t/t3911-i18n-filename-8859.sh > new file mode 100755 > index 0000000..aa2be57 > --- /dev/null > +++ b/t/t3911-i18n-filename-8859.sh > @@ -0,0 +1,251 @@ > +#!/bin/sh > +# > +# Copyright (c) 2010 Torsten Bögershausen > +# > + > +test_description='file system encodings UTF-8 ISO8859-1' > + > +. ./test-lib.sh > + > +fname_UTF_8=`printf '\303\206\302\242'` > +fname_ISO8859_1=`printf '\306\242'` > +Euro_utf8=`printf '\342\202\254'` > +supportspathencoding=`git config core.supportspathencoding` || : > + > + > +add_file_dir_link() { > + local bname=$1 > + local fname=$2 > + test_expect_success "add file $fname.f $bname" ' > + git checkout master && > + git checkout -b add_f_$bname && > + >$fname.f && > + git add $fname.f && > + git commit -m "add fname" > + ' > + > + test_expect_success "add dir $fname.d $bname" ' > + git checkout master && > + git checkout -b add_d_$bname && > + mkdir $fname.d && > + touch $fname.d/$fname.f && > + git add $fname.d/$fname.f && > + git commit -m "add fname.d/fname" > + ' > + > + i=0 > + for src in x $fname; do > + for dst in x $fname; do > + test_expect_success "add link $dst.l->$src.f on branch add_l_${i}_$bname" ' > + git checkout master && > + git checkout -b add_l_${i}_$bname && > + ln -s $src.f $dst.l && > + git add $dst.l && > + git commit -m "add fname.l $i" > + ' > + i=$(($i+1)) > + done > + done > +} > + > +test_expect_success "setup add rm x" ' > + >x && > + git add x && > + git commit -m "1st commit" && > + git rm x && > + git commit -m "rm x" > +' > + > +#combinations to be tested: > +# UTF-8 -> ISO8859-1 > +# ISO8859-1 -> UTF-8 > + > +if test "$supportspathencoding" > +then > + srcencodings="ISO8859-1 UTF-8" > + for srcenc in $srcencodings > + do > + case $srcenc in > + ISO8859-1) > + dstenc=UTF-8 > + ;; > + UTF-8) > + dstenc=ISO8859-1 > + ;; > + UTF-8-MAC) > + 
dstenc=UTF-8 > + ;; > + *) > + echo >&2 "Wrong encoding $srcenc" > + exit 1 > + ;; > + esac > + eval fname_src=\$fname_$(echo $srcenc | sed -e 's/-/_/g' -e 's/_MAC//') > + eval fname_dst=\$fname_$(echo $dstenc | sed -e 's/-/_/g') > + test_expect_success "setup $srcenc" ' > + git checkout master && > + git config i18n.pathencoding $srcenc > + ' > + add_file_dir_link $srcenc $fname_src > + > + test_expect_success "setup $dstenc" ' > + git checkout master && > + echo "git checkout Master" >&2 > + ls -l >&2 > + git config i18n.pathencoding $dstenc > + ' > + > + test_expect_success "checkout file $dstenc (was $srcenc)" ' > + git checkout add_f_$srcenc > + ' > + > + test_expect_success "exists file $dstenc (was $srcenc)" ' > + test -f $fname_dst.f > + ' > + > + test_expect_success "log file $dstenc (was $srcenc)" ' > + git log $fname_dst.f > + ' > + > + test_expect_success "git mv" ' > + git checkout -b mv_file_$srcenc && > + git mv $fname_dst.f XX.f && > + git commit -m "git mv fname_dst.f XX.f" > + ' > + > + test_expect_success "checkout dir $dstenc (was $srcenc)" ' > + git checkout add_d_$srcenc > + ' > + > + test_expect_success "exist dir $dstenc (was $srcenc)" ' > + test -d $fname_dst.d > + ' > + > + test_expect_success "log dir $dstenc (was $srcenc)" ' > + git log $fname_dst.d > + ' > + > + i=0 > + for src in x $fname_dst; do > + for dst in x $fname_dst; do > + test_expect_success "checkout link $dst.l->$src.f branch add_l_${i}_$srcenc" ' > + git checkout add_l_${i}_$srcenc > + ' > + test_expect_success "exist link $dst.l->$src.f branch add_l_${i}_$srcenc" ' > + test -L $dst.l > + ' > + test_expect_success "log link $dst.l->$src.f branch add_l_${i}_$srcenc" ' > + git log $dst.l > + ' > + test_expect_success "readlink $dst.l->$src.f branch add_l_${i}_$srcenc" ' > + echo "$src.f" >expect && > + readlink "$dst.l" > actual && > + test_cmp expect actual && > + rm expect actual > + ' > + i=$(($i+1)) > + done > + done > + done > + # Make sure that Euro sign can NOT be 
checked out in 8859 "8859-1", The euro sign exists in 8859-15. > + #fname_src=Euro > + test_expect_success "setup UTF-8" ' > + git checkout master && > + git config i18n.pathencoding UTF-8 > + ' > + add_file_dir_link Euro $Euro_utf8 > + > + test_expect_success "setup ISO8859-1" ' > + git checkout master && > + rm -rf * && > + git config i18n.pathencoding ISO8859-1 > + ' > + test_expect_success "checkout file Euro branch add_f_Euro" ' > + git checkout add_f_Euro Missing && ? > + echo * >actual && > + echo "*" >expect && > + test_cmp expect actual && > + rm expect actual > + ' > + > + test_expect_success "checkout dir Euro branch add_d_Euro" ' > + rm -rf * && > + test_must_fail git checkout add_d_Euro > + ' > + > + test_expect_success "Cleanup" ' > + git config i18n.pathencoding UTF-8 && > + git checkout master && > + rm -rf * && > + git reset --hard && > + git config i18n.pathencoding ISO8859-1 > + ' > + > + test_expect_success "checkout link Euro.l->x.f branch add_l_1_Euro" ' > + ! git checkout add_l_1_Euro > + ' > + > + test_expect_success "No link Euro.l->x.f" ' > + echo * >actual && > + echo "*" >expect && > + test_cmp expect actual && > + rm expect actual > + ' > + > + test_expect_success "Cleanup after Euro.l->x.f" ' > + git config i18n.pathencoding UTF-8 && > + git checkout master && > + rm -rf * && > + git reset --hard && > + git config i18n.pathencoding ISO8859-1 > + ' > + > + # Checkoing out a soft link pointing to a filename outside "checking" > + # 8859-1 should fail > + test_expect_failure "checkout link x.l->Euro.f branch add_l_2_Euro" ' > + ! 
git checkout add_l_2_Euro > + ' > + > + test_expect_success "No link x.f->Euro.l" ' > + echo * >actual && > + echo "*" >expect && > + test_cmp expect actual && > + rm expect actual > + ' > + > + test_expect_success "Cleanup after link x.l->Euro.f branch" ' > + git config i18n.pathencoding UTF-8 && > + git checkout master && > + rm -rf * && > + git reset --hard && > + git config i18n.pathencoding ISO8859-1 > + ' > + > + test_expect_success "checkout link Euro.l->Euro.f branch add_l_3_Euro" ' > + ! git checkout add_l_3_Euro > + ' > + > + test_expect_success "No link Euro.l->Euro.f" ' > + echo * >actual && > + echo "*" >expect && > + test_cmp expect actual && > + rm expect actual > + ' > + > +else > + test_expect_success "setup 8859" ' "8859-1" > + git config i18n.pathencoding ISO8859-1 && > + git checkout -b add_file_8859 && > + > $fname_src.f && > + git add $fname_src.f && > + git commit -m "add fname_src" && > + git config i18n.pathencoding UTF-8 && > + rm -rf * && > + git reset --hard > + ' > + test_expect_success "Silent support of pathencoding" ' > + test_must_fail test -f $fname_UTF_8.f > + ' > +fi > + > +test_done -- robin -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html