Allow path names to be encoded in UTF-8 in the repository and checked out as e.g. ISO-8859-1 in the working tree. Introduce a config variable i18n.pathEncoding. If empty, no re-encoding of path names is done. Add t3911 to test encoding back and forth. The re-encoding is done in compat/reencode_pathname.c, where all file system functions like open(), stat(), readdir() are re-defined. reencode_pathname.c includes all functionality from precompose_utf8.c, which should be removed. Signed-off-by: Torsten Bögershausen <tboegi@xxxxxx> --- Please read this as an RFC, so there are several limitations: compat/reencode_pathname.h defines struct dirent_psx with d_name[2]. This is done to test renc_pn_readdir() in compat/reencode_pathname.c. Test case t1450 fails even on one of my Linux machines. At first glance it looks like the same failure which has sometimes been observed on Mac OS X. compat/precompose_utf8.[ch] have been integrated into reencode_pathname.[ch], and should be removed. The patch should work on v1.7.12; it has not been tested against the latest master. Comments are welcome. Documentation/config.txt | 10 + Makefile | 11 +- builtin/init-db.c | 3 + cache.h | 1 + compat/reencode_pathname.c | 441 ++++++++++++++++++++++++++++++++++++++++++ compat/reencode_pathname.h | 72 +++++++ config.c | 3 + environment.c | 1 + git-compat-util.h | 20 +- parse-options.c | 2 +- t/t3911-i18n-filename-8859.sh | 251 ++++++++++++++++++++++++ wt-status.c | 21 +- 12 files changed, 827 insertions(+), 9 deletions(-) create mode 100644 compat/reencode_pathname.c create mode 100644 compat/reencode_pathname.h create mode 100755 t/t3911-i18n-filename-8859.sh diff --git a/Documentation/config.txt b/Documentation/config.txt index a95e5a4..d633d54 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -1442,6 +1442,16 @@ i18n.logOutputEncoding:: Character encoding the commit messages are converted to when running 'git log' and friends. 
+i18n.pathEncoding:: + This option is only used by some implementations of git. + When "git init" sets core.supportspathencoding to true, + i18n.pathEncoding can be set to re-encode path names when + a working tree is checked out. + Path names may be e.g. encoded in ISO-8859-1 and are stored as + UTF-8 encoded in the repository. + When not set, the encoding of path names is the same in working tree + and the repository. + imap:: The configuration variables in the 'imap' section are described in linkgit:git-imap-send[1]. diff --git a/Makefile b/Makefile index 6b0c961..141562e 100644 --- a/Makefile +++ b/Makefile @@ -143,6 +143,9 @@ all:: # # Define NEEDS_LIBICONV if linking with libc is not enough (Darwin). # +# Define PATH_ENCODING if the encoding of file names +# differs from the encoding in the git repo +# # Define NEEDS_SOCKET if linking with libc is not enough (SunOS, # Patrick Mauritz). # @@ -595,6 +598,7 @@ LIB_H += compat/bswap.h LIB_H += compat/cygwin.h LIB_H += compat/mingw.h LIB_H += compat/obstack.h +LIB_H += compat/reencode_pathname.h LIB_H += compat/precompose_utf8.h LIB_H += compat/terminal.h LIB_H += compat/win32/dirent.h @@ -932,6 +936,7 @@ ifeq ($(uname_S),OSF1) NO_NSEC = YesPlease endif ifeq ($(uname_S),Linux) + PATH_ENCODING = YesPlease NO_STRLCPY = YesPlease NO_MKSTEMPS = YesPlease HAVE_PATHS_H = YesPlease @@ -999,7 +1004,7 @@ ifeq ($(uname_S),Darwin) NO_MEMMEM = YesPlease USE_ST_TIMESPEC = YesPlease HAVE_DEV_TTY = YesPlease - COMPAT_OBJS += compat/precompose_utf8.o + COMPAT_OBJS += compat/reencode_pathname.o BASIC_CFLAGS += -DPRECOMPOSE_UNICODE endif ifeq ($(uname_S),SunOS) @@ -1591,6 +1596,10 @@ ifdef FREAD_READS_DIRECTORIES COMPAT_CFLAGS += -DFREAD_READS_DIRECTORIES COMPAT_OBJS += compat/fopen.o endif +ifdef PATH_ENCODING + COMPAT_CFLAGS += -DPATH_ENCODING + COMPAT_OBJS += compat/reencode_pathname.o +endif ifdef NO_SYMLINK_HEAD BASIC_CFLAGS += -DNO_SYMLINK_HEAD endif diff --git a/builtin/init-db.c b/builtin/init-db.c index 244fb7f..f159d43 
100644 --- a/builtin/init-db.c +++ b/builtin/init-db.c @@ -291,6 +291,9 @@ static int create_default_files(const char *template_path) if (!access(path, F_OK)) git_config_set("core.ignorecase", "true"); probe_utf8_pathname_composition(path, len); +#ifdef PATH_ENCODING + git_config_set("core.supportspathencoding", "true"); +#endif } return reinit; diff --git a/cache.h b/cache.h index 67f28b4..8023767 100644 --- a/cache.h +++ b/cache.h @@ -1160,6 +1160,7 @@ extern int user_ident_sufficiently_given(void); extern const char *git_commit_encoding; extern const char *git_log_output_encoding; extern const char *git_mailmap_file; +extern const char *wt_path_encoding; /* IO helper functions */ extern void maybe_flush_or_die(FILE *, const char *); diff --git a/compat/reencode_pathname.c b/compat/reencode_pathname.c new file mode 100644 index 0000000..3bdc776 --- /dev/null +++ b/compat/reencode_pathname.c @@ -0,0 +1,441 @@ +/* + * Converts pathnames from one encoding into another. + * The pathnames are stored as UTF-8 in the repository, + * and might be checkout out as e.g. ISO-8859-1 in the working tree + * + * On MacOS X decomposed unicode is converted into precomposed unicode. 
+ */ + +#define REENCODE_PATHNAME_C +#include "cache.h" +#include "utf8.h" +#include "reencode_pathname.h" + +#if defined(OLD_ICONV) || (defined(__sun__) && !defined(_XPG6)) + typedef const char *iconv_ibp; +#else + typedef char *iconv_ibp; +#endif + +const static char *repo_path_encoding = "UTF-8"; + +static iconv_t iconv_open_or_die(const char *tocode, const char *fromcode) +{ + iconv_t my_iconv; + my_iconv = iconv_open(tocode, fromcode); + if (my_iconv == (iconv_t) -1) + die_errno(_("iconv_open(%s,%s) failed"), tocode, fromcode); + return my_iconv; +} + +static size_t has_non_ascii(const char *s, size_t maxlen, size_t *strlen_c) +{ + const uint8_t *ptr = (const uint8_t *)s; + size_t strlen_chars = 0; + size_t ret = 0; + + if (!ptr || !*ptr) + return 0; + + while (*ptr && maxlen) { + if (*ptr & 0x80) + ret++; + strlen_chars++; + ptr++; + maxlen--; + } + if (strlen_c) + *strlen_c = strlen_chars; + + return ret; +} + +#ifdef PRECOMPOSE_UNICODE +void probe_utf8_pathname_composition(char *path, int len) +{ + static const char *auml_nfc = "\xc3\xa4"; + static const char *auml_nfd = "\x61\xcc\x88"; + int output_fd; + if (precomposed_unicode != -1) + return; /* We found it defined in the global config, respect it */ + strcpy(path + len, auml_nfc); + output_fd = open(path, O_CREAT|O_EXCL|O_RDWR, 0600); + if (output_fd >= 0) { + close(output_fd); + strcpy(path + len, auml_nfd); + /* Indicate to the user, that we can configure it to true */ + if (!access(path, R_OK)) + git_config_set("core.precomposeunicode", "false"); + /* To be backward compatible, set precomposed_unicode to 0 */ + precomposed_unicode = 0; + strcpy(path + len, auml_nfc); + if (unlink(path)) + die_errno(_("failed to unlink '%s'"), path); + } +} +#endif + +void reencode_argv(int argc, const char **argv) +{ + int i = 0; + const char *oldarg; + char *newarg; + iconv_t ic_wt_to_repo; + +#ifdef PRECOMPOSE_UNICODE + if (precomposed_unicode == 1) + wt_path_encoding = "UTF-8-MAC"; +#endif + + if 
(!wt_path_encoding || !*wt_path_encoding) + return; + + ic_wt_to_repo = iconv_open_or_die(repo_path_encoding, wt_path_encoding); + + while (i < argc) { + size_t namelen; + oldarg = argv[i]; + if (has_non_ascii(oldarg, (size_t)-1, &namelen)) { + newarg = reencode_string_iconv(oldarg, namelen, ic_wt_to_repo); + if (newarg) + argv[i] = newarg; + } + i++; + } + iconv_close(ic_wt_to_repo); +} + +#ifdef PATH_ENCODING +char *str_repo2worktree(const char *in) +{ + int olderrno = errno; + char *retvalue = NULL; + size_t inlen; + + if (!wt_path_encoding || !*wt_path_encoding) + return NULL; + + if (!in) + die("str_repo2worktree in == NULL\n"); + + if (has_non_ascii(in, (size_t)-1, &inlen)) { + iconv_t my_iconv_repo2worktree; + my_iconv_repo2worktree = iconv_open_or_die(wt_path_encoding, + repo_path_encoding); + + retvalue = reencode_string_iconv(in, inlen, my_iconv_repo2worktree); + iconv_close(my_iconv_repo2worktree); + if (retvalue) + errno = olderrno; + } else + errno = olderrno; + + return retvalue; +} + +char *str_worktree2repolen(const char *in, size_t insz) +{ + char *retvalue = NULL; + size_t inlen; + if (!wt_path_encoding || !*wt_path_encoding) + return NULL; + + if (has_non_ascii(in, insz, &inlen)) { + int olderrno = errno; + iconv_t my_iconv_worktree2repo; + my_iconv_worktree2repo = iconv_open_or_die(repo_path_encoding, + wt_path_encoding); + retvalue = reencode_string_iconv(in, insz, my_iconv_worktree2repo); + iconv_close(my_iconv_worktree2repo); + if (retvalue) + errno = olderrno; + } + return retvalue; +} + +char *str_worktree2repo(const char *in) +{ + return str_worktree2repolen(in, strlen(in)); +} +#endif + +#define RENC_PN_DECL_SAVERRNO_PATH1(path) \ + int olderrno = errno; \ + const char *path1_enc = path; \ + char *path1_malloc_wt_encoded = NULL + +#define RENC_PN_DECL_PATH2(path) \ + const char *path2_enc = path; \ + char *path2_malloc_wt_encoded = NULL + + +#define RENC_PN_CONV_PATH1(path, erroret) \ + errno=0; \ + path1_malloc_wt_encoded = 
str_repo2worktree(path); \ + if (!path1_malloc_wt_encoded && errno) { \ + return erroret; \ + } \ + if (path1_malloc_wt_encoded) \ + path1_enc = path1_malloc_wt_encoded; \ + errno = olderrno; + +#define RENC_PN_CONV_PATH2(path) \ + errno=0; \ + path2_malloc_wt_encoded = str_repo2worktree(path); \ + if (!path2_malloc_wt_encoded && errno) { \ + return -1; \ + } \ + if (path2_malloc_wt_encoded) \ + path2_enc = path2_malloc_wt_encoded; \ + errno = olderrno; + + +RENC_FN_DIR *renc_pn_opendir(const char *dirname) +{ + RENC_PN_DECL_SAVERRNO_PATH1(dirname); + RENC_FN_DIR *renc_pn_dir = xmalloc(sizeof(RENC_FN_DIR)); + +#ifdef PRECOMPOSE_UNICODE + if (precomposed_unicode == 1) + wt_path_encoding = "UTF-8-MAC"; +#endif + + renc_pn_dir->dirent_utf8 = xmalloc(sizeof(dirent_psx)); + renc_pn_dir->dirent_utf8->max_name_len = sizeof(renc_pn_dir->dirent_utf8->d_name); + + RENC_PN_CONV_PATH1(dirname, NULL); + + renc_pn_dir->dirp = opendir(path1_enc); + olderrno = errno; + if (!renc_pn_dir->dirp) { + free(path1_malloc_wt_encoded); + free(renc_pn_dir->dirent_utf8); + free(renc_pn_dir); + return NULL; + } else + renc_pn_dir->ic_wt_to_repo = (iconv_t)-1; + + free(path1_malloc_wt_encoded); + errno = olderrno; + return renc_pn_dir; +} + +struct dirent_psx *renc_pn_readdir(RENC_FN_DIR *renc_pn_dir) +{ + struct dirent *res; + res = readdir(renc_pn_dir->dirp); + if (res) { + size_t namelenz = strlen(res->d_name) + 1; /* \0 */ + size_t new_len_needed = 0; + int ret_errno = errno; + + renc_pn_dir->dirent_utf8->d_ino = res->d_ino; + renc_pn_dir->dirent_utf8->d_type = res->d_type; + do { + if (new_len_needed > renc_pn_dir->dirent_utf8->max_name_len) { + size_t new_len = sizeof(dirent_psx) + new_len_needed - + sizeof(renc_pn_dir->dirent_utf8->d_name); + + renc_pn_dir->dirent_utf8 = xrealloc(renc_pn_dir->dirent_utf8, new_len); + renc_pn_dir->dirent_utf8->max_name_len = new_len_needed; + } + + if (wt_path_encoding && has_non_ascii(res->d_name, (size_t)-1, NULL)) { + iconv_ibp cp = 
(iconv_ibp)res->d_name; + size_t inleft = namelenz; + char *outpos = &renc_pn_dir->dirent_utf8->d_name[0]; + size_t outsz = renc_pn_dir->dirent_utf8->max_name_len; + errno = 0; + if (renc_pn_dir->ic_wt_to_repo == (iconv_t)-1) + renc_pn_dir->ic_wt_to_repo = iconv_open_or_die(repo_path_encoding, + wt_path_encoding); + if (-1 != iconv(renc_pn_dir->ic_wt_to_repo, + &cp, &inleft, &outpos, &outsz)) + break; /* Conversion OK, we are done */ + if (errno == E2BIG) { + char *tmp = reencode_string_iconv(res->d_name, namelenz, + renc_pn_dir->ic_wt_to_repo); + if (tmp) { + new_len_needed = strlen(tmp) + 1; /* \0 */ + free(tmp); + } + } else { + /* + * iconv() failed and errno could be EILSEQ, EINVAL, EBADF + * In general we avoid illegal byte sequences. + * If they occur on a mounted drive (e.g. NFS) it is not worth to + * die() for that, but rather let the user see the original name + */ + namelenz = 0; /* trigger strlcpy */ + } + } else { + if (namelenz > renc_pn_dir->dirent_utf8->max_name_len) + new_len_needed = namelenz; /* need to re-allocate */ + else + namelenz = 0; /* trigger strlcpy */ + } + } while (new_len_needed > renc_pn_dir->dirent_utf8->max_name_len); + + if (!namelenz) + strlcpy(renc_pn_dir->dirent_utf8->d_name, res->d_name, + renc_pn_dir->dirent_utf8->max_name_len); + + errno = ret_errno; + return renc_pn_dir->dirent_utf8; + } + return NULL; +} + +int renc_pn_closedir(RENC_FN_DIR *renc_pn_dir) +{ + int ret_value; + int ret_errno; + ret_value = closedir(renc_pn_dir->dirp); + ret_errno = errno; + if (renc_pn_dir->ic_wt_to_repo != (iconv_t)-1) + iconv_close(renc_pn_dir->ic_wt_to_repo); + free(renc_pn_dir->dirent_utf8); + free(renc_pn_dir); + errno = ret_errno; + return ret_value; +} + +int renc_pn_mkdir(const char *path, mode_t mode) +{ + RENC_PN_DECL_SAVERRNO_PATH1(path); + int ret; + + RENC_PN_CONV_PATH1(path, -1); + + ret = mkdir(path1_enc, mode); + free(path1_malloc_wt_encoded); + return ret; +} + +int renc_pn_lstat(const char *path, struct stat *buf) +{ + 
RENC_PN_DECL_SAVERRNO_PATH1(path); + int ret; + + RENC_PN_CONV_PATH1(path, -1); + + ret = lstat(path1_enc, buf); + + free(path1_malloc_wt_encoded); + return ret; +} + +int renc_pn_stat(const char *path, struct stat *buf) +{ + RENC_PN_DECL_SAVERRNO_PATH1(path); + int ret; + + RENC_PN_CONV_PATH1(path, -1); + + ret = stat(path1_enc, buf); + + free(path1_malloc_wt_encoded); + return ret; +} + +int renc_pn_open(const char *path, int oflag, ... ) +{ + RENC_PN_DECL_SAVERRNO_PATH1(path); + va_list params; + int mode; + int ret; + + va_start(params, oflag); + mode = va_arg(params, int); + va_end(params); + + RENC_PN_CONV_PATH1(path, -1); + + ret = open(path1_enc, oflag, mode); + + free(path1_malloc_wt_encoded); + return ret; +} + +int renc_pn_unlink(const char *path) +{ + RENC_PN_DECL_SAVERRNO_PATH1(path); + int ret; + + RENC_PN_CONV_PATH1(path, -1); + + ret = unlink(path1_enc); + free(path1_malloc_wt_encoded); + return ret; +} + +FILE *renc_pn_fopen(const char *path, const char *mode) +{ + RENC_PN_DECL_SAVERRNO_PATH1(path); + FILE *ret; + + RENC_PN_CONV_PATH1(path,NULL); + + ret = fopen(path1_enc,mode); + free(path1_malloc_wt_encoded); + return ret; +} + + +ssize_t renc_pn_readlink(const char *path, char *buf, size_t bufsiz) +{ + RENC_PN_DECL_SAVERRNO_PATH1(path); + ssize_t ret; + + RENC_PN_CONV_PATH1(path, -1); + + ret = readlink(path1_enc, buf, bufsiz); + + if (ret > 0) { + char *new_buf = NULL; + errno = 0; + new_buf = str_worktree2repolen(buf, ret); + if (new_buf) { + size_t newlen = strlen(new_buf); + if (newlen > bufsiz) + newlen = bufsiz; + memcpy(buf, new_buf, newlen); + ret = newlen; + free(new_buf); + } else if (!errno) + errno = olderrno; + } + free(path1_malloc_wt_encoded); + return ret; +} + +int renc_pn_symlink(const char *oldname, const char *newname) +{ + RENC_PN_DECL_SAVERRNO_PATH1(oldname); + RENC_PN_DECL_PATH2(newname); + int ret; + + RENC_PN_CONV_PATH1(oldname, -1); + RENC_PN_CONV_PATH2(newname); + + ret = symlink(path1_enc, path2_enc); + 
free(path1_malloc_wt_encoded); + free(path2_malloc_wt_encoded); + return ret; +} + +int renc_pn_rename(const char *oldname, const char *newname) +{ + RENC_PN_DECL_SAVERRNO_PATH1(oldname); + RENC_PN_DECL_PATH2(newname); + int ret; + + RENC_PN_CONV_PATH1(oldname, -1); + RENC_PN_CONV_PATH2(newname); + + ret = rename(path1_enc, path2_enc); + free(path1_malloc_wt_encoded); + free(path2_malloc_wt_encoded); + + return ret; +} diff --git a/compat/reencode_pathname.h b/compat/reencode_pathname.h new file mode 100644 index 0000000..9300ba4 --- /dev/null +++ b/compat/reencode_pathname.h @@ -0,0 +1,70 @@ +#ifndef REENCODE_PATHNAME_H +#include <sys/stat.h> +#include <sys/types.h> +#include <dirent.h> +#include <fcntl.h> +#include <stdio.h> +#include <iconv.h> + +typedef struct dirent_psx { + ino_t d_ino; /* Posix */ + size_t max_name_len; /* See below */ + unsigned char d_type; /* available on all systems git runs on */ + + /* + * See http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/dirent.h.html + * NAME_MAX + 1 should be enough, but some systems have + * NAME_MAX=255 and strlen(d_name) may return 508 or 510 + * Solution: allocate more when needed, see renc_pn_readdir() + */ + char d_name[/* NAME_MAX */ 1+1]; +} dirent_psx; + +typedef struct { + iconv_t ic_wt_to_repo; + DIR *dirp; + struct dirent_psx *dirent_utf8; +} RENC_FN_DIR; + +void reencode_argv(int argc, const char **argv); +void probe_utf8_pathname_composition(char *, int); + +RENC_FN_DIR *renc_pn_opendir(const char *dirname); +struct dirent_psx *renc_pn_readdir(RENC_FN_DIR *dirp); +int renc_pn_closedir(RENC_FN_DIR *dirp); + +#ifdef PATH_ENCODING +char* str_repo2worktree(const char *in); +int renc_pn_mkdir(const char *path, mode_t mode); +int renc_pn_lstat(const char *path, struct stat *buf); +int renc_pn_stat(const char *path, struct stat *buf); +int renc_pn_open(const char *path, int oflag, ... 
); +int renc_pn_unlink(const char *path); +FILE *renc_pn_fopen(const char *path, const char *mode); +ssize_t renc_pn_readlink(const char *path, char *buf, size_t bufsiz); +int renc_pn_symlink(const char *oldname, const char *newname); +int renc_pn_rename(const char *oldname, const char *newname); +#endif + +#ifndef REENCODE_PATHNAME_C +#define opendir(n) renc_pn_opendir(n) +#define readdir(d) renc_pn_readdir(d) +#define closedir(d) renc_pn_closedir(d) +#define dirent dirent_psx +#define DIR RENC_FN_DIR + +#ifdef PATH_ENCODING +#define mkdir(a,b) renc_pn_mkdir((a),(b)) +#define lstat(a,b) renc_pn_lstat((a),(b)) +#define stat(a,b) renc_pn_stat((a),(b)) +#define open renc_pn_open +#define unlink renc_pn_unlink +#define fopen(a,b) renc_pn_fopen((a),(b)) +#define readlink(a,b,c) renc_pn_readlink(a,b,c) +#define symlink(a,b) renc_pn_symlink(a,b) +#define rename(a,b) renc_pn_rename(a,b) +#endif + +#endif /* REENCODE_PATHNAME_C */ +#define REENCODE_PATHNAME_H +#endif /* REENCODE_PATHNAME_H */ diff --git a/config.c b/config.c index 2b706ea..d591c09 100644 --- a/config.c +++ b/config.c @@ -775,6 +775,9 @@ static int git_default_i18n_config(const char *var, const char *value) if (!strcmp(var, "i18n.logoutputencoding")) return git_config_string(&git_log_output_encoding, var, value); + if (!strcmp(var, "i18n.pathencoding")) + return git_config_string(&wt_path_encoding, var, value); + /* Add other config variables here and to Documentation/config.txt. 
*/ return 0; } diff --git a/environment.c b/environment.c index 85edd7f..ba81575 100644 --- a/environment.c +++ b/environment.c @@ -59,6 +59,7 @@ int grafts_replace_parents = 1; int core_apply_sparse_checkout; int merge_log_config = -1; int precomposed_unicode = -1; /* see probe_utf8_pathname_composition() */ +const char *wt_path_encoding = NULL; struct startup_info *startup_info; unsigned long pack_size_limit_cfg; diff --git a/git-compat-util.h b/git-compat-util.h index 35b095e..877b060 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -153,13 +153,21 @@ #endif #endif -/* used on Mac OS X */ -#ifdef PRECOMPOSE_UNICODE -#include "compat/precompose_utf8.h" +#if defined(PATH_ENCODING) || defined(PRECOMPOSE_UNICODE) +#include "compat/reencode_pathname.h" #else -#define precompose_str(in,i_nfd2nfc) -#define precompose_argv(c,v) -#define probe_utf8_pathname_composition(a,b) +#define reencode_argv(c,v) +#endif + +/* needed for Mac OS X */ +#ifndef PRECOMPOSE_UNICODE +#define probe_utf8_pathname_composition(a,b); +#endif + +#ifndef PATH_ENCODING +#define str_worktree2repolen(in, insz) (NULL) +#define str_repo2worktree(in) (NULL) +#define str_worktree2repo(in) (NULL) #endif #ifndef NO_LIBGEN_H diff --git a/parse-options.c b/parse-options.c index c1c66bd..5840c18 100644 --- a/parse-options.c +++ b/parse-options.c @@ -476,7 +476,7 @@ int parse_options(int argc, const char **argv, const char *prefix, usage_with_options(usagestr, options); } - precompose_argv(argc, argv); + reencode_argv(argc, argv); return parse_options_end(&ctx); } diff --git a/t/t3911-i18n-filename-8859.sh b/t/t3911-i18n-filename-8859.sh new file mode 100755 index 0000000..aa2be57 --- /dev/null +++ b/t/t3911-i18n-filename-8859.sh @@ -0,0 +1,251 @@ +#!/bin/sh +# +# Copyright (c) 2010 Torsten Bögershausen +# + +test_description='file system encodings UTF-8 ISO8859-1' + +. 
./test-lib.sh + +fname_UTF_8=`printf '\303\206\302\242'` +fname_ISO8859_1=`printf '\306\242'` +Euro_utf8=`printf '\342\202\254'` +supportspathencoding=`git config core.supportspathencoding` || : + + +add_file_dir_link() { + local bname=$1 + local fname=$2 + test_expect_success "add file $fname.f $bname" ' + git checkout master && + git checkout -b add_f_$bname && + >$fname.f && + git add $fname.f && + git commit -m "add fname" + ' + + test_expect_success "add dir $fname.d $bname" ' + git checkout master && + git checkout -b add_d_$bname && + mkdir $fname.d && + touch $fname.d/$fname.f && + git add $fname.d/$fname.f && + git commit -m "add fname.d/fname" + ' + + i=0 + for src in x $fname; do + for dst in x $fname; do + test_expect_success "add link $dst.l->$src.f on branch add_l_${i}_$bname" ' + git checkout master && + git checkout -b add_l_${i}_$bname && + ln -s $src.f $dst.l && + git add $dst.l && + git commit -m "add fname.l $i" + ' + i=$(($i+1)) + done + done +} + +test_expect_success "setup add rm x" ' + >x && + git add x && + git commit -m "1st commit" && + git rm x && + git commit -m "rm x" +' + +#combinations to be tested: +# UTF-8 -> ISO8859-1 +# ISO8859-1 -> UTF-8 + +if test "$supportspathencoding" +then + srcencodings="ISO8859-1 UTF-8" + for srcenc in $srcencodings + do + case $srcenc in + ISO8859-1) + dstenc=UTF-8 + ;; + UTF-8) + dstenc=ISO8859-1 + ;; + UTF-8-MAC) + dstenc=UTF-8 + ;; + *) + echo >&2 "Wrong encoding $srcenc" + exit 1 + ;; + esac + eval fname_src=\$fname_$(echo $srcenc | sed -e 's/-/_/g' -e 's/_MAC//') + eval fname_dst=\$fname_$(echo $dstenc | sed -e 's/-/_/g') + test_expect_success "setup $srcenc" ' + git checkout master && + git config i18n.pathencoding $srcenc + ' + add_file_dir_link $srcenc $fname_src + + test_expect_success "setup $dstenc" ' + git checkout master && + echo "git checkout Master" >&2 + ls -l >&2 + git config i18n.pathencoding $dstenc + ' + + test_expect_success "checkout file $dstenc (was $srcenc)" ' + git checkout 
add_f_$srcenc + ' + + test_expect_success "exists file $dstenc (was $srcenc)" ' + test -f $fname_dst.f + ' + + test_expect_success "log file $dstenc (was $srcenc)" ' + git log $fname_dst.f + ' + + test_expect_success "git mv" ' + git checkout -b mv_file_$srcenc && + git mv $fname_dst.f XX.f && + git commit -m "git mv fname_dst.f XX.f" + ' + + test_expect_success "checkout dir $dstenc (was $srcenc)" ' + git checkout add_d_$srcenc + ' + + test_expect_success "exist dir $dstenc (was $srcenc)" ' + test -d $fname_dst.d + ' + + test_expect_success "log dir $dstenc (was $srcenc)" ' + git log $fname_dst.d + ' + + i=0 + for src in x $fname_dst; do + for dst in x $fname_dst; do + test_expect_success "checkout link $dst.l->$src.f branch add_l_${i}_$srcenc" ' + git checkout add_l_${i}_$srcenc + ' + test_expect_success "exist link $dst.l->$src.f branch add_l_${i}_$srcenc" ' + test -L $dst.l + ' + test_expect_success "log link $dst.l->$src.f branch add_l_${i}_$srcenc" ' + git log $dst.l + ' + test_expect_success "readlink $dst.l->$src.f branch add_l_${i}_$srcenc" ' + echo "$src.f" >expect && + readlink "$dst.l" > actual && + test_cmp expect actual && + rm expect actual + ' + i=$(($i+1)) + done + done + done + # Make sure that Euro sign can NOT be checked out in 8859 + #fname_src=Euro + test_expect_success "setup UTF-8" ' + git checkout master && + git config i18n.pathencoding UTF-8 + ' + add_file_dir_link Euro $Euro_utf8 + + test_expect_success "setup ISO8859-1" ' + git checkout master && + rm -rf * && + git config i18n.pathencoding ISO8859-1 + ' + test_expect_success "checkout file Euro branch add_f_Euro" ' + git checkout add_f_Euro + echo * >actual && + echo "*" >expect && + test_cmp expect actual && + rm expect actual + ' + + test_expect_success "checkout dir Euro branch add_d_Euro" ' + rm -rf * && + test_must_fail git checkout add_d_Euro + ' + + test_expect_success "Cleanup" ' + git config i18n.pathencoding UTF-8 && + git checkout master && + rm -rf * && + git reset --hard 
&& + git config i18n.pathencoding ISO8859-1 + ' + + test_expect_success "checkout link Euro.l->x.f branch add_l_1_Euro" ' + ! git checkout add_l_1_Euro + ' + + test_expect_success "No link Euro.l->x.f" ' + echo * >actual && + echo "*" >expect && + test_cmp expect actual && + rm expect actual + ' + + test_expect_success "Cleanup after Euro.l->x.f" ' + git config i18n.pathencoding UTF-8 && + git checkout master && + rm -rf * && + git reset --hard && + git config i18n.pathencoding ISO8859-1 + ' + + # Checkoing out a soft link pointing to a filename outside + # 8859-1 should fail + test_expect_failure "checkout link x.l->Euro.f branch add_l_2_Euro" ' + ! git checkout add_l_2_Euro + ' + + test_expect_success "No link x.f->Euro.l" ' + echo * >actual && + echo "*" >expect && + test_cmp expect actual && + rm expect actual + ' + + test_expect_success "Cleanup after link x.l->Euro.f branch" ' + git config i18n.pathencoding UTF-8 && + git checkout master && + rm -rf * && + git reset --hard && + git config i18n.pathencoding ISO8859-1 + ' + + test_expect_success "checkout link Euro.l->Euro.f branch add_l_3_Euro" ' + ! 
git checkout add_l_3_Euro + ' + + test_expect_success "No link Euro.l->Euro.f" ' + echo * >actual && + echo "*" >expect && + test_cmp expect actual && + rm expect actual + ' + +else + test_expect_success "setup 8859" ' + git config i18n.pathencoding ISO8859-1 && + git checkout -b add_file_8859 && + > $fname_src.f && + git add $fname_src.f && + git commit -m "add fname_src" && + git config i18n.pathencoding UTF-8 && + rm -rf * && + git reset --hard + ' + test_expect_success "Silent support of pathencoding" ' + test_must_fail test -f $fname_UTF_8.f + ' +fi + +test_done diff --git a/wt-status.c b/wt-status.c index c110cbc..1590caa 100644 --- a/wt-status.c +++ b/wt-status.c @@ -233,7 +233,26 @@ static void wt_status_print_trailer(struct wt_status *s) status_printf_ln(s, color(WT_STATUS_HEADER, s), ""); } -#define quote_path quote_path_relative +#ifdef PATH_ENCODING +char *quote_path_repo2worktree(const char *in, int len, + struct strbuf *out, const char *prefix) +{ + const char *in_encoded = in; + char *in_worktree_encoded = str_repo2worktree(in); + char *ret; + (void)len; + + if (in_worktree_encoded) + in_encoded = in_worktree_encoded; + ret = quote_path_relative(in_encoded , -1, out, prefix); + free(in_worktree_encoded); + return ret; + +} + #define quote_path quote_path_repo2worktree +#else + #define quote_path quote_path_relative +#endif static void wt_status_print_unmerged_data(struct wt_status *s, struct string_list_item *it) -- 1.7.12 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html