[RFC] i18n.pathencoding

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Allow path names to be encoded in UTF-8 in the repository
and checked out as e.g. ISO-8859-1 in the working tree.

Introduce a config variable i18n.pathEncoding.
If empty, no re-encoding of path names is done.

Add t3911 to test encoding back and forth

The re-encoding is done in compat/reencode_pathname.c,
where all file system functions like open(), stat(),
readdir() are re-defined.

reencode_pathname.c includes all functionality from
precompose_utf8.c, which should be removed

Signed-off-by: Torsten Bögershausen <tboegi@xxxxxx>
---
Please read this as an RFC, so there are several limitations:

 compat/reencode_pathname.h defines struct dirent_psx with d_name[2].
   This is done to test renc_pn_readdir() in compat/reencode_pathname.c

 Test case t1450 fails even on one of my Linux machines. At first glance
  it looks like the same failure that has sometimes been observed on Mac OS X.

 compat/precompose_utf8.[ch] have been integrated into reencode_pathname.[ch],
  and should be removed.

 The patch should work on v1.7.12; it has not been tested against the latest master.

 Comments are welcome.


 Documentation/config.txt      |  10 +
 Makefile                      |  11 +-
 builtin/init-db.c             |   3 +
 cache.h                       |   1 +
 compat/reencode_pathname.c    | 441 ++++++++++++++++++++++++++++++++++++++++++
 compat/reencode_pathname.h    |  72 +++++++
 config.c                      |   3 +
 environment.c                 |   1 +
 git-compat-util.h             |  20 +-
 parse-options.c               |   2 +-
 t/t3911-i18n-filename-8859.sh | 251 ++++++++++++++++++++++++
 wt-status.c                   |  21 +-
 12 files changed, 827 insertions(+), 9 deletions(-)
 create mode 100644 compat/reencode_pathname.c
 create mode 100644 compat/reencode_pathname.h
 create mode 100755 t/t3911-i18n-filename-8859.sh

diff --git a/Documentation/config.txt b/Documentation/config.txt
index a95e5a4..d633d54 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -1442,6 +1442,16 @@ i18n.logOutputEncoding::
 	Character encoding the commit messages are converted to when
 	running 'git log' and friends.
 
+i18n.pathEncoding::
+	This option is only used by some implementations of git.
+	When "git init" sets core.supportspathencoding to true,
+	i18n.pathEncoding can be set to re-encode path names when
+	a working tree is checked out.
+	Path names may be e.g. encoded in ISO-8859-1 and are stored as
+	UTF-8 encoded in the repository.
+	When not set, the encoding of path names is the same in working tree
+	and the repository.
+
 imap::
 	The configuration variables in the 'imap' section are described
 	in linkgit:git-imap-send[1].
diff --git a/Makefile b/Makefile
index 6b0c961..141562e 100644
--- a/Makefile
+++ b/Makefile
@@ -143,6 +143,9 @@ all::
 #
 # Define NEEDS_LIBICONV if linking with libc is not enough (Darwin).
 #
+# Define PATH_ENCODING if the encoding of file names
+# differs from the encoding in the git repo
+#
 # Define NEEDS_SOCKET if linking with libc is not enough (SunOS,
 # Patrick Mauritz).
 #
@@ -595,6 +598,7 @@ LIB_H += compat/bswap.h
 LIB_H += compat/cygwin.h
 LIB_H += compat/mingw.h
 LIB_H += compat/obstack.h
+LIB_H += compat/reencode_pathname.h
 LIB_H += compat/precompose_utf8.h
 LIB_H += compat/terminal.h
 LIB_H += compat/win32/dirent.h
@@ -932,6 +936,7 @@ ifeq ($(uname_S),OSF1)
 	NO_NSEC = YesPlease
 endif
 ifeq ($(uname_S),Linux)
+	PATH_ENCODING = YesPlease
 	NO_STRLCPY = YesPlease
 	NO_MKSTEMPS = YesPlease
 	HAVE_PATHS_H = YesPlease
@@ -999,7 +1004,7 @@ ifeq ($(uname_S),Darwin)
 	NO_MEMMEM = YesPlease
 	USE_ST_TIMESPEC = YesPlease
 	HAVE_DEV_TTY = YesPlease
-	COMPAT_OBJS += compat/precompose_utf8.o
+	COMPAT_OBJS += compat/reencode_pathname.o
 	BASIC_CFLAGS += -DPRECOMPOSE_UNICODE
 endif
 ifeq ($(uname_S),SunOS)
@@ -1591,6 +1596,10 @@ ifdef FREAD_READS_DIRECTORIES
 	COMPAT_CFLAGS += -DFREAD_READS_DIRECTORIES
 	COMPAT_OBJS += compat/fopen.o
 endif
+ifdef PATH_ENCODING
+	COMPAT_CFLAGS += -DPATH_ENCODING
+	COMPAT_OBJS += compat/reencode_pathname.o
+endif
 ifdef NO_SYMLINK_HEAD
 	BASIC_CFLAGS += -DNO_SYMLINK_HEAD
 endif
diff --git a/builtin/init-db.c b/builtin/init-db.c
index 244fb7f..f159d43 100644
--- a/builtin/init-db.c
+++ b/builtin/init-db.c
@@ -291,6 +291,9 @@ static int create_default_files(const char *template_path)
 		if (!access(path, F_OK))
 			git_config_set("core.ignorecase", "true");
 		probe_utf8_pathname_composition(path, len);
+#ifdef PATH_ENCODING
+		git_config_set("core.supportspathencoding", "true");
+#endif
 	}
 
 	return reinit;
diff --git a/cache.h b/cache.h
index 67f28b4..8023767 100644
--- a/cache.h
+++ b/cache.h
@@ -1160,6 +1160,7 @@ extern int user_ident_sufficiently_given(void);
 extern const char *git_commit_encoding;
 extern const char *git_log_output_encoding;
 extern const char *git_mailmap_file;
+extern const char *wt_path_encoding;
 
 /* IO helper functions */
 extern void maybe_flush_or_die(FILE *, const char *);
diff --git a/compat/reencode_pathname.c b/compat/reencode_pathname.c
new file mode 100644
index 0000000..3bdc776
--- /dev/null
+++ b/compat/reencode_pathname.c
@@ -0,0 +1,441 @@
+/*
+ * Converts pathnames from one encoding into another.
+ * The pathnames are stored as UTF-8 in the repository,
+ * and might be checked out as e.g. ISO-8859-1 in the working tree
+ *
+ * On MacOS X decomposed unicode is converted into precomposed unicode.
+ */
+
+#define REENCODE_PATHNAME_C
+#include "cache.h"
+#include "utf8.h"
+#include "reencode_pathname.h"
+
+#if defined(OLD_ICONV) || (defined(__sun__) && !defined(_XPG6))
+	typedef const char *iconv_ibp;
+#else
+	typedef char *iconv_ibp;
+#endif
+
+static const char *repo_path_encoding = "UTF-8"; /* encoding of path names in the repository */
+
+static iconv_t iconv_open_or_die(const char *tocode, const char *fromcode)
+{
+	iconv_t my_iconv;
+	my_iconv = iconv_open(tocode, fromcode);
+	if (my_iconv == (iconv_t) -1)
+		die_errno(_("iconv_open(%s,%s) failed"), tocode, fromcode);
+	return my_iconv;
+}
+
+static size_t has_non_ascii(const char *s, size_t maxlen, size_t *strlen_c)
+{
+	const uint8_t *ptr = (const uint8_t *)s;
+	size_t strlen_chars = 0;
+	size_t ret = 0;
+
+	if (!ptr || !*ptr)
+		return 0;
+
+	while (*ptr && maxlen) {
+		if (*ptr & 0x80)
+			ret++;
+		strlen_chars++;
+		ptr++;
+		maxlen--;
+	}
+	if (strlen_c)
+		*strlen_c = strlen_chars;
+
+	return ret;
+}
+
+#ifdef PRECOMPOSE_UNICODE
+void probe_utf8_pathname_composition(char *path, int len)
+{
+	static const char *auml_nfc = "\xc3\xa4";
+	static const char *auml_nfd = "\x61\xcc\x88";
+	int output_fd;
+	if (precomposed_unicode != -1)
+		return; /* We found it defined in the global config, respect it */
+	strcpy(path + len, auml_nfc);
+	output_fd = open(path, O_CREAT|O_EXCL|O_RDWR, 0600);
+	if (output_fd >= 0) {
+		close(output_fd);
+		strcpy(path + len, auml_nfd);
+		/* Indicate to the user, that we can configure it to true */
+		if (!access(path, R_OK))
+			git_config_set("core.precomposeunicode", "false");
+		/* To be backward compatible, set precomposed_unicode to 0 */
+		precomposed_unicode = 0;
+		strcpy(path + len, auml_nfc);
+		if (unlink(path))
+			die_errno(_("failed to unlink '%s'"), path);
+	}
+}
+#endif
+
+void reencode_argv(int argc, const char **argv)
+{
+	int i = 0;
+	const char *oldarg;
+	char *newarg;
+	iconv_t ic_wt_to_repo;
+
+#ifdef PRECOMPOSE_UNICODE
+	if (precomposed_unicode == 1)
+		wt_path_encoding = "UTF-8-MAC";
+#endif
+
+	if (!wt_path_encoding || !*wt_path_encoding)
+		return;
+
+	ic_wt_to_repo = iconv_open_or_die(repo_path_encoding, wt_path_encoding);
+
+	while (i < argc) {
+		size_t namelen;
+		oldarg = argv[i];
+		if (has_non_ascii(oldarg, (size_t)-1, &namelen)) {
+			newarg = reencode_string_iconv(oldarg, namelen, ic_wt_to_repo);
+			if (newarg)
+				argv[i] = newarg;
+		}
+		i++;
+	}
+	iconv_close(ic_wt_to_repo);
+}
+
+#ifdef PATH_ENCODING
+char *str_repo2worktree(const char *in)
+{
+	int olderrno = errno;
+	char *retvalue = NULL;
+	size_t inlen;
+
+	if (!wt_path_encoding || !*wt_path_encoding)
+		return NULL;
+
+	if (!in)
+		die("str_repo2worktree in == NULL\n");
+
+	if (has_non_ascii(in, (size_t)-1, &inlen)) {
+		iconv_t my_iconv_repo2worktree;
+		my_iconv_repo2worktree = iconv_open_or_die(wt_path_encoding,
+																							 repo_path_encoding);
+
+		retvalue = reencode_string_iconv(in, inlen, my_iconv_repo2worktree);
+		iconv_close(my_iconv_repo2worktree);
+		if (retvalue)
+			errno = olderrno;
+	} else
+		errno = olderrno;
+
+	return retvalue;
+}
+
+char *str_worktree2repolen(const char *in, size_t insz)
+{
+	char *retvalue = NULL; /* stays NULL when no conversion is done */
+	size_t inlen; /* bytes scanned by has_non_ascii(): stops at NUL or insz */
+	if (!wt_path_encoding || !*wt_path_encoding)
+		return NULL;
+
+	if (has_non_ascii(in, insz, &inlen)) {
+		int olderrno = errno;
+		iconv_t my_iconv_worktree2repo;
+		my_iconv_worktree2repo = iconv_open_or_die(repo_path_encoding,
+							   wt_path_encoding);
+		retvalue = reencode_string_iconv(in, inlen, my_iconv_worktree2repo);
+		iconv_close(my_iconv_worktree2repo);
+		if (retvalue) /* on failure leave errno as set by the conversion */
+			errno = olderrno;
+	}
+	return retvalue;
+}
+
+char *str_worktree2repo(const char *in)
+{
+	return str_worktree2repolen(in, strlen(in));
+}
+#endif
+
+#define RENC_PN_DECL_SAVERRNO_PATH1(path) \
+	int olderrno = errno; \
+	const char *path1_enc = path; \
+	char *path1_malloc_wt_encoded = NULL
+
+#define RENC_PN_DECL_PATH2(path) \
+	const char *path2_enc = path; \
+	char *path2_malloc_wt_encoded = NULL
+
+
+#define RENC_PN_CONV_PATH1(path, erroret) \
+	errno=0; \
+	path1_malloc_wt_encoded = str_repo2worktree(path); \
+	if (!path1_malloc_wt_encoded && errno) { \
+		return erroret; \
+	} \
+	if (path1_malloc_wt_encoded) \
+		path1_enc = path1_malloc_wt_encoded; \
+	errno = olderrno;
+
+#define RENC_PN_CONV_PATH2(path) \
+	errno=0; \
+	path2_malloc_wt_encoded = str_repo2worktree(path); \
+	if (!path2_malloc_wt_encoded && errno) { \
+		return -1; \
+	} \
+	if (path2_malloc_wt_encoded) \
+		path2_enc = path2_malloc_wt_encoded; \
+	errno = olderrno;
+
+
+RENC_FN_DIR *renc_pn_opendir(const char *dirname)
+{
+	RENC_PN_DECL_SAVERRNO_PATH1(dirname);
+	RENC_FN_DIR *renc_pn_dir;
+
+#ifdef PRECOMPOSE_UNICODE
+	if (precomposed_unicode == 1)
+		wt_path_encoding = "UTF-8-MAC";
+#endif
+
+	/* Convert first: the macro returns NULL on failure, so nothing may be allocated yet */
+	RENC_PN_CONV_PATH1(dirname, NULL);
+
+	renc_pn_dir = xmalloc(sizeof(RENC_FN_DIR));
+	renc_pn_dir->dirent_utf8 = xmalloc(sizeof(dirent_psx));
+	renc_pn_dir->dirent_utf8->max_name_len = sizeof(renc_pn_dir->dirent_utf8->d_name);
+	renc_pn_dir->ic_wt_to_repo = (iconv_t)-1; /* opened lazily by renc_pn_readdir() */
+	renc_pn_dir->dirp = opendir(path1_enc);
+	olderrno = errno; /* remember opendir()'s errno across the free() calls */
+	free(path1_malloc_wt_encoded);
+	if (!renc_pn_dir->dirp) {
+		free(renc_pn_dir->dirent_utf8);
+		free(renc_pn_dir);
+		renc_pn_dir = NULL;
+	}
+
+	errno = olderrno; /* free() may clobber errno */
+	return renc_pn_dir;
+}
+
+struct dirent_psx *renc_pn_readdir(RENC_FN_DIR *renc_pn_dir)
+{
+	struct dirent *res;
+	res = readdir(renc_pn_dir->dirp);
+	if (res) {
+		size_t namelenz = strlen(res->d_name) + 1; /* \0 */
+		size_t new_len_needed = 0;
+		int ret_errno = errno;
+
+		renc_pn_dir->dirent_utf8->d_ino	 = res->d_ino;
+		renc_pn_dir->dirent_utf8->d_type = res->d_type;
+	do {
+		 if (new_len_needed > renc_pn_dir->dirent_utf8->max_name_len) {
+				size_t new_len = sizeof(dirent_psx) + new_len_needed -
+					sizeof(renc_pn_dir->dirent_utf8->d_name);
+
+				renc_pn_dir->dirent_utf8 = xrealloc(renc_pn_dir->dirent_utf8, new_len);
+				renc_pn_dir->dirent_utf8->max_name_len = new_len_needed;
+			}
+
+			if (wt_path_encoding && has_non_ascii(res->d_name, (size_t)-1, NULL)) {
+				iconv_ibp cp = (iconv_ibp)res->d_name;
+				size_t inleft = namelenz;
+				char *outpos = &renc_pn_dir->dirent_utf8->d_name[0];
+				size_t outsz = renc_pn_dir->dirent_utf8->max_name_len;
+				errno = 0;
+				if (renc_pn_dir->ic_wt_to_repo == (iconv_t)-1)
+					renc_pn_dir->ic_wt_to_repo = iconv_open_or_die(repo_path_encoding,
+																												 wt_path_encoding);
+				if (-1 != iconv(renc_pn_dir->ic_wt_to_repo,
+												&cp, &inleft,	&outpos, &outsz))
+					break; /* Conversion OK, we are done */
+				if (errno == E2BIG) {
+					char *tmp = reencode_string_iconv(res->d_name, namelenz,
+																						renc_pn_dir->ic_wt_to_repo);
+					if (tmp) {
+						new_len_needed = strlen(tmp) + 1; /* \0 */
+						free(tmp);
+					}
+				} else {
+					/*
+					 * iconv() failed and errno could be EILSEQ, EINVAL, EBADF
+					 * In general we avoid illegal byte sequences.
+					 * If they occur on a mounted drive (e.g. NFS) it is not worth to
+					 * die() for that, but rather let the user see the original name
+					 */
+					namelenz = 0; /* trigger strlcpy */
+				}
+			} else {
+				if (namelenz > renc_pn_dir->dirent_utf8->max_name_len)
+					new_len_needed = namelenz; /* need to re-allocate */
+				else
+					namelenz = 0;	 /* trigger strlcpy */
+			}
+		} while (new_len_needed > renc_pn_dir->dirent_utf8->max_name_len);
+
+		if (!namelenz)
+			strlcpy(renc_pn_dir->dirent_utf8->d_name, res->d_name,
+							renc_pn_dir->dirent_utf8->max_name_len);
+
+		errno = ret_errno;
+		return renc_pn_dir->dirent_utf8;
+	}
+	return NULL;
+}
+
+int renc_pn_closedir(RENC_FN_DIR *renc_pn_dir)
+{
+	int ret_value;
+	int ret_errno;
+	ret_value = closedir(renc_pn_dir->dirp);
+	ret_errno = errno;
+	if (renc_pn_dir->ic_wt_to_repo != (iconv_t)-1)
+		iconv_close(renc_pn_dir->ic_wt_to_repo);
+	free(renc_pn_dir->dirent_utf8);
+	free(renc_pn_dir);
+	errno = ret_errno;
+	return ret_value;
+}
+
+int renc_pn_mkdir(const char *path, mode_t mode)
+{
+	RENC_PN_DECL_SAVERRNO_PATH1(path);
+	int ret;
+
+	RENC_PN_CONV_PATH1(path, -1);
+
+	ret = mkdir(path1_enc, mode);
+	free(path1_malloc_wt_encoded);
+	return ret;
+}
+
+int renc_pn_lstat(const char *path, struct stat *buf)
+{
+	RENC_PN_DECL_SAVERRNO_PATH1(path);
+	int ret;
+
+	RENC_PN_CONV_PATH1(path, -1);
+
+	ret = lstat(path1_enc, buf);
+
+	free(path1_malloc_wt_encoded);
+	return ret;
+}
+
+int renc_pn_stat(const char *path, struct stat *buf)
+{
+	RENC_PN_DECL_SAVERRNO_PATH1(path);
+	int ret;
+
+	RENC_PN_CONV_PATH1(path, -1);
+
+	ret = stat(path1_enc, buf);
+
+	free(path1_malloc_wt_encoded);
+	return ret;
+}
+
+int renc_pn_open(const char *path, int oflag, ...	 )
+{
+	RENC_PN_DECL_SAVERRNO_PATH1(path);
+	va_list params;
+	int mode;
+	int ret;
+
+	va_start(params, oflag);
+	mode = va_arg(params, int);
+	va_end(params);
+
+	RENC_PN_CONV_PATH1(path, -1);
+
+	ret = open(path1_enc, oflag, mode);
+
+	free(path1_malloc_wt_encoded);
+	return ret;
+}
+
+int renc_pn_unlink(const char *path)
+{
+	RENC_PN_DECL_SAVERRNO_PATH1(path);
+	int ret;
+
+	RENC_PN_CONV_PATH1(path, -1);
+
+	ret = unlink(path1_enc);
+	free(path1_malloc_wt_encoded);
+	return ret;
+}
+
+FILE *renc_pn_fopen(const char *path, const char *mode)
+{
+	RENC_PN_DECL_SAVERRNO_PATH1(path);
+	FILE *ret;
+
+	RENC_PN_CONV_PATH1(path,NULL);
+
+	ret = fopen(path1_enc,mode);
+	free(path1_malloc_wt_encoded);
+	return ret;
+}
+
+
+ssize_t renc_pn_readlink(const char *path, char *buf, size_t bufsiz)
+{
+	RENC_PN_DECL_SAVERRNO_PATH1(path);
+	ssize_t ret;
+
+	RENC_PN_CONV_PATH1(path, -1);
+
+	ret = readlink(path1_enc, buf, bufsiz);
+
+	if (ret > 0) {
+		char *new_buf = NULL;
+		errno = 0;
+		new_buf = str_worktree2repolen(buf, ret);
+		if (new_buf) {
+			size_t newlen = strlen(new_buf);
+			if (newlen > bufsiz)
+				newlen = bufsiz;
+			memcpy(buf, new_buf, newlen);
+			ret = newlen;
+			free(new_buf);
+		} else if (!errno)
+			errno = olderrno;
+	}
+	free(path1_malloc_wt_encoded);
+	return ret;
+}
+
+int renc_pn_symlink(const char *oldname, const char *newname)
+{
+	RENC_PN_DECL_SAVERRNO_PATH1(oldname);
+	RENC_PN_DECL_PATH2(newname);
+	int ret;
+
+	RENC_PN_CONV_PATH1(oldname, -1);
+	RENC_PN_CONV_PATH2(newname);
+
+	ret = symlink(path1_enc, path2_enc);
+	free(path1_malloc_wt_encoded);
+	free(path2_malloc_wt_encoded);
+	return ret;
+}
+
+int renc_pn_rename(const char *oldname, const char *newname)
+{
+	RENC_PN_DECL_SAVERRNO_PATH1(oldname);
+	RENC_PN_DECL_PATH2(newname);
+	int ret;
+
+	RENC_PN_CONV_PATH1(oldname, -1);
+	RENC_PN_CONV_PATH2(newname);
+
+	ret = rename(path1_enc, path2_enc);
+	free(path1_malloc_wt_encoded);
+	free(path2_malloc_wt_encoded);
+
+	return ret;
+}
diff --git a/compat/reencode_pathname.h b/compat/reencode_pathname.h
new file mode 100644
index 0000000..9300ba4
--- /dev/null
+++ b/compat/reencode_pathname.h
@@ -0,0 +1,70 @@
+#ifndef REENCODE_PATHNAME_H
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <iconv.h>
+
+typedef struct dirent_psx {
+	ino_t d_ino;            /* Posix */
+	size_t max_name_len;    /* See below */
+	unsigned char d_type;   /* available on all systems git runs on */
+
+	/*
+	 * See http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/dirent.h.html
+	 * NAME_MAX + 1 should be enough, but some systems have
+	 * NAME_MAX=255 and strlen(d_name) may return 508 or 510
+	 * Solution: allocate more when needed, see renc_pn_readdir()
+	 */
+	char   d_name[/* NAME_MAX */ 1+1];
+} dirent_psx;
+
+typedef struct {
+	iconv_t ic_wt_to_repo;
+	DIR *dirp;
+	struct dirent_psx *dirent_utf8;
+} RENC_FN_DIR;
+
+void reencode_argv(int argc, const char **argv);
+void probe_utf8_pathname_composition(char *, int);
+
+RENC_FN_DIR *renc_pn_opendir(const char *dirname);
+struct dirent_psx *renc_pn_readdir(RENC_FN_DIR *dirp);
+int renc_pn_closedir(RENC_FN_DIR *dirp);
+
+#ifdef PATH_ENCODING
+char* str_repo2worktree(const char *in);
+int renc_pn_mkdir(const char *path, mode_t mode);
+int renc_pn_lstat(const char *path, struct stat *buf);
+int renc_pn_stat(const char *path, struct stat *buf);
+int renc_pn_open(const char *path, int oflag, ...  );
+int renc_pn_unlink(const char *path);
+FILE *renc_pn_fopen(const char *path, const char *mode);
+ssize_t renc_pn_readlink(const char *path, char *buf, size_t bufsiz);
+int renc_pn_symlink(const char *oldname, const char *newname);
+int renc_pn_rename(const char *oldname, const char *newname);
+#endif
+
+#ifndef REENCODE_PATHNAME_C
+#define opendir(n) renc_pn_opendir(n)
+#define readdir(d) renc_pn_readdir(d)
+#define closedir(d) renc_pn_closedir(d)
+#define dirent dirent_psx
+#define DIR RENC_FN_DIR
+
+#ifdef PATH_ENCODING
+#define mkdir(a,b) renc_pn_mkdir((a),(b))
+#define lstat(a,b) renc_pn_lstat((a),(b))
+#define stat(a,b) renc_pn_stat((a),(b))
+#define open renc_pn_open
+#define unlink renc_pn_unlink
+#define fopen(a,b) renc_pn_fopen((a),(b))
+#define readlink(a,b,c) renc_pn_readlink(a,b,c)
+#define symlink(a,b) renc_pn_symlink(a,b)
+#define rename(a,b) renc_pn_rename(a,b)
+#endif
+
+#endif  /* REENCODE_PATHNAME_C */
+#define  REENCODE_PATHNAME_H
+#endif /* REENCODE_PATHNAME_H */
diff --git a/config.c b/config.c
index 2b706ea..d591c09 100644
--- a/config.c
+++ b/config.c
@@ -775,6 +775,9 @@ static int git_default_i18n_config(const char *var, const char *value)
 	if (!strcmp(var, "i18n.logoutputencoding"))
 		return git_config_string(&git_log_output_encoding, var, value);
 
+	if (!strcmp(var, "i18n.pathencoding"))
+		return git_config_string(&wt_path_encoding, var, value);
+
 	/* Add other config variables here and to Documentation/config.txt. */
 	return 0;
 }
diff --git a/environment.c b/environment.c
index 85edd7f..ba81575 100644
--- a/environment.c
+++ b/environment.c
@@ -59,6 +59,7 @@ int grafts_replace_parents = 1;
 int core_apply_sparse_checkout;
 int merge_log_config = -1;
 int precomposed_unicode = -1; /* see probe_utf8_pathname_composition() */
+const char *wt_path_encoding = NULL;
 struct startup_info *startup_info;
 unsigned long pack_size_limit_cfg;
 
diff --git a/git-compat-util.h b/git-compat-util.h
index 35b095e..877b060 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -153,13 +153,21 @@
 #endif
 #endif
 
-/* used on Mac OS X */
-#ifdef PRECOMPOSE_UNICODE
-#include "compat/precompose_utf8.h"
+#if defined(PATH_ENCODING) || defined(PRECOMPOSE_UNICODE)
+#include "compat/reencode_pathname.h"
 #else
-#define precompose_str(in,i_nfd2nfc)
-#define precompose_argv(c,v)
-#define probe_utf8_pathname_composition(a,b)
+#define reencode_argv(c,v)
+#endif
+
+/* only needed for Mac OS X; expands to nothing without PRECOMPOSE_UNICODE */
+#ifndef PRECOMPOSE_UNICODE
+#define probe_utf8_pathname_composition(a,b)
+#endif
+
+#ifndef PATH_ENCODING
+#define str_worktree2repolen(in, insz) (NULL)
+#define str_repo2worktree(in) (NULL)
+#define str_worktree2repo(in) (NULL)
 #endif
 
 #ifndef NO_LIBGEN_H
diff --git a/parse-options.c b/parse-options.c
index c1c66bd..5840c18 100644
--- a/parse-options.c
+++ b/parse-options.c
@@ -476,7 +476,7 @@ int parse_options(int argc, const char **argv, const char *prefix,
 		usage_with_options(usagestr, options);
 	}
 
-	precompose_argv(argc, argv);
+	reencode_argv(argc, argv);
 	return parse_options_end(&ctx);
 }
 
diff --git a/t/t3911-i18n-filename-8859.sh b/t/t3911-i18n-filename-8859.sh
new file mode 100755
index 0000000..aa2be57
--- /dev/null
+++ b/t/t3911-i18n-filename-8859.sh
@@ -0,0 +1,251 @@
+#!/bin/sh
+#
+# Copyright (c) 2010 Torsten Bögershausen
+#
+
+test_description='file system encodings UTF-8 ISO8859-1'
+
+. ./test-lib.sh
+
+fname_UTF_8=`printf '\303\206\302\242'`
+fname_ISO8859_1=`printf '\306\242'`
+Euro_utf8=`printf '\342\202\254'`
+supportspathencoding=`git config core.supportspathencoding` || :
+
+
+add_file_dir_link() {
+	local bname=$1
+	local fname=$2
+	test_expect_success "add file $fname.f $bname" '
+		git checkout master &&
+		git checkout -b add_f_$bname &&
+		>$fname.f &&
+		git add $fname.f &&
+		git commit -m "add fname"
+	'
+
+	test_expect_success "add dir $fname.d $bname" '
+		git checkout master &&
+		git checkout -b add_d_$bname &&
+		mkdir $fname.d &&
+		touch $fname.d/$fname.f &&
+		git add $fname.d/$fname.f &&
+		git commit -m "add fname.d/fname"
+	'
+
+	i=0
+	for src in x $fname; do
+		for dst in x $fname; do
+			test_expect_success "add link $dst.l->$src.f on branch add_l_${i}_$bname" '
+				git checkout master &&
+				git checkout -b add_l_${i}_$bname &&
+				ln -s $src.f $dst.l &&
+				git add $dst.l &&
+				git commit -m "add fname.l $i"
+			'
+			i=$(($i+1))
+		done
+	done
+}
+
+test_expect_success "setup add rm x" '
+	>x &&
+	git add x &&
+	git commit -m "1st commit" &&
+	git rm x &&
+	git commit -m "rm x"
+'
+
+#combinations to be tested:
+# UTF-8     -> ISO8859-1
+# ISO8859-1 -> UTF-8
+
+if test "$supportspathencoding"
+then
+	srcencodings="ISO8859-1 UTF-8"
+	for srcenc in $srcencodings
+	do
+		case $srcenc in
+		ISO8859-1)
+			dstenc=UTF-8
+		;;
+		UTF-8)
+			dstenc=ISO8859-1
+		;;
+		UTF-8-MAC)
+			dstenc=UTF-8
+		;;
+		*)
+			echo >&2 "Wrong encoding $srcenc"
+			exit 1
+		;;
+		esac
+		eval fname_src=\$fname_$(echo $srcenc | sed -e 's/-/_/g' -e 's/_MAC//')
+		eval fname_dst=\$fname_$(echo $dstenc | sed -e 's/-/_/g')
+		test_expect_success "setup $srcenc" '
+			git checkout master &&
+			git config i18n.pathencoding $srcenc
+		'
+		add_file_dir_link $srcenc $fname_src
+
+		test_expect_success "setup $dstenc" '
+			git checkout master &&
+			echo "git checkout Master" >&2
+			ls -l >&2
+			git config i18n.pathencoding $dstenc
+		'
+
+		test_expect_success "checkout file $dstenc (was $srcenc)" '
+			git checkout add_f_$srcenc
+		'
+
+		test_expect_success "exists file $dstenc (was $srcenc)" '
+			test -f $fname_dst.f
+		'
+
+		test_expect_success "log file $dstenc (was $srcenc)" '
+			git log $fname_dst.f
+		'
+
+		test_expect_success "git mv" '
+			git checkout -b mv_file_$srcenc &&
+			git mv $fname_dst.f XX.f &&
+			git commit -m "git mv fname_dst.f XX.f"
+		'
+
+		test_expect_success "checkout dir $dstenc (was $srcenc)" '
+			git checkout add_d_$srcenc
+		'
+
+		test_expect_success "exist dir $dstenc (was $srcenc)" '
+			test -d $fname_dst.d
+		'
+
+		test_expect_success "log dir $dstenc (was $srcenc)" '
+			git log $fname_dst.d
+		'
+
+		i=0
+		for src in x $fname_dst; do
+			for dst in x $fname_dst; do
+				test_expect_success "checkout link $dst.l->$src.f branch add_l_${i}_$srcenc" '
+					git checkout add_l_${i}_$srcenc
+				'
+				test_expect_success "exist link $dst.l->$src.f branch add_l_${i}_$srcenc" '
+					test -L $dst.l
+				'
+				test_expect_success "log link $dst.l->$src.f branch add_l_${i}_$srcenc" '
+					git log $dst.l
+				'
+				test_expect_success "readlink $dst.l->$src.f branch add_l_${i}_$srcenc" '
+					echo "$src.f" >expect &&
+					readlink "$dst.l" > actual &&
+					test_cmp expect actual &&
+					rm expect actual
+				'
+				i=$(($i+1))
+			done
+		done
+	done
+	# Make sure that Euro sign can NOT be checked out in 8859
+	#fname_src=Euro
+	test_expect_success "setup UTF-8" '
+		git checkout master &&
+		git config i18n.pathencoding UTF-8
+	'
+	add_file_dir_link Euro $Euro_utf8
+
+	test_expect_success "setup ISO8859-1" '
+		git checkout master &&
+		rm -rf * &&
+		git config i18n.pathencoding ISO8859-1
+	'
+	test_expect_success "checkout file Euro branch add_f_Euro" '
+		git checkout add_f_Euro
+		echo *  >actual &&
+		echo "*" >expect &&
+		test_cmp expect actual &&
+		rm expect actual
+	'
+
+	test_expect_success "checkout dir Euro branch add_d_Euro" '
+		rm -rf * &&
+		test_must_fail git checkout add_d_Euro
+	'
+
+	test_expect_success "Cleanup" '
+		git config i18n.pathencoding UTF-8 &&
+		git checkout master &&
+		rm -rf * &&
+		git reset --hard &&
+		git config i18n.pathencoding ISO8859-1
+	'
+
+	test_expect_success "checkout link Euro.l->x.f branch add_l_1_Euro" '
+		! git checkout add_l_1_Euro
+	'
+
+	test_expect_success "No link Euro.l->x.f" '
+		echo *  >actual &&
+		echo "*" >expect &&
+		test_cmp expect actual &&
+		rm expect actual
+	'
+
+	test_expect_success "Cleanup after Euro.l->x.f" '
+		git config i18n.pathencoding UTF-8 &&
+		git checkout master &&
+		rm -rf * &&
+		git reset --hard &&
+		git config i18n.pathencoding ISO8859-1
+	'
+
+	# Checking out a soft link pointing to a filename outside
+	# 8859-1 should fail
+	test_expect_failure "checkout link x.l->Euro.f branch add_l_2_Euro" '
+		! git checkout add_l_2_Euro
+	'
+
+	test_expect_success "No link x.f->Euro.l" '
+		echo *  >actual &&
+		echo "*" >expect &&
+		test_cmp expect actual &&
+		rm expect actual
+	'
+
+	test_expect_success "Cleanup after link x.l->Euro.f branch" '
+		git config i18n.pathencoding UTF-8 &&
+		git checkout master &&
+		rm -rf * &&
+		git reset --hard &&
+		git config i18n.pathencoding ISO8859-1
+	'
+
+	test_expect_success "checkout link Euro.l->Euro.f branch add_l_3_Euro" '
+		! git checkout add_l_3_Euro
+	'
+
+	test_expect_success "No link Euro.l->Euro.f" '
+		echo *  >actual &&
+		echo "*" >expect &&
+		test_cmp expect actual &&
+		rm expect actual
+	'
+
+else
+	test_expect_success "setup 8859" '
+		git config i18n.pathencoding ISO8859-1 &&
+		git checkout -b add_file_8859 &&
+		> $fname_src.f &&
+		git add $fname_src.f &&
+		git commit -m "add fname_src" &&
+		git config i18n.pathencoding UTF-8 &&
+		rm -rf * &&
+		git reset --hard
+	'
+	test_expect_success "Silent support of pathencoding" '
+		test_must_fail test -f $fname_UTF_8.f
+	'
+fi
+
+test_done
diff --git a/wt-status.c b/wt-status.c
index c110cbc..1590caa 100644
--- a/wt-status.c
+++ b/wt-status.c
@@ -233,7 +233,26 @@ static void wt_status_print_trailer(struct wt_status *s)
 	status_printf_ln(s, color(WT_STATUS_HEADER, s), "");
 }
 
-#define quote_path quote_path_relative
+#ifdef PATH_ENCODING
+char *quote_path_repo2worktree(const char *in, int len,
+			  struct strbuf *out, const char *prefix)
+{
+	const char *in_encoded = in;
+	char *in_worktree_encoded = str_repo2worktree(in);
+	char *ret;
+	(void)len;
+
+	if (in_worktree_encoded)
+		in_encoded = in_worktree_encoded;
+	ret = quote_path_relative(in_encoded , -1, out, prefix);
+	free(in_worktree_encoded);
+	return ret;
+
+}
+	#define quote_path quote_path_repo2worktree
+#else
+	#define quote_path quote_path_relative
+#endif
 
 static void wt_status_print_unmerged_data(struct wt_status *s,
 					  struct string_list_item *it)
-- 
1.7.12

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]