Torsten Bögershausen venit, vidit, dixit 20.05.2010 11:02: > Hej, > I have the same problem here. > Below there is a patch, which may solve the problem. > (Yes, whitespaces are broken. I'm still fighting with > git format-patch -s --cover-letter -M --stdout origin/master | git > imap-send) > But this patch may be a starting point for improvements. > Comments welcome > BR > /Torsten > > > > Improved interwork between Mac OS X and linux when umlauts are used > When a git repository containing utf-8 coded umlaut characters > is cloned onto a Mac OS X machine, the Mac OS system will convert > all filenames returned by readdir() into denormalized utf-8. > As a result of this conversion, git will not find them on disk. > This helps by treating the NFC and NFD versions of filenames as > identical on Mac OS. > > > > > > > Signed-off-by: Torsten Bögershausen <tboegi@xxxxxx> You signed off, but is Markus Kuhn's code from UCS GPL2-licensed? Also, a few tests would be nice. I remember we had threads on this issue in the past. I haven't checked yet (Thunderbird pruned my nntp history), but it is worth checking that you addressed any issues mentioned there. I have no Mac so I can't test, sorry. Would be happy to run Mac OS in a vm, but you know... Thanks for looking into this! Michael > --- > name-hash.c | 40 ++++++++++++++++++++++++++++++++++++++++ > utf8.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++------- > utf8.h | 11 +++++++++++ > 3 files changed, 99 insertions(+), 7 deletions(-) > > diff --git a/name-hash.c b/name-hash.c > index 0031d78..e6494e8 100644 > --- a/name-hash.c > +++ b/name-hash.c > @@ -7,6 +7,7 @@ > */ > #define NO_THE_INDEX_COMPATIBILITY_MACROS > #include "cache.h" > +#include "utf8.h" > > /* > * This removes bit 5 if bit 6 is set. 
> @@ -100,6 +101,25 @@ static int same_name(const struct cache_entry *ce, > const char *name, int namelen > return icase && slow_same_name(name, namelen, ce->name, len); > } > > +#ifdef __APPLE__ > +struct cache_entry *index_name_exists2(struct index_state *istate, > const char *name, int icase) > +{ > + int namelen = (int)strlen(name); > + unsigned int hash = hash_name(name, namelen); > + struct cache_entry *ce; > + > + ce = lookup_hash(hash, &istate->name_hash); > + while (ce) { > + if (!(ce->ce_flags & CE_UNHASHED)) { > + if (same_name(ce, name, namelen, icase)) > + return ce; > + } > + ce = ce->next; > + } > + return NULL; > +} > +#endif > + > struct cache_entry *index_name_exists(struct index_state *istate, const > char *name, int namelen, int icase) > { > unsigned int hash = hash_name(name, namelen); > @@ -115,5 +135,25 @@ struct cache_entry *index_name_exists(struct > index_state *istate, const char *na > } > ce = ce->next; > } > +#ifdef __APPLE__ > + { > + char *name_nfc_nfd; > + name_nfc_nfd = str_nfc2nfd(name); > + if (name_nfc_nfd) { > + ce = index_name_exists2(istate, name_nfc_nfd, icase); > + free(name_nfc_nfd); > + if (ce) > + return ce; > + } > + name_nfc_nfd = str_nfd2nfc(name); > + if (name_nfc_nfd) { > + ce = index_name_exists2(istate, name_nfc_nfd, icase); > + free(name_nfc_nfd); > + if (ce) > + return ce; > + } > + } > +#endif > + > return NULL; > } > diff --git a/utf8.c b/utf8.c > index 84cfc72..8e794dc 100644 > --- a/utf8.c > +++ b/utf8.c > @@ -2,6 +2,11 @@ > #include "strbuf.h" > #include "utf8.h" > > +#ifdef __APPLE__ > +static iconv_t my_iconv_nfd2nfc = (iconv_t) -1; > +static iconv_t my_iconv_nfc2nfd = (iconv_t) -1; > +#endif > + > /* This code is originally from http://www.cl.cam.ac.uk/~mgk25/ucs/ */ > > struct interval { > @@ -424,18 +429,13 @@ int is_encoding_utf8(const char *name) > #else > typedef char * iconv_ibp; > #endif > -char *reencode_string(const char *in, const char *out_encoding, const > char *in_encoding) > + > +char 
*reencode_string_iconv(const char *in, iconv_t conv) > { > - iconv_t conv; > size_t insz, outsz, outalloc; > char *out, *outpos; > iconv_ibp cp; > > - if (!in_encoding) > - return NULL; > - conv = iconv_open(out_encoding, in_encoding); > - if (conv == (iconv_t) -1) > - return NULL; > insz = strlen(in); > outsz = insz; > outalloc = outsz + 1; /* for terminating NUL */ > @@ -469,7 +469,48 @@ char *reencode_string(const char *in, const char > *out_encoding, const char *in_e > break; > } > } > + return out; > +} > + > +char *reencode_string(const char *in, const char *out_encoding, const > char *in_encoding) > +{ > + iconv_t conv; > + char *out; > + > + if (!in_encoding) > + return NULL; > + conv = iconv_open(out_encoding, in_encoding); > + if (conv == (iconv_t) -1) > + return NULL; > + out = reencode_string_iconv(in, conv); > iconv_close(conv); > return out; > } > + > +#ifdef __APPLE__ > +char* > +str_nfc2nfd(const char *in) > +{ > + if (my_iconv_nfc2nfd == (iconv_t) -1) { > + my_iconv_nfc2nfd = iconv_open("utf-8-mac", "utf-8"); > + if (my_iconv_nfc2nfd == (iconv_t) -1) { > + return NULL; > + } > + } > + return reencode_string_iconv(in, my_iconv_nfc2nfd); > +} > + > +char* > +str_nfd2nfc(const char *in) > +{ > + if (my_iconv_nfd2nfc == (iconv_t) -1){ > + my_iconv_nfd2nfc = iconv_open("utf-8", "utf-8-mac"); > + if (my_iconv_nfd2nfc == (iconv_t) -1) { > + return NULL; > + } > + } > + return reencode_string_iconv(in, my_iconv_nfd2nfc); > +} > +#endif /* APPLE */ > + > #endif > diff --git a/utf8.h b/utf8.h > index ebc4d2f..db29c8a 100644 > --- a/utf8.h > +++ b/utf8.h > @@ -13,8 +13,19 @@ int strbuf_add_wrapped_text(struct strbuf *buf, > > #ifndef NO_ICONV > char *reencode_string(const char *in, const char *out_encoding, const > char *in_encoding); > +char *reencode_string_iconv(const char *in, iconv_t conv); > +#ifdef __APPLE__ > +char *str_nfc2nfd(const char *in); > +char *str_nfd2nfc(const char *in); > +#else > +#define str_nfc2nfd(in) (NULL) > +#define str_nfd2nfc(in) 
(NULL) > +#endif > #else > #define reencode_string(a,b,c) NULL > +#define reencode_string2(a,b) NULL > +#define str_nfc2nfd(in) (NULL) > +#define str_nfd2nfc(in) (NULL) > #endif > > #endif -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html