Re: Git, Mac OS X and German special characters

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Torsten Bögershausen venit, vidit, dixit 20.05.2010 11:02:
> Hej,
> I have the same problem here.
> Below there is a patch, which may solve the problem.
> (Yes, whitespaces are broken. I'm still fighting with
> git format-patch -s --cover-letter -M --stdout origin/master | git 
> imap-send)
> But this patch may be a starting point for improvements.
> Comments welcome
> BR
> /Torsten
> 
> 
> 
> Improve interoperability between Mac OS X and Linux when umlauts are used
> When a git repository containing utf-8 coded umlaut characters
> is cloned onto a Mac OS X machine, the Mac OS system will convert
> all filenames returned by readdir() into decomposed (NFD) utf-8.
> As a result of this conversion, git will not find them on disk.
> This patch helps by treating the NFC and NFD versions of filenames as
> identical on Mac OS.
> 
> 
> 
> 
> 
> 
> Signed-off-by: Torsten Bögershausen <tboegi@xxxxxx>

You signed off, but is Markus Kuhn's code from UCS GPL2-licensed?
Also, a few tests would be nice.

I remember we had threads on this issue in the past. I haven't checked
yet (Thunderbird pruned my nntp history), but it is worth checking that
you addressed any issues mentioned there.

I have no Mac so I can't test, sorry. Would be happy to run Mac OS in a
vm, but you know...

Thanks for looking into this!

Michael

> ---
> name-hash.c |   40 ++++++++++++++++++++++++++++++++++++++++
> utf8.c      |   55 ++++++++++++++++++++++++++++++++++++++++++++++++-------
> utf8.h      |   11 +++++++++++
> 3 files changed, 99 insertions(+), 7 deletions(-)
> 
> diff --git a/name-hash.c b/name-hash.c
> index 0031d78..e6494e8 100644
> --- a/name-hash.c
> +++ b/name-hash.c
> @@ -7,6 +7,7 @@
>   */
> #define NO_THE_INDEX_COMPATIBILITY_MACROS
> #include "cache.h"
> +#include "utf8.h"
> 
> /*
>   * This removes bit 5 if bit 6 is set.
> @@ -100,6 +101,25 @@ static int same_name(const struct cache_entry *ce, 
> const char *name, int namelen
>      return icase && slow_same_name(name, namelen, ce->name, len);
> }
> 
> +#ifdef __APPLE__
> +struct cache_entry *index_name_exists2(struct index_state *istate, 
> const char *name, int icase)
> +{
> +    int namelen = (int)strlen(name);
> +    unsigned int hash = hash_name(name, namelen);
> +    struct cache_entry *ce;
> +
> +    ce = lookup_hash(hash, &istate->name_hash);
> +    while (ce) {
> +        if (!(ce->ce_flags & CE_UNHASHED)) {
> +            if (same_name(ce, name, namelen, icase))
> +                return ce;
> +        }
> +        ce = ce->next;
> +    }
> +    return NULL;
> +}
> +#endif
> +
> struct cache_entry *index_name_exists(struct index_state *istate, const 
> char *name, int namelen, int icase)
> {
>      unsigned int hash = hash_name(name, namelen);
> @@ -115,5 +135,25 @@ struct cache_entry *index_name_exists(struct 
> index_state *istate, const char *na
>          }
>          ce = ce->next;
>      }
> +#ifdef __APPLE__
> +    {
> +        char *name_nfc_nfd;
> +        name_nfc_nfd = str_nfc2nfd(name);
> +        if (name_nfc_nfd) {
> +            ce = index_name_exists2(istate, name_nfc_nfd, icase);
> +            free(name_nfc_nfd);
> +            if (ce)
> +                return ce;
> +        }
> +        name_nfc_nfd = str_nfd2nfc(name);
> +        if (name_nfc_nfd) {
> +            ce = index_name_exists2(istate, name_nfc_nfd, icase);
> +            free(name_nfc_nfd);
> +            if (ce)
> +                return ce;
> +        }
> +    }
> +#endif
> +
>      return NULL;
> }
> diff --git a/utf8.c b/utf8.c
> index 84cfc72..8e794dc 100644
> --- a/utf8.c
> +++ b/utf8.c
> @@ -2,6 +2,11 @@
> #include "strbuf.h"
> #include "utf8.h"
> 
> +#ifdef __APPLE__
> +static iconv_t my_iconv_nfd2nfc = (iconv_t) -1;
> +static iconv_t my_iconv_nfc2nfd = (iconv_t) -1;
> +#endif
> +
> /* This code is originally from http://www.cl.cam.ac.uk/~mgk25/ucs/ */
> 
> struct interval {
> @@ -424,18 +429,13 @@ int is_encoding_utf8(const char *name)
> #else
>      typedef char * iconv_ibp;
> #endif
> -char *reencode_string(const char *in, const char *out_encoding, const 
> char *in_encoding)
> +
> +char *reencode_string_iconv(const char *in, iconv_t conv)
> {
> -    iconv_t conv;
>      size_t insz, outsz, outalloc;
>      char *out, *outpos;
>      iconv_ibp cp;
> 
> -    if (!in_encoding)
> -        return NULL;
> -    conv = iconv_open(out_encoding, in_encoding);
> -    if (conv == (iconv_t) -1)
> -        return NULL;
>      insz = strlen(in);
>      outsz = insz;
>      outalloc = outsz + 1; /* for terminating NUL */
> @@ -469,7 +469,48 @@ char *reencode_string(const char *in, const char 
> *out_encoding, const char *in_e
>              break;
>          }
>      }
> +    return out;
> +}
> +
> +char *reencode_string(const char *in, const char *out_encoding, const 
> char *in_encoding)
> +{
> +    iconv_t conv;
> +    char *out;
> +
> +    if (!in_encoding)
> +        return NULL;
> +    conv = iconv_open(out_encoding, in_encoding);
> +    if (conv == (iconv_t) -1)
> +        return NULL;
> +    out = reencode_string_iconv(in, conv);
>      iconv_close(conv);
>      return out;
> }
> +
> +#ifdef __APPLE__
> +char*
> +str_nfc2nfd(const char *in)
> +{
> +    if (my_iconv_nfc2nfd == (iconv_t) -1) {
> +        my_iconv_nfc2nfd = iconv_open("utf-8-mac", "utf-8");
> +        if (my_iconv_nfc2nfd == (iconv_t) -1) {
> +            return NULL;
> +        }
> +    }
> +    return reencode_string_iconv(in, my_iconv_nfc2nfd);
> +}
> +
> +char*
> +str_nfd2nfc(const char *in)
> +{
> +    if (my_iconv_nfd2nfc == (iconv_t) -1){
> +        my_iconv_nfd2nfc = iconv_open("utf-8", "utf-8-mac");
> +        if (my_iconv_nfd2nfc == (iconv_t) -1) {
> +            return NULL;
> +        }
> +    }
> +    return reencode_string_iconv(in, my_iconv_nfd2nfc);
> +}
> +#endif /* APPLE */
> +
> #endif
> diff --git a/utf8.h b/utf8.h
> index ebc4d2f..db29c8a 100644
> --- a/utf8.h
> +++ b/utf8.h
> @@ -13,8 +13,19 @@ int strbuf_add_wrapped_text(struct strbuf *buf,
> 
> #ifndef NO_ICONV
> char *reencode_string(const char *in, const char *out_encoding, const 
> char *in_encoding);
> +char *reencode_string_iconv(const char *in, iconv_t conv);
> +#ifdef __APPLE__
> +char *str_nfc2nfd(const char *in);
> +char *str_nfd2nfc(const char *in);
> +#else
> +#define str_nfc2nfd(in) (NULL)
> +#define str_nfd2nfc(in) (NULL)
> +#endif
> #else
> #define reencode_string(a,b,c) NULL
> +#define reencode_string2(a,b) NULL
> +#define str_nfc2nfd(in) (NULL)
> +#define str_nfd2nfc(in) (NULL)
> #endif
> 
> #endif
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]