The add_user_info function formats the commit as a mail message and uses add_rfc2047 to format the From: line. However, add_rfc2047 assumes that the string is encoded as UTF-8, so convert the string from the commit encoding to UTF-8 before quoting it. --- builtin-mailinfo.c | 2 +- commit.c | 10 +++++++++- utf8.c | 9 +++++++-- utf8.h | 2 +- 4 files changed, 18 insertions(+), 5 deletions(-) I was hit by this problem when working with an old repository where I had used latin1 and tried to use "git rebase". Another option would have been to use the correct encoding in the RFC2047 header, but this was a quicker solution. diff --git a/builtin-mailinfo.c b/builtin-mailinfo.c index 583da38..3fd8e00 100644 --- a/builtin-mailinfo.c +++ b/builtin-mailinfo.c @@ -513,7 +513,7 @@ static void convert_to_utf8(char *line, char *charset) { static char latin_one[] = "latin1"; char *input_charset = *charset ? charset : latin_one; - char *out = reencode_string(line, metainfo_charset, input_charset); + char *out = reencode_string(line, metainfo_charset, input_charset, NULL); if (!out) die("cannot convert from %s to %s\n", diff --git a/commit.c b/commit.c index 496d37a..8477fa7 100644 --- a/commit.c +++ b/commit.c @@ -486,6 +486,10 @@ static int add_rfc2047(char *buf, const char *line, int len) if (!needquote) return sprintf(buf, "%.*s", len, line); + if (git_commit_encoding) + line = reencode_string(line, "utf-8", + git_commit_encoding, &len); + memcpy(bp, q_utf8, sizeof(q_utf8)-1); bp += sizeof(q_utf8)-1; for (i = 0; i < len; i++) { @@ -501,6 +505,10 @@ static int add_rfc2047(char *buf, const char *line, int len) } memcpy(bp, "?=", 2); bp += 2; + + if (git_commit_encoding) + free((char *)line); + return bp - buf; } @@ -687,7 +695,7 @@ static char *logmsg_reencode(const struct commit *commit) out = strdup(commit->buffer); else out = reencode_string(commit->buffer, - output_encoding, encoding); + output_encoding, encoding, NULL); if (out) out = replace_encoding_header(out, output_encoding); diff --git a/utf8.c b/utf8.c index 7c80eec..ee9f514 100644 --- a/utf8.c 
+++ b/utf8.c @@ -291,7 +291,7 @@ int is_encoding_utf8(const char *name) * with iconv. If the conversion fails, returns NULL. */ #ifndef NO_ICONV -char *reencode_string(const char *in, const char *out_encoding, const char *in_encoding) +char *reencode_string(const char *in, const char *out_encoding, const char *in_encoding, int *len) { iconv_t conv; size_t insz, outsz, outalloc; @@ -302,7 +302,10 @@ char *reencode_string(const char *in, const char *out_encoding, const char *in_e conv = iconv_open(out_encoding, in_encoding); if (conv == (iconv_t) -1) return NULL; - insz = strlen(in); + if (len) + insz = *len; + else + insz = strlen(in); outsz = insz; outalloc = outsz + 1; /* for terminating NUL */ out = xmalloc(outalloc); @@ -332,6 +335,8 @@ char *reencode_string(const char *in, const char *out_encoding, const char *in_e } else { *outpos = '\0'; + if (len) + *len = outpos - out; break; } } diff --git a/utf8.h b/utf8.h index a07c5a8..eb64d46 100644 --- a/utf8.h +++ b/utf8.h @@ -8,7 +8,7 @@ int is_encoding_utf8(const char *name); void print_wrapped_text(const char *text, int indent, int indent2, int len); #ifndef NO_ICONV -char *reencode_string(const char *in, const char *out_encoding, const char *in_encoding); +char *reencode_string(const char *in, const char *out_encoding, const char *in_encoding, int *len); #else #define reencode_string(a,b,c) NULL #endif -- 1.4.4.4.ge10a-dirty -- David Kågedal - To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html