[PATCH] Reencode committer info to utf-8 before formatting mail header

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



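The add_user_info function formats the commit as a mail message and
uses add_rfc2047 to format the From: line.  The add_rfc2047 function
assumes that the string is encoded as utf-8, so reencode the committer
info from the commit encoding to utf-8 before formatting the header.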
---
 builtin-mailinfo.c |    2 +-
 commit.c           |   10 +++++++++-
 utf8.c             |    9 +++++++--
 utf8.h             |    2 +-
 4 files changed, 18 insertions(+), 5 deletions(-)

I was hit by this problem when I tried to use "git rebase" on an old
repository where I had used latin1.

Another option would have been to declare the commit's actual encoding
in the RFC2047 header instead of converting the text to utf-8, but this
was a quicker solution.

diff --git a/builtin-mailinfo.c b/builtin-mailinfo.c
index 583da38..3fd8e00 100644
--- a/builtin-mailinfo.c
+++ b/builtin-mailinfo.c
@@ -513,7 +513,7 @@ static void convert_to_utf8(char *line, char *charset)
 {
 	static char latin_one[] = "latin1";
 	char *input_charset = *charset ? charset : latin_one;
-	char *out = reencode_string(line, metainfo_charset, input_charset);
+	char *out = reencode_string(line, metainfo_charset, input_charset, NULL);
 
 	if (!out)
 		die("cannot convert from %s to %s\n",
diff --git a/commit.c b/commit.c
index 496d37a..8477fa7 100644
--- a/commit.c
+++ b/commit.c
@@ -486,6 +486,10 @@ static int add_rfc2047(char *buf, const char *line, int len)
 	if (!needquote)
 		return sprintf(buf, "%.*s", len, line);
 
+        if (git_commit_encoding)
+                line = reencode_string(line, "utf-8",
+                                       git_commit_encoding, &len);
+
 	memcpy(bp, q_utf8, sizeof(q_utf8)-1);
 	bp += sizeof(q_utf8)-1;
 	for (i = 0; i < len; i++) {
@@ -501,6 +505,10 @@ static int add_rfc2047(char *buf, const char *line, int len)
 	}
 	memcpy(bp, "?=", 2);
 	bp += 2;
+
+        if (git_commit_encoding)
+                free((char *)line);
+
 	return bp - buf;
 }
 
@@ -687,7 +695,7 @@ static char *logmsg_reencode(const struct commit *commit)
 		out = strdup(commit->buffer);
 	else
 		out = reencode_string(commit->buffer,
-				      output_encoding, encoding);
+				      output_encoding, encoding, NULL);
 	if (out)
 		out = replace_encoding_header(out, output_encoding);
 
diff --git a/utf8.c b/utf8.c
index 7c80eec..ee9f514 100644
--- a/utf8.c
+++ b/utf8.c
@@ -291,7 +291,7 @@ int is_encoding_utf8(const char *name)
  * with iconv.  If the conversion fails, returns NULL.
  */
 #ifndef NO_ICONV
-char *reencode_string(const char *in, const char *out_encoding, const char *in_encoding)
+char *reencode_string(const char *in, const char *out_encoding, const char *in_encoding, int *len)
 {
 	iconv_t conv;
 	size_t insz, outsz, outalloc;
@@ -302,7 +302,10 @@ char *reencode_string(const char *in, const char *out_encoding, const char *in_e
 	conv = iconv_open(out_encoding, in_encoding);
 	if (conv == (iconv_t) -1)
 		return NULL;
-	insz = strlen(in);
+        if (len)
+                insz = *len;
+        else
+                insz = strlen(in);
 	outsz = insz;
 	outalloc = outsz + 1; /* for terminating NUL */
 	out = xmalloc(outalloc);
@@ -332,6 +335,8 @@ char *reencode_string(const char *in, const char *out_encoding, const char *in_e
 		}
 		else {
 			*outpos = '\0';
+                        if (len)
+                                *len = outpos - out;
 			break;
 		}
 	}
diff --git a/utf8.h b/utf8.h
index a07c5a8..eb64d46 100644
--- a/utf8.h
+++ b/utf8.h
@@ -8,7 +8,7 @@ int is_encoding_utf8(const char *name);
 void print_wrapped_text(const char *text, int indent, int indent2, int len);
 
 #ifndef NO_ICONV
-char *reencode_string(const char *in, const char *out_encoding, const char *in_encoding);
+char *reencode_string(const char *in, const char *out_encoding, const char *in_encoding, int *len);
 #else
 #define reencode_string(a,b,c) NULL
 #endif
-- 
1.4.4.4.ge10a-dirty


-- 
David Kågedal

-
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]