[PATCH 07/12] utf8: keep NULs in reencode_string()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



reencode_string() will be used in the next patch for re-encoding
pretty output, which can contain NULs.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@xxxxxxxxx>
---
 builtin/fast-export.c    |  3 ++-
 builtin/mailinfo.c       |  3 ++-
 compat/precompose_utf8.c |  2 +-
 notes.c                  |  4 +++-
 pretty.c                 |  3 ++-
 sequencer.c              |  5 +++--
 utf8.c                   | 10 +++++++---
 utf8.h                   | 10 +++++++---
 8 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/builtin/fast-export.c b/builtin/fast-export.c
index 77dffd1..7ba9f3b 100644
--- a/builtin/fast-export.c
+++ b/builtin/fast-export.c
@@ -316,7 +316,8 @@ static void handle_commit(struct commit *commit, struct rev_info *rev)
 
 	mark_next_object(&commit->object);
 	if (!is_encoding_utf8(encoding))
-		reencoded = reencode_string(message, "UTF-8", encoding);
+		reencoded = reencode_string(message, strlen(message),
+					    "UTF-8", encoding, NULL);
 	if (!commit->parents)
 		printf("reset %s\n", (const char*)commit->util);
 	printf("commit %s\nmark :%"PRIu32"\n%.*s\n%.*s\ndata %u\n%s",
diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c
index 24a772d..129e7dc 100644
--- a/builtin/mailinfo.c
+++ b/builtin/mailinfo.c
@@ -486,7 +486,8 @@ static void convert_to_utf8(struct strbuf *line, const char *charset)
 
 	if (same_encoding(metainfo_charset, charset))
 		return;
-	out = reencode_string(line->buf, metainfo_charset, charset);
+	out = reencode_string(line->buf, line->len,
+			      metainfo_charset, charset, NULL);
 	if (!out)
 		die("cannot convert from %s to %s",
 		    charset, metainfo_charset);
diff --git a/compat/precompose_utf8.c b/compat/precompose_utf8.c
index 8cf5955..d9203d0 100644
--- a/compat/precompose_utf8.c
+++ b/compat/precompose_utf8.c
@@ -78,7 +78,7 @@ void precompose_argv(int argc, const char **argv)
 		size_t namelen;
 		oldarg = argv[i];
 		if (has_non_ascii(oldarg, (size_t)-1, &namelen)) {
-			newarg = reencode_string_iconv(oldarg, namelen, ic_precompose);
+			newarg = reencode_string_iconv(oldarg, namelen, ic_precompose, NULL);
 			if (newarg)
 				argv[i] = newarg;
 		}
diff --git a/notes.c b/notes.c
index f63fd57..4ae3b25 100644
--- a/notes.c
+++ b/notes.c
@@ -1222,7 +1222,9 @@ static void format_note(struct notes_tree *t, const unsigned char *object_sha1,
 
 	if (output_encoding && *output_encoding &&
 	    !is_encoding_utf8(output_encoding)) {
-		char *reencoded = reencode_string(msg, output_encoding, utf8);
+		char *reencoded = reencode_string(msg, strlen(msg),
+						  output_encoding, utf8,
+						  NULL);
 		if (reencoded) {
 			free(msg);
 			msg = reencoded;
diff --git a/pretty.c b/pretty.c
index e2241e5..092dd1d 100644
--- a/pretty.c
+++ b/pretty.c
@@ -643,7 +643,8 @@ char *logmsg_reencode(const struct commit *commit,
 		 * this point, we are done with msg. If we allocated a fresh
 		 * copy, we can free it.
 		 */
-		out = reencode_string(msg, output_encoding, use_encoding);
+		out = reencode_string(msg, strlen(msg),
+				      output_encoding, use_encoding, NULL);
 		if (out && msg != commit->buffer)
 			free(msg);
 	}
diff --git a/sequencer.c b/sequencer.c
index aef5e8a..bf15531 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -61,8 +61,9 @@ static int get_message(struct commit *commit, struct commit_message *out)
 	out->reencoded_message = NULL;
 	out->message = commit->buffer;
 	if (same_encoding(encoding, git_commit_encoding))
-		out->reencoded_message = reencode_string(commit->buffer,
-					git_commit_encoding, encoding);
+		out->reencoded_message =
+			reencode_string(commit->buffer, strlen(commit->buffer),
+					git_commit_encoding, encoding, NULL);
 	if (out->reencoded_message)
 		out->message = out->reencoded_message;
 
diff --git a/utf8.c b/utf8.c
index 38322a1..9d98043 100644
--- a/utf8.c
+++ b/utf8.c
@@ -468,7 +468,7 @@ int utf8_fprintf(FILE *stream, const char *format, ...)
 #else
 	typedef char * iconv_ibp;
 #endif
-char *reencode_string_iconv(const char *in, size_t insz, iconv_t conv)
+char *reencode_string_iconv(const char *in, size_t insz, iconv_t conv, int *outsz_p)
 {
 	size_t outsz, outalloc;
 	char *out, *outpos;
@@ -502,13 +502,17 @@ char *reencode_string_iconv(const char *in, size_t insz, iconv_t conv)
 		}
 		else {
 			*outpos = '\0';
+			if (outsz_p)
+				*outsz_p = outpos - out;
 			break;
 		}
 	}
 	return out;
 }
 
-char *reencode_string(const char *in, const char *out_encoding, const char *in_encoding)
+char *reencode_string(const char *in, int insz,
+		      const char *out_encoding, const char *in_encoding,
+		      int *outsz)
 {
 	iconv_t conv;
 	char *out;
@@ -518,7 +522,7 @@ char *reencode_string(const char *in, const char *out_encoding, const char *in_e
 	conv = iconv_open(out_encoding, in_encoding);
 	if (conv == (iconv_t) -1)
 		return NULL;
-	out = reencode_string_iconv(in, strlen(in), conv);
+	out = reencode_string_iconv(in, insz, conv, outsz);
 	iconv_close(conv);
 	return out;
 }
diff --git a/utf8.h b/utf8.h
index a556932..99db3e0 100644
--- a/utf8.h
+++ b/utf8.h
@@ -17,10 +17,14 @@ void strbuf_add_wrapped_bytes(struct strbuf *buf, const char *data, int len,
 			     int indent, int indent2, int width);
 
 #ifndef NO_ICONV
-char *reencode_string_iconv(const char *in, size_t insz, iconv_t conv);
-char *reencode_string(const char *in, const char *out_encoding, const char *in_encoding);
+char *reencode_string_iconv(const char *in, size_t insz,
+			    iconv_t conv, int *outsz);
+char *reencode_string(const char *in, int insz,
+		      const char *out_encoding,
+		      const char *in_encoding,
+		      int *outsz);
 #else
-#define reencode_string(a,b,c) NULL
+#define reencode_string(a,b,c,d) NULL
 #endif
 
 #endif
-- 
1.8.2.83.gc99314b

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]