[PATCH 4/4] Only re-encode certain parts in commit object, not the whole

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



A commit object has its own format, which happens to be ASCII, but the
object as a whole is not really subject to re-encoding.

There are only four areas that may be re-encoded: the author line, the
committer line, mergetag lines and the commit body.  The encoding of tags
embedded in mergetag lines is not decided by the commit encoding, so leave
them out and treat them as binary.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@xxxxxxxxx>
---
 pretty.c |   58 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 57 insertions(+), 1 deletions(-)

diff --git a/pretty.c b/pretty.c
index 5c433a2..6ccc091 100644
--- a/pretty.c
+++ b/pretty.c
@@ -489,6 +489,62 @@ static char *replace_encoding_header(char *buf, const char *encoding)
 	return strbuf_detach(&tmp, NULL);
 }
 
+/*
+ * Re-encode only the author line, the committer line and the commit
+ * body from in_enc to out_enc.  Everything else (including mergetag
+ * lines, whatever encoding they are in) is copied through unchanged.
+ * We assume the commit is well-formed, so nothing is validated.
+ */
+static char *reencode_commit(const char *buffer,
+			     const char *out_enc, const char *in_enc)
+{
+	struct strbuf out = STRBUF_INIT;
+	struct strbuf buf = STRBUF_INIT;
+	char *reencoded, *s, *e;
+
+	strbuf_addstr(&buf, buffer);	/* scratch copy that we may cut up in place */
+
+	s = strstr(buf.buf, "\nauthor ");
+	assert(s != NULL);
+	s += 8;			/* skip over "\nauthor " */
+	strbuf_add(&out, buf.buf, s - buf.buf);
+	e = strchr(s, '\n');	/* NOTE(review): NULL if no newline; unchecked */
+	*e = '\0';	/* terminate the author value inside the scratch copy */
+	reencoded = reencode_string(s, out_enc, in_enc);
+	if (reencoded && strchr(reencoded, '\n'))
+		die("your chosen encoding produces \\n out of nowhere?");
+	strbuf_addstr(&out, reencoded ? reencoded : s);
+	free(reencoded);
+
+	strbuf_addstr(&out, "\ncommitter ");
+	assert(!strncmp(e + 1, "committer ", 10));
+	s = e + 11;		/* skip over "\ncommitter " */
+	e = strchr(s, '\n');	/* NOTE(review): NULL if no newline; unchecked */
+	*e = '\0';
+	reencoded = reencode_string(s, out_enc, in_enc);
+	if (reencoded && strchr(reencoded, '\n'))
+		die("your chosen encoding produces \\n out of nowhere?");
+	strbuf_addstr(&out, reencoded ? reencoded : s);
+	free(reencoded);
+	*e = '\n';	/* restore the newline so the "\n\n" search below can start here */
+
+	s = e;
+	e = strstr(s, "\n\n");
+	if (e) {
+		e += 2;		/* step past the "\n\n" that ends the header */
+		strbuf_add(&out, s, e - s);
+
+		s = e;
+		reencoded = reencode_string(s, out_enc, in_enc);
+		strbuf_addstr(&out, reencoded ? reencoded : s);
+		free(reencoded);
+	} else
+		strbuf_addstr(&out, s);	/* no "\n\n" found: copy the rest as is */
+
+	strbuf_release(&buf);
+	return strbuf_detach(&out, NULL);
+}
+
 char *logmsg_reencode(const struct commit *commit,
 		      const char *output_encoding)
 {
@@ -514,7 +570,7 @@ char *logmsg_reencode(const struct commit *commit,
 		else
 			return NULL; /* nothing to do */
 	else
-		out = reencode_string(commit->buffer,
+		out = reencode_commit(commit->buffer,
 				      output_encoding, use_encoding);
 	if (out)
 		out = replace_encoding_header(out, output_encoding);
-- 
1.7.8.36.g69ee2

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]