Commit objects may contain an encoding, supposedly meant to indicate the encoding used for that particular commit. This is the last header line before the comment as far as I know. Unfortunately the encoding header only reflects a setting in the config file, so it is most likely to be wrong by default unless you are on a UTF-8 platform (in which case the header isn't really needed). Later on we'll have to decode that more intelligently. For now we make an attempt to decode according to the setting if present, otherwise we use the platform default. Signed-off-by: Robin Rosenberg <robin.rosenberg@xxxxxxxxxx> --- .../src/org/spearce/jgit/lib/Commit.java | 55 +++++++++++++++++------ .../src/org/spearce/jgit/lib/ObjectWriter.java | 11 ++++- .../tst/org/spearce/jgit/lib/T0003_Basic.java | 32 +++++++++++++ 3 files changed, 82 insertions(+), 16 deletions(-) diff --git a/org.spearce.jgit/src/org/spearce/jgit/lib/Commit.java b/org.spearce.jgit/src/org/spearce/jgit/lib/Commit.java index d1ef5de..1b644dd 100644 --- a/org.spearce.jgit/src/org/spearce/jgit/lib/Commit.java +++ b/org.spearce.jgit/src/org/spearce/jgit/lib/Commit.java @@ -16,10 +16,10 @@ */ package org.spearce.jgit.lib; -import java.io.BufferedReader; import java.io.ByteArrayInputStream; +import java.io.DataInputStream; import java.io.IOException; -import java.io.InputStreamReader; +import java.nio.charset.Charset; import java.util.ArrayList; import java.util.List; @@ -45,6 +45,8 @@ public class Commit implements Treeish { private byte[] raw; + private Charset encoding; + public Commit(final Repository db) { objdb = db; parentIds = new ArrayList(2); @@ -132,8 +134,7 @@ public class Commit implements Treeish { // FIXME: handle I/O errors if (raw != null) { try { - BufferedReader br = new BufferedReader(new InputStreamReader( - new ByteArrayInputStream(raw))); + DataInputStream br = new DataInputStream(new ByteArrayInputStream(raw)); String n = br.readLine(); if (n == null || !n.startsWith("tree ")) { throw new 
CorruptObjectException(commitId, "no tree"); @@ -144,24 +145,33 @@ public class Commit implements Treeish { if (n == null || !n.startsWith("author ")) { throw new CorruptObjectException(commitId, "no author"); } - author = new PersonIdent(n.substring("author ".length())); + String rawAuthor = n.substring("author ".length()); n = br.readLine(); if (n == null || !n.startsWith("committer ")) { throw new CorruptObjectException(commitId, "no committer"); } - committer = new PersonIdent(n.substring("committer ".length())); + String rawCommitter = n.substring("committer ".length()); n = br.readLine(); - if (n == null || !n.equals("")) { - throw new CorruptObjectException(commitId, - "malformed header"); + if (n != null && n.startsWith( "encoding")) + encoding = Charset.forName(n.substring("encoding ".length())); + else + if (n == null || !n.equals("")) { + throw new CorruptObjectException(commitId, + "malformed header:"+n); } - StringBuffer tempMessage = new StringBuffer(); - char[] readBuf = new char[2048]; - int readLen; - while ((readLen = br.read(readBuf)) > 0) { - tempMessage.append(readBuf, 0, readLen); + byte[] readBuf = new byte[br.available()]; // in-memory stream so this is all bytes left + br.read(readBuf); + if (encoding != null) { + // TODO: this isn't reliable so we need to guess the encoding from the actual content + author = new PersonIdent(new String(rawAuthor.getBytes(),encoding)); + committer = new PersonIdent(new String(rawCommitter.getBytes(),encoding)); + message = new String(readBuf,encoding); + } else { + // TODO: use config setting / platform / ascii / iso-latin + author = new PersonIdent(new String(rawAuthor.getBytes())); + committer = new PersonIdent(new String(rawCommitter.getBytes())); + message = new String(readBuf); } - message = tempMessage.toString(); } catch (IOException e) { e.printStackTrace(); } finally { @@ -183,4 +193,19 @@ public class Commit implements Treeish { public String toString() { return "Commit[" + getCommitId() + " " + 
getAuthor() + "]"; } + + public void setEncoding(String e) { + encoding = Charset.forName(e); + } + + public void setEncoding(Charset e) { + encoding = e; + } + + public String getEncoding() { + if (encoding != null) + return encoding.name(); + else + return null; + } } diff --git a/org.spearce.jgit/src/org/spearce/jgit/lib/ObjectWriter.java b/org.spearce.jgit/src/org/spearce/jgit/lib/ObjectWriter.java index 667b569..a88fd95 100644 --- a/org.spearce.jgit/src/org/spearce/jgit/lib/ObjectWriter.java +++ b/org.spearce.jgit/src/org/spearce/jgit/lib/ObjectWriter.java @@ -108,8 +108,11 @@ public class ObjectWriter { public ObjectId writeCommit(final Commit c) throws IOException { final ByteArrayOutputStream os = new ByteArrayOutputStream(); + String encoding = c.getEncoding(); + if (encoding == null) + encoding = Constants.CHARACTER_ENCODING; final OutputStreamWriter w = new OutputStreamWriter(os, - Constants.CHARACTER_ENCODING); + encoding); w.write("tree "); c.getTreeId().copyTo(w); @@ -130,6 +133,12 @@ public class ObjectWriter { w.write(c.getCommitter().toExternalString()); w.write('\n'); + if (!encoding.equals("UTF-8")) { + w.write("encoding "); + w.write(encoding); + w.write('\n'); + } + w.write('\n'); w.write(c.getMessage()); w.close(); diff --git a/org.spearce.jgit/tst/org/spearce/jgit/lib/T0003_Basic.java b/org.spearce.jgit/tst/org/spearce/jgit/lib/T0003_Basic.java index 9e6f805..9c807f9 100644 --- a/org.spearce.jgit/tst/org/spearce/jgit/lib/T0003_Basic.java +++ b/org.spearce.jgit/tst/org/spearce/jgit/lib/T0003_Basic.java @@ -21,6 +21,8 @@ import java.io.FileInputStream; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; +import java.util.Date; +import java.util.TimeZone; public class T0003_Basic extends RepositoryTestCase { public void test001_Initalize() { @@ -385,4 +387,34 @@ public class T0003_Basic extends RepositoryTestCase { assertEquals(new PersonIdent(jauthor, 1154236443000L, -4 * 60), mapTag.getAuthor()); 
assertEquals("b5d3b45a96b340441f5abb9080411705c51cc86c", mapTag.getObjId().toString()); } + + public void test023_createCommitNonAscii() throws IOException { + final ObjectId emptyId = new ObjectWriter(db).writeBlob(new byte[0]); + final Tree almostEmptyTree = new Tree(db); + almostEmptyTree.addEntry(new FileTreeEntry(almostEmptyTree, emptyId, "empty".getBytes(), false)); + final ObjectId almostEmptyTreeId = new ObjectWriter(db).writeTree(almostEmptyTree); + Commit commit = new Commit(db); + commit.setTreeId(almostEmptyTreeId); + commit.setAuthor(new PersonIdent("Joe H\u00e4cker","joe@xxxxxxxxxxx",new Date(1900,0,1), TimeZone.getTimeZone("CET"))); + commit.setCommitter(new PersonIdent("Joe Hacker","joe2@xxxxxxxxxxx",new Date(1900,0,1), TimeZone.getTimeZone("CET"))); + commit.setEncoding("UTF-8"); + commit.setMessage("\u00dcbergeeks"); + ObjectId cid = new ObjectWriter(db).writeCommit(commit); + assertEquals("ed63583834b8a627474ee9f9330434ef37a04825", cid.toString()); + } + + public void test024_createCommitNonAscii() throws IOException { + final ObjectId emptyId = new ObjectWriter(db).writeBlob(new byte[0]); + final Tree almostEmptyTree = new Tree(db); + almostEmptyTree.addEntry(new FileTreeEntry(almostEmptyTree, emptyId, "empty".getBytes(), false)); + final ObjectId almostEmptyTreeId = new ObjectWriter(db).writeTree(almostEmptyTree); + Commit commit = new Commit(db); + commit.setTreeId(almostEmptyTreeId); + commit.setAuthor(new PersonIdent("Joe H\u00e4cker","joe@xxxxxxxxxxx",new Date(1900,0,1), TimeZone.getTimeZone("CET"))); + commit.setCommitter(new PersonIdent("Joe Hacker","joe2@xxxxxxxxxxx",new Date(1900,0,1), TimeZone.getTimeZone("CET"))); + commit.setEncoding("ISO-8859-1"); + commit.setMessage("\u00dcbergeeks"); + ObjectId cid = new ObjectWriter(db).writeCommit(commit); + assertEquals("afc8532038e7220179127b7e96acf534486798ce", cid.toString()); + } } - To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to 
majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html