Re: [JGIT PATCH v2] Encode/decode index and tree entries using UTF-8

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Decoding uses the same strategy as for commit messages and other string
entities. Encoding is always done in UTF-8. This is incompatible with
Git for non-unicode unices, but it leads to the expected behavior on
Windows and cross-locale sharing of repositories.

Signed-off-by: Robin Rosenberg <robin.rosenberg@xxxxxxxxxx>
---

söndagen den 19 oktober 2008 19.14.56 skrev Shawn O. Pearce:
> Robin Rosenberg <robin.rosenberg.lists@xxxxxxxxxx> wrote:
> > Decoding uses the same strategy as for commit messages and other string
> > entities. Encoding is always done in UTF-8. This is incompatible with
> > Git for non-unicode unices, but it leads to the expected behavior on
> > Windows and cross-locale sharing of repositories.
> 
> FWIW I think this is a good idea.

Ok, so here's the update.  We might want to move the encode out of Constants
too as it is no longer a utility for constants.

-- robin

 .../src/org/spearce/jgit/lib/GitIndex.java         |   27 ++++++++--------
 .../src/org/spearce/jgit/lib/Tree.java             |   16 ++++-----
 .../src/org/spearce/jgit/lib/TreeEntry.java        |   14 +++-----
 .../src/org/spearce/jgit/util/RawParseUtils.java   |   32 ++++++++++++++++++++
 4 files changed, 58 insertions(+), 31 deletions(-)

diff --git a/org.spearce.jgit/src/org/spearce/jgit/lib/GitIndex.java b/org.spearce.jgit/src/org/spearce/jgit/lib/GitIndex.java
index 22935ab..bafddef 100644
--- a/org.spearce.jgit/src/org/spearce/jgit/lib/GitIndex.java
+++ b/org.spearce.jgit/src/org/spearce/jgit/lib/GitIndex.java
@@ -63,6 +63,7 @@
 import org.spearce.jgit.errors.CorruptObjectException;
 import org.spearce.jgit.errors.NotSupportedException;
 import org.spearce.jgit.util.FS;
+import org.spearce.jgit.util.RawParseUtils;
 
 /**
  * A representation of the Git index.
@@ -178,8 +179,9 @@ public Entry add(File wd, File f) throws IOException {
 	 * @param f
 	 *            the file whose path shall be removed.
 	 * @return true if such a path was found (and thus removed)
+	 * @throws IOException 
 	 */
-	public boolean remove(File wd, File f) {
+	public boolean remove(File wd, File f) throws IOException {
 		byte[] key = makeKey(wd, f);
 		return entries.remove(key) != null;
 	}
@@ -300,11 +302,11 @@ static boolean File_hasExecute() {
 		return FS.INSTANCE.supportsExecute();
 	}
 
-	static byte[] makeKey(File wd, File f) {
+	static byte[] makeKey(File wd, File f) throws IOException {
 		if (!f.getPath().startsWith(wd.getPath()))
 			throw new Error("Path is not in working dir");
 		String relName = Repository.stripWorkDir(wd, f);
-		return relName.getBytes();
+		return Constants.encode(relName);
 	}
 
 	Boolean filemode;
@@ -376,7 +378,7 @@ Entry(TreeEntry f, int stage)
 				size = -1;
 			}
 			sha1 = f.getId();
-			name = f.getFullName().getBytes("UTF-8");
+			name = Constants.encode(f.getFullName());
 			flags = (short) ((stage << 12) | name.length); // TODO: fix flags
 		}
 
@@ -580,7 +582,7 @@ private File getFile(File wd) {
 		}
 
 		public String toString() {
-			return new String(name) + "/SHA-1(" + sha1.name() + ")/M:"
+			return getName() + "/SHA-1(" + sha1.name() + ")/M:"
 					+ new Date(ctime / 1000000L) + "/C:"
 					+ new Date(mtime / 1000000L) + "/d" + dev + "/i" + ino
 					+ "/m" + Integer.toString(mode, 8) + "/u" + uid + "/g"
@@ -591,7 +593,7 @@ public String toString() {
 		 * @return path name for this entry
 		 */
 		public String getName() {
-			return new String(name);
+			return RawParseUtils.decode(name);
 		}
 
 		/**
@@ -731,7 +733,7 @@ void readTree(String prefix, Tree t) throws IOException {
 				readTree(name, (Tree) te);
 			} else {
 				Entry e = new Entry(te, 0);
-				entries.put(name.getBytes("UTF-8"), e);
+				entries.put(Constants.encode(name), e);
 			}
 		}
 	}
@@ -743,7 +745,7 @@ void readTree(String prefix, Tree t) throws IOException {
 	 * @throws IOException
 	 */
 	public Entry addEntry(TreeEntry te) throws IOException {
-		byte[] key = te.getFullName().getBytes("UTF-8");
+		byte[] key = Constants.encode(te.getFullName());
 		Entry e = new Entry(te, 0);
 		entries.put(key, e);
 		return e;
@@ -824,8 +826,7 @@ public ObjectId writeTree() throws IOException {
 			}
 			while (trees.size() < newName.length) {
 				if (!current.existsTree(newName[trees.size() - 1])) {
-					current = new Tree(current, newName[trees.size() - 1]
-							.getBytes());
+					current = new Tree(current, Constants.encode(newName[trees.size() - 1]));
 					current.getParent().addEntry(current);
 					trees.push(current);
 				} else {
@@ -835,7 +836,7 @@ public ObjectId writeTree() throws IOException {
 				}
 			}
 			FileTreeEntry ne = new FileTreeEntry(current, e.sha1,
-					newName[newName.length - 1].getBytes(),
+					Constants.encode(newName[newName.length - 1]),
 					(e.mode & FileMode.EXECUTABLE_FILE.getBits()) == FileMode.EXECUTABLE_FILE.getBits());
 			current.addEntry(ne);
 		}
@@ -880,7 +881,7 @@ int longestCommonPath(String[] a, String[] b) {
 	 * Small beware: Unaccounted for are unmerged entries. You may want
 	 * to abort if members with stage != 0 are found if you are doing
 	 * any updating operations. All stages will be found after one another
-	 * here later. Currently only one stage per name is returned.
+	 * here later. Currently only one stage per name is returned.	
 	 *
 	 * @return The index entries sorted
 	 */
@@ -896,7 +897,7 @@ int longestCommonPath(String[] a, String[] b) {
 	 * @throws UnsupportedEncodingException
 	 */
 	public Entry getEntry(String path) throws UnsupportedEncodingException {
-		return (Entry) entries.get(Repository.gitInternalSlash(path.getBytes("ISO-8859-1")));
+		return (Entry) entries.get(Repository.gitInternalSlash(Constants.encode(path)));
 	}
 
 	/**
diff --git a/org.spearce.jgit/src/org/spearce/jgit/lib/Tree.java b/org.spearce.jgit/src/org/spearce/jgit/lib/Tree.java
index 25a9a71..0ecd04d 100644
--- a/org.spearce.jgit/src/org/spearce/jgit/lib/Tree.java
+++ b/org.spearce.jgit/src/org/spearce/jgit/lib/Tree.java
@@ -44,6 +44,7 @@
 import org.spearce.jgit.errors.CorruptObjectException;
 import org.spearce.jgit.errors.EntryExistsException;
 import org.spearce.jgit.errors.MissingObjectException;
+import org.spearce.jgit.util.RawParseUtils;
 
 /**
  * A representation of a Git tree entry. A Tree is a directory in Git.
@@ -251,7 +252,7 @@ public void unload() {
 	 * @throws IOException
 	 */
 	public FileTreeEntry addFile(final String name) throws IOException {
-		return addFile(Repository.gitInternalSlash(name.getBytes(Constants.CHARACTER_ENCODING)), 0);
+		return addFile(Repository.gitInternalSlash(Constants.encode(name)), 0);
 	}
 
 	/**
@@ -281,8 +282,7 @@ public FileTreeEntry addFile(final byte[] s, final int offset)
 
 		final byte[] newName = substring(s, offset, slash);
 		if (p >= 0)
-			throw new EntryExistsException(new String(newName,
-					Constants.CHARACTER_ENCODING));
+			throw new EntryExistsException(RawParseUtils.decode(newName));
 		else if (slash < s.length) {
 			final Tree t = new Tree(this, newName);
 			insertEntry(p, t);
@@ -304,7 +304,7 @@ else if (slash < s.length) {
 	 * @throws IOException
 	 */
 	public Tree addTree(final String name) throws IOException {
-		return addTree(Repository.gitInternalSlash(name.getBytes(Constants.CHARACTER_ENCODING)), 0);
+		return addTree(Repository.gitInternalSlash(Constants.encode(name)), 0);
 	}
 
 	/**
@@ -332,8 +332,7 @@ public Tree addTree(final byte[] s, final int offset) throws IOException {
 
 		final byte[] newName = substring(s, offset, slash);
 		if (p >= 0)
-			throw new EntryExistsException(new String(newName,
-					Constants.CHARACTER_ENCODING));
+			throw new EntryExistsException(RawParseUtils.decode(newName));
 
 		final Tree t = new Tree(this, newName);
 		insertEntry(p, t);
@@ -355,8 +354,7 @@ public void addEntry(final TreeEntry e) throws IOException {
 			e.attachParent(this);
 			insertEntry(p, e);
 		} else {
-			throw new EntryExistsException(new String(e.getNameUTF8(),
-					Constants.CHARACTER_ENCODING));
+			throw new EntryExistsException(e.getName());
 		}
 	}
 
@@ -450,7 +448,7 @@ public boolean existsBlob(String path) throws IOException {
 	}
 
 	private TreeEntry findMember(final String s, byte slast) throws IOException {
-		return findMember(Repository.gitInternalSlash(s.getBytes(Constants.CHARACTER_ENCODING)), slast, 0);
+		return findMember(Repository.gitInternalSlash(Constants.encode(s)), slast, 0);
 	}
 
 	private TreeEntry findMember(final byte[] s, final byte slast, final int offset)
diff --git a/org.spearce.jgit/src/org/spearce/jgit/lib/TreeEntry.java b/org.spearce.jgit/src/org/spearce/jgit/lib/TreeEntry.java
index 85dda1d..c95863c 100644
--- a/org.spearce.jgit/src/org/spearce/jgit/lib/TreeEntry.java
+++ b/org.spearce.jgit/src/org/spearce/jgit/lib/TreeEntry.java
@@ -39,9 +39,9 @@
 package org.spearce.jgit.lib;
 
 import java.io.IOException;
-import java.io.UnsupportedEncodingException;
 
 import org.spearce.jgit.lib.GitIndex.Entry;
+import org.spearce.jgit.util.RawParseUtils;
 
 /**
  * This class represents an entry in a tree, like a blob or another tree.
@@ -126,13 +126,9 @@ public Repository getRepository() {
 	 * @return the name of this entry.
 	 */
 	public String getName() {
-		try {
-			return nameUTF8 != null ? new String(nameUTF8,
-					Constants.CHARACTER_ENCODING) : null;
-		} catch (UnsupportedEncodingException uee) {
-			throw new RuntimeException("JVM doesn't support "
-					+ Constants.CHARACTER_ENCODING, uee);
-		}
+		if (nameUTF8 != null)
+			return RawParseUtils.decode(nameUTF8);
+		return null;
 	}
 
 	/**
@@ -142,7 +138,7 @@ public String getName() {
 	 * @throws IOException
 	 */
 	public void rename(final String n) throws IOException {
-		rename(n.getBytes(Constants.CHARACTER_ENCODING));
+		rename(Constants.encode(n));
 	}
 
 	/**
diff --git a/org.spearce.jgit/src/org/spearce/jgit/util/RawParseUtils.java b/org.spearce.jgit/src/org/spearce/jgit/util/RawParseUtils.java
index 6c0e339..4b96439 100644
--- a/org.spearce.jgit/src/org/spearce/jgit/util/RawParseUtils.java
+++ b/org.spearce.jgit/src/org/spearce/jgit/util/RawParseUtils.java
@@ -379,6 +379,38 @@ public static PersonIdent parsePersonIdent(final byte[] raw, final int nameB) {
 	}
 
 	/**
+	 * Decode a buffer under UTF-8, if possible.
+	 *
+	 * If the byte stream cannot be decoded that way, the platform default is tried
+	 * and if that too fails, the fail-safe ISO-8859-1 encoding is tried.
+	 * 
+	 * @param buffer
+	 *            buffer to pull raw bytes from.
+	 * @return a string representation of the entire buffer, after decoding
+	 *         it as UTF-8 or through one of the fallbacks described above.
+	 */
+	public static String decode(final byte[] buffer) {
+		return decode(Constants.CHARSET, buffer, 0, buffer.length);
+	}
+
+	/**
+	 * Decode a buffer under the specified character set if possible.
+	 *
+	 * If the byte stream cannot be decoded that way, the platform default is tried
+	 * and if that too fails, the fail-safe ISO-8859-1 encoding is tried.
+	 * 
+	 * @param cs
+	 *            character set to use when decoding the buffer.
+	 * @param buffer
+	 *            buffer to pull raw bytes from.
+	 * @return a string representation of the entire buffer, after decoding
+	 *         it through the specified character set or a fallback.
+	 */
+	public static String decode(final Charset cs, final byte[] buffer) {
+		return decode(cs, buffer, 0, buffer.length);
+	}
+
+	/**
 	 * Decode a region of the buffer under the specified character set if possible.
 	 *
 	 * If the byte stream cannot be decoded that way, the platform default is tried
-- 
1.6.0.2.308.gef4a

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux